Source code for ample.util.reforigin

import logging
import os

from ample.util import ample_util
from ample.util import pdb_edit
from ample.util import residue_map

logger = logging.getLogger(__name__)

[docs]class ReforiginRmsd(object):
    """Class to use reforigin to determine how well the model was placed.
    """
    def __init__( self, workdir=None, cAlphaOnly=True ):
        
        self.workdir = workdir
        if not self.workdir:
            self.workdir = os.getcwd()
        
        self.cAlphaOnly = cAlphaOnly# Whether to only compare c-alpha atoms
        
        self.rmsd = None
        self.bestNativeChain = None
        self.bestPlacedChain = None
        self.bestReforiginPdb = None
        
        return
    
[docs]    def calculate( self, refpdb=None, targetpdb=None, outpdb=None, DMAX=100 ):
        
        # Don't need to do this as we process the file before hand
        # HACK - REFORIGIN has a limit on the length of the command line, so we need to create copies of inputfile
        # as this has the potentially longest path
        #tmptarget = ample_util.tmp_file_name()+".pdb"
        #shutil.copy(targetpdb, tmptarget)
        
        logfile = outpdb+".log"
        cmd="reforigin xyzin {0} xyzref {1} xyzout {2} DMAX {3}".format( targetpdb, refpdb, outpdb, DMAX ).split()
        
        retcode = ample_util.run_command(cmd=cmd, logfile=logfile, directory=os.getcwd(), dolog=False)
        if retcode != 0:
            raise RuntimeError("Error running command: {0}".format( " ".join(cmd) ))
        
        # Parse logfile to get RMSD
        rms = None
        for line in open( logfile, 'r' ):
            if line.startswith("RMS deviation:"):
                rms = float( line.split()[-1] )
                break
        
        if not rms:
            raise RuntimeError("Error extracting RMS from logfile: {0}".format( logfile ))
        else:
            os.unlink(logfile)
        
        return rms

[docs]    def preparePlacedPdb( self, placedPdb=None, placedChainID=None, nativeChainID=None, resSeqMap=None ):
        """
        Use pdbcur to:
        - extract chain to compare
        - strip down to CA/BB
        - remove any atoms that cannot be compared to the native
        """
        # Build up stdin
        
        # Extract the chain to compare
        stdin = "lvchain {0}\n".format( placedChainID )
        
        # Rename it to match the native
        if placedChainID != nativeChainID:
            stdin += "renchain {0} {1}\n".format( placedChainID, nativeChainID )
        
        # Find out if there are atoms in the model that we need to remove
        incomparable = resSeqMap.targetIncomparable( bbMask=not self.cAlphaOnly )
        if len( incomparable ):
            # Build up stdin - I'm too thick to work out the selection syntax for a discrete list
            for e in incomparable:
                stdin += "delresidue {0}\n".format( e )
                
        if self.cAlphaOnly:
            # Strip down to CA
            stdin += 'lvatom "CA[C]:*"\n'
        else:
            # Strip down to backbone atoms
            stdin += 'lvatom "N,CA,C,O,CB[N,C,O]"\n'
            
        # Renumber?
        stdin += "sernum\n"
        
        # Name the output file accordingly
        astr = "chain{0}".format( placedChainID )
        placedChainPdb = ample_util.filename_append( filename=placedPdb, astr=astr, directory=self.workdir )
        
        # Now run pdbcur to do it all
        cmd="pdbcur xyzin {0} xyzout {1}".format( placedPdb, placedChainPdb ).split()
        logfile = "{0}.log".format( placedChainPdb )
        retcode = ample_util.run_command( cmd=cmd, logfile=logfile, directory=self.workdir, dolog=False, stdin=stdin)
        if retcode != 0:
            raise RuntimeError("Error extracting chain from placed PDB {0} in directory {1}".format( placedPdb, self.workdir ))
        else:
            os.unlink(logfile)
        return placedChainPdb
    
[docs]    def getRmsd( self, nativePdbInfo=None, placedPdbInfo=None, refModelPdbInfo=None, workdir=None, cAlphaOnly=True  ):
        """For now just save lowest rmsd - can look at collecting more nativeInfo later
        
        Currently we assume we are only given one model and that it has already been standardised.
        """

        if workdir:
            self.workdir = workdir
        if not self.workdir:
            self.workdir = os.getcwd()
        
        self.cAlphaOnly = cAlphaOnly# Whether to only compare c-alpha atoms
        
        # Run a pass to find the # chains
        native_chains = nativePdbInfo.models[ 0 ].chains
        placed_chains = placedPdbInfo.models[ 0 ].chains
        
        #print "got native chains ", native_chains
        #print "got placed chains ", placed_chains
            
        rmsds = {} # dict of rmsd -> ( chainIDnative, chainIDrefined, reforiginLogfile )
        
        # Match each chain in native against refined and pick the best
        for nativeChainID in native_chains:
            
            #print "native_chain: {0}".format( nativeChainID )
                    
            if len( native_chains ) == 1:
                # Don't need to do owt as we are just using the native as is
                nativeChainPdb = nativePdbInfo.pdb
            else:
                # Extract the chain from the pdb
                astr = "chain{0}".format( nativeChainID )
                nativeChainPdb = ample_util.filename_append( filename=nativePdbInfo.pdb, astr=astr, directory=self.workdir )
                pdb_edit.extract_chain( nativePdbInfo.pdb, nativeChainPdb, chainID=nativeChainID )
                
            # Calculate the RefSeqMap - need to do this before we reduce to c-alphas
            # The second chain may be a different composition to the first, so we only generate a traceback if we fail
            # on the first chain. The model only has one chain, so the residueMap has to be the same for all the chains
            try:
                resSeqMap = residue_map.residueSequenceMap()
                resSeqMap.fromInfo( refInfo=nativePdbInfo,
                                    refChainID=nativeChainID,
                                    targetInfo=refModelPdbInfo,
                                    targetChainID='A' # Model only has one chain
                                    )
                
            except RuntimeError:
                if nativeChainID == native_chains[0]:
                    raise
                else:
                    # Only compare the first chain
                    break
                
            for placedChainID in placed_chains:
                
                # Prepare the placed PDB
                placedChainPdb = self.preparePlacedPdb( placedPdb=placedPdbInfo.pdb, placedChainID=placedChainID, nativeChainID=nativeChainID, resSeqMap=resSeqMap )
                
                # Now create a PDB with the matching atoms from native that are in refined
                nativePdbMatch = ample_util.filename_append( filename=nativeChainPdb, astr="matched", directory=self.workdir )
                pdb_edit.keep_matching( refpdb=placedChainPdb, targetpdb=nativeChainPdb, outpdb=nativePdbMatch, resSeqMap=resSeqMap )
                
                # Now get the rmsd
                astr = "chain{0}_reforigin".format( nativeChainID )
                reforiginOut = ample_util.filename_append( filename=placedChainPdb, astr=astr, directory=self.workdir )
                
                try:
                    rms = self.calculate( refpdb=nativePdbMatch, targetpdb=placedChainPdb, outpdb=reforiginOut )
                except RuntimeError, e:
                    logger.critical("GOT REFORIGIN ERROR for {0},{1},{2}\n{3}".format( placedChainPdb, nativeChainPdb, nativeChainID, e))
                    rms = 99999
                rmsds[ rms ] = ( nativeChainID, placedChainID, reforiginOut )
                # Clean up
                os.unlink(placedChainPdb)
                os.unlink(nativePdbMatch)
        # End loop over chains
        # Clean up
        # Now pick the best...
        rmsd = sorted( rmsds.keys() )[ 0 ]
        #print "Got rmsds over chains: {0}".format( rmsds )
        
        self.rmsd = rmsd
        self.bestNativeChain = rmsds[ rmsd ][0]
        self.bestPlacedChain = rmsds[ rmsd ][1]
        self.bestReforiginPdb = rmsds[ rmsd ][2]
        
        # Clean up
        for k in rmsds.keys():
            if k != rmsd:
                try: os.unlink(rmsds[k][2])
                except: pass
        #print "best chain rmsd is {0} for nativeChain {1} vs refinedChain {2}".format( self.rmsd, self.bestChains[0], self.bestChains[1] )
        return