Source code for ample.util.reforigin

import logging
import os

from ample.util import ample_util, pdb_edit, residue_map

logger = logging.getLogger(__name__)


[docs]class ReforiginRmsd(object): """Class to use reforigin to determine how well the model was placed. """ def __init__(self, workdir=None, cAlphaOnly=True): self.workdir = workdir if not self.workdir: self.workdir = os.getcwd() self.cAlphaOnly = cAlphaOnly # Whether to only compare c-alpha atoms self.rmsd = None self.bestNativeChain = None self.bestPlacedChain = None self.bestReforiginPdb = None return
[docs] def calculate(self, refpdb=None, targetpdb=None, outpdb=None, DMAX=100): # Don't need to do this as we process the file before hand # HACK - REFORIGIN has a limit on the length of the command line, so we need to create copies of inputfile # as this has the potentially longest path # tmptarget = ample_util.tmp_file_name()+".pdb" # shutil.copy(targetpdb, tmptarget) logfile = outpdb + ".log" cmd = "reforigin xyzin {0} xyzref {1} xyzout {2} DMAX {3}".format(targetpdb, refpdb, outpdb, DMAX).split() retcode = ample_util.run_command(cmd=cmd, logfile=logfile, directory=os.getcwd(), dolog=False) if retcode != 0: raise RuntimeError("Error running command: {0}".format(" ".join(cmd))) # Parse logfile to get RMSD rms = None for line in open(logfile, 'r'): if line.startswith("RMS deviation:"): rms = float(line.split()[-1]) break if not rms: raise RuntimeError("Error extracting RMS from logfile: {0}".format(logfile)) else: os.unlink(logfile) return rms
[docs] def preparePlacedPdb(self, placedPdb=None, placedChainID=None, nativeChainID=None, resSeqMap=None): """ Use pdbcur to: - extract chain to compare - strip down to CA/BB - remove any atoms that cannot be compared to the native """ # Build up stdin # Extract the chain to compare stdin = "lvchain {0}\n".format(placedChainID) # Rename it to match the native if placedChainID != nativeChainID: stdin += "renchain {0} {1}\n".format(placedChainID, nativeChainID) # Find out if there are atoms in the model that we need to remove incomparable = resSeqMap.targetIncomparable(bbMask=not self.cAlphaOnly) if len(incomparable): # Build up stdin - I'm too thick to work out the selection syntax for a discrete list for e in incomparable: stdin += "delresidue {0}\n".format(e) if self.cAlphaOnly: # Strip down to CA stdin += 'lvatom "CA[C]:*"\n' else: # Strip down to backbone atoms stdin += 'lvatom "N,CA,C,O,CB[N,C,O]"\n' # Renumber? stdin += "sernum\n" # Name the output file accordingly astr = "chain{0}".format(placedChainID) placedChainPdb = ample_util.filename_append(filename=placedPdb, astr=astr, directory=self.workdir) # Now run pdbcur to do it all cmd = "pdbcur xyzin {0} xyzout {1}".format(placedPdb, placedChainPdb).split() logfile = "{0}.log".format(placedChainPdb) retcode = ample_util.run_command(cmd=cmd, logfile=logfile, directory=self.workdir, dolog=False, stdin=stdin) if retcode != 0: raise RuntimeError( "Error extracting chain from placed PDB {0} in directory {1}".format(placedPdb, self.workdir) ) else: os.unlink(logfile) return placedChainPdb
[docs] def getRmsd(self, nativePdbInfo=None, placedPdbInfo=None, refModelPdbInfo=None, workdir=None, cAlphaOnly=True): """For now just save lowest rmsd - can look at collecting more nativeInfo later Currently we assume we are only given one model and that it has already been standardised. """ if workdir: self.workdir = workdir if not self.workdir: self.workdir = os.getcwd() self.cAlphaOnly = cAlphaOnly # Whether to only compare c-alpha atoms # Run a pass to find the # chains native_chains = nativePdbInfo.models[0].chains placed_chains = placedPdbInfo.models[0].chains rmsds = {} # dict of rmsd -> ( chainIDnative, chainIDrefined, reforiginLogfile ) # Match each chain in native against refined and pick the best for nativeChainID in native_chains: if len(native_chains) == 1: # Don't need to do owt as we are just using the native as is nativeChainPdb = nativePdbInfo.pdb else: # Extract the chain from the pdb astr = "chain{0}".format(nativeChainID) nativeChainPdb = ample_util.filename_append( filename=nativePdbInfo.pdb, astr=astr, directory=self.workdir ) pdb_edit.extract_chain(nativePdbInfo.pdb, nativeChainPdb, chainID=nativeChainID) # Calculate the RefSeqMap - need to do this before we reduce to c-alphas # The second chain may be a different composition to the first, so we only generate a traceback if we fail # on the first chain. The model only has one chain, so the residueMap has to be the same for all the chains try: resSeqMap = residue_map.residueSequenceMap() resSeqMap.fromInfo( refInfo=nativePdbInfo, refChainID=nativeChainID, targetInfo=refModelPdbInfo, targetChainID='A', # Model only has one chain ) except RuntimeError: if nativeChainID == native_chains[0]: raise Exception else: # Only compare the first chain break for placedChainID in placed_chains: # Prepare the placed PDB placedChainPdb = self.preparePlacedPdb( placedPdb=placedPdbInfo.pdb, placedChainID=placedChainID, nativeChainID=nativeChainID, resSeqMap=resSeqMap, ) # Now create a PDB with the matching atoms from native that are in refined nativePdbMatch = ample_util.filename_append( filename=nativeChainPdb, astr="matched", directory=self.workdir ) pdb_edit.keep_matching( refpdb=placedChainPdb, targetpdb=nativeChainPdb, outpdb=nativePdbMatch, resSeqMap=resSeqMap ) # Now get the rmsd astr = "chain{0}_reforigin".format(nativeChainID) reforiginOut = ample_util.filename_append(filename=placedChainPdb, astr=astr, directory=self.workdir) try: rms = self.calculate(refpdb=nativePdbMatch, targetpdb=placedChainPdb, outpdb=reforiginOut) except RuntimeError as e: logger.critical( "GOT REFORIGIN ERROR for {0},{1},{2}\n{3}".format( placedChainPdb, nativeChainPdb, nativeChainID, e ) ) rms = 99999 rmsds[rms] = (nativeChainID, placedChainID, reforiginOut) # Clean up os.unlink(placedChainPdb) os.unlink(nativePdbMatch) # Now pick the best... rmsd = sorted(rmsds.keys())[0] self.rmsd = rmsd self.bestNativeChain = rmsds[rmsd][0] self.bestPlacedChain = rmsds[rmsd][1] self.bestReforiginPdb = rmsds[rmsd][2] for k in rmsds.keys(): if k != rmsd: try: os.unlink(rmsds[k][2]) except Exception: pass