import logging
import os
from ample.util import ample_util, pdb_edit, residue_map
logger = logging.getLogger(__name__)
[docs]class ReforiginRmsd(object):
"""Class to use reforigin to determine how well the model was placed.
"""
def __init__(self, workdir=None, cAlphaOnly=True):
self.workdir = workdir
if not self.workdir:
self.workdir = os.getcwd()
self.cAlphaOnly = cAlphaOnly # Whether to only compare c-alpha atoms
self.rmsd = None
self.bestNativeChain = None
self.bestPlacedChain = None
self.bestReforiginPdb = None
return
[docs] def calculate(self, refpdb=None, targetpdb=None, outpdb=None, DMAX=100):
# Don't need to do this as we process the file before hand
# HACK - REFORIGIN has a limit on the length of the command line, so we need to create copies of inputfile
# as this has the potentially longest path
# tmptarget = ample_util.tmp_file_name()+".pdb"
# shutil.copy(targetpdb, tmptarget)
logfile = outpdb + ".log"
cmd = "reforigin xyzin {0} xyzref {1} xyzout {2} DMAX {3}".format(targetpdb, refpdb, outpdb, DMAX).split()
retcode = ample_util.run_command(cmd=cmd, logfile=logfile, directory=os.getcwd(), dolog=False)
if retcode != 0:
raise RuntimeError("Error running command: {0}".format(" ".join(cmd)))
# Parse logfile to get RMSD
rms = None
for line in open(logfile, 'r'):
if line.startswith("RMS deviation:"):
rms = float(line.split()[-1])
break
if not rms:
raise RuntimeError("Error extracting RMS from logfile: {0}".format(logfile))
else:
os.unlink(logfile)
return rms
[docs] def preparePlacedPdb(self, placedPdb=None, placedChainID=None, nativeChainID=None, resSeqMap=None):
"""
Use pdbcur to:
- extract chain to compare
- strip down to CA/BB
- remove any atoms that cannot be compared to the native
"""
# Build up stdin
# Extract the chain to compare
stdin = "lvchain {0}\n".format(placedChainID)
# Rename it to match the native
if placedChainID != nativeChainID:
stdin += "renchain {0} {1}\n".format(placedChainID, nativeChainID)
# Find out if there are atoms in the model that we need to remove
incomparable = resSeqMap.targetIncomparable(bbMask=not self.cAlphaOnly)
if len(incomparable):
# Build up stdin - I'm too thick to work out the selection syntax for a discrete list
for e in incomparable:
stdin += "delresidue {0}\n".format(e)
if self.cAlphaOnly:
# Strip down to CA
stdin += 'lvatom "CA[C]:*"\n'
else:
# Strip down to backbone atoms
stdin += 'lvatom "N,CA,C,O,CB[N,C,O]"\n'
# Renumber?
stdin += "sernum\n"
# Name the output file accordingly
astr = "chain{0}".format(placedChainID)
placedChainPdb = ample_util.filename_append(filename=placedPdb, astr=astr, directory=self.workdir)
# Now run pdbcur to do it all
cmd = "pdbcur xyzin {0} xyzout {1}".format(placedPdb, placedChainPdb).split()
logfile = "{0}.log".format(placedChainPdb)
retcode = ample_util.run_command(cmd=cmd, logfile=logfile, directory=self.workdir, dolog=False, stdin=stdin)
if retcode != 0:
raise RuntimeError(
"Error extracting chain from placed PDB {0} in directory {1}".format(placedPdb, self.workdir)
)
else:
os.unlink(logfile)
return placedChainPdb
[docs] def getRmsd(self, nativePdbInfo=None, placedPdbInfo=None, refModelPdbInfo=None, workdir=None, cAlphaOnly=True):
"""For now just save lowest rmsd - can look at collecting more nativeInfo later
Currently we assume we are only given one model and that it has already been standardised.
"""
if workdir:
self.workdir = workdir
if not self.workdir:
self.workdir = os.getcwd()
self.cAlphaOnly = cAlphaOnly # Whether to only compare c-alpha atoms
# Run a pass to find the # chains
native_chains = nativePdbInfo.models[0].chains
placed_chains = placedPdbInfo.models[0].chains
rmsds = {} # dict of rmsd -> ( chainIDnative, chainIDrefined, reforiginLogfile )
# Match each chain in native against refined and pick the best
for nativeChainID in native_chains:
if len(native_chains) == 1:
# Don't need to do owt as we are just using the native as is
nativeChainPdb = nativePdbInfo.pdb
else:
# Extract the chain from the pdb
astr = "chain{0}".format(nativeChainID)
nativeChainPdb = ample_util.filename_append(
filename=nativePdbInfo.pdb, astr=astr, directory=self.workdir
)
pdb_edit.extract_chain(nativePdbInfo.pdb, nativeChainPdb, chainID=nativeChainID)
# Calculate the RefSeqMap - need to do this before we reduce to c-alphas
# The second chain may be a different composition to the first, so we only generate a traceback if we fail
# on the first chain. The model only has one chain, so the residueMap has to be the same for all the chains
try:
resSeqMap = residue_map.residueSequenceMap()
resSeqMap.fromInfo(
refInfo=nativePdbInfo,
refChainID=nativeChainID,
targetInfo=refModelPdbInfo,
targetChainID='A', # Model only has one chain
)
except RuntimeError:
if nativeChainID == native_chains[0]:
raise Exception
else:
# Only compare the first chain
break
for placedChainID in placed_chains:
# Prepare the placed PDB
placedChainPdb = self.preparePlacedPdb(
placedPdb=placedPdbInfo.pdb,
placedChainID=placedChainID,
nativeChainID=nativeChainID,
resSeqMap=resSeqMap,
)
# Now create a PDB with the matching atoms from native that are in refined
nativePdbMatch = ample_util.filename_append(
filename=nativeChainPdb, astr="matched", directory=self.workdir
)
pdb_edit.keep_matching(
refpdb=placedChainPdb, targetpdb=nativeChainPdb, outpdb=nativePdbMatch, resSeqMap=resSeqMap
)
# Now get the rmsd
astr = "chain{0}_reforigin".format(nativeChainID)
reforiginOut = ample_util.filename_append(filename=placedChainPdb, astr=astr, directory=self.workdir)
try:
rms = self.calculate(refpdb=nativePdbMatch, targetpdb=placedChainPdb, outpdb=reforiginOut)
except RuntimeError as e:
logger.critical(
"GOT REFORIGIN ERROR for {0},{1},{2}\n{3}".format(
placedChainPdb, nativeChainPdb, nativeChainID, e
)
)
rms = 99999
rmsds[rms] = (nativeChainID, placedChainID, reforiginOut)
# Clean up
os.unlink(placedChainPdb)
os.unlink(nativePdbMatch)
# Now pick the best...
rmsd = sorted(rmsds.keys())[0]
self.rmsd = rmsd
self.bestNativeChain = rmsds[rmsd][0]
self.bestPlacedChain = rmsds[rmsd][1]
self.bestReforiginPdb = rmsds[rmsd][2]
for k in rmsds.keys():
if k != rmsd:
try:
os.unlink(rmsds[k][2])
except Exception:
pass