'''
Created on 26 May 2015
@author: jmht
'''
import collections
import logging
import os
import shutil
from ample.util import ample_util
from ample.util import sequence_util
# We create this here otherwise it causes problems with pickling
TheseusVariances = collections.namedtuple('TheseusVariances', ['idx', 'resName', 'resSeq', 'variance', 'stdDev', 'rmsd', 'core'])
_logger = logging.getLogger(__name__)
[docs]class Theseus(object):
"""Class to run THESEUS to superpose pdb files and determine the per-residue variances.
.. _THESEUS website:
http://www.theseus3d.org/
"""
def __init__(self, work_dir=None, theseus_exe=None):
if theseus_exe is None:
if 'CCP4' in os.environ: theseus_exe = os.path.join(os.environ['CCP4'], 'bin', 'theseus')
if theseus_exe is None or not os.path.exists(theseus_exe) and os.access(theseus_exe, os.X_OK):
raise RuntimeError,"Cannot find theseus_exe: {0}".format(theseus_exe)
self.theseus_exe = theseus_exe
self.work_dir = None
self.var_by_res = None
self.variance_log = None
self.variance_log_test = None # For mocking up a test
self.superposed_models = None
self.aligned_models = None
self._set_work_dir(work_dir)
return
def _set_work_dir(self,work_dir):
if work_dir: self.work_dir = work_dir
if not self.work_dir: self.work_dir = os.getcwd()
if not os.path.isdir(self.work_dir): os.mkdir(self.work_dir)
return self.work_dir
[docs] def alignment_file(self, models, alignment_file=None):
"""Create an alignment file for the models - this is based on the assumption they are all the same length
but may have different residues"""
if not alignment_file: alignment_file = os.path.join(self.work_dir,'homologs.fasta')
all_seq = sequence_util.Sequence(pdb=models[0])
for model in models[1:]: all_seq += sequence_util.Sequence(pdb=model)
if not all(map(lambda x: x == len(all_seq.sequences[0]), [ len(s) for s in all_seq.sequences ])):
raise RuntimeError,'PDB files are not all of the same length!\n{0}'.format(models)
all_seq.write_fasta(alignment_file,pdbname=True)
return alignment_file
[docs] def superpose_models(self, models, work_dir=None, basename='theseus', homologs=False, alignment_file=None):
"""Superpose models and return the ensemble. Also set superposed_models and var_by_res variables.
This also sets the `superposed_models` and `var_by_res` parameters.
Parameters
----------
models : :obj:`list`
List of pdb files to be superposed.
work_dir: str
The directory to run theseus in and generate all the output files
basename : str
The stem that will be used to name all files
homologs : bool
True if the pdbs are homologous models as opposed to ab initio ones
alignment_file : str
An externally generated alignment file for homolgous models in FASTA format
Returns
-------
superposed_models : a pdb file containing an ensemble of the superposed models
"""
self._set_work_dir(work_dir)
if homologs:
# Theseus expects all the models to be in the directory that it is run in as the string given in
# the fasta header is used to construct the file names of the aligned pdb files. If a full or
# relative path is given (e.g. /foo/bar.pdb), it tries to create files called "basename_/foo/bar.pdb"
# We therefore copy the models in and then delete them afterwards
if not alignment_file: alignment_file = self.alignment_file(models)
copy_models = [ os.path.join(self.work_dir,os.path.basename(m)) for m in models ]
for orig, copy in zip(models, copy_models): shutil.copy(orig, copy)
models = copy_models
# -Z included so we don't line the models up to the principle axis and -o so that they all line
# up with the first model
#cmd = [ self.theseus_exe, '-a0', '-r', basename ]
cmd = [ self.theseus_exe, '-a0', '-r', basename, '-Z', '-o', os.path.basename(models[0]) ]
if homologs:
cmd += [ '-A', alignment_file ]
cmd += [ os.path.basename(m) for m in models ]
else:
# Not sure why we had relpath - fails some of the tests so changing
#cmd += [ os.path.relpath(m,self.work_dir) for m in models ]
cmd += models
self.theseus_log = os.path.join(self.work_dir,"tlog_{0}.log".format(basename))
retcode = ample_util.run_command(cmd,
logfile = self.theseus_log,
directory = self.work_dir)
if retcode != 0:
msg = "non-zero return code for theseus in superpose_models!\n See log: {0}".format(self.theseus_log)
_logger.critical(msg)
raise RuntimeError, msg
self.variance_log = os.path.join(self.work_dir,'{0}_variances.txt'.format(basename))
self.superposed_models = os.path.join(self.work_dir,'{0}_sup.pdb'.format(basename))
if homologs:
# Horrible - need to rename the models so that they match the names in the alignment file
self.aligned_models = []
for m in copy_models:
mb = os.path.basename(m)
aligned_model = os.path.join(self.work_dir,"{0}_{1}".format(basename,mb))
os.unlink(m)
os.rename(aligned_model, os.path.join(self.work_dir,mb))
self.aligned_models.append(mb)
# Set the variances
self.var_by_res = self.parse_variances()
return self.superposed_models
[docs] def parse_variances(self):
# The variance_log_test variable may be set if we are mocking out the variances for testing
if self.variance_log_test:
variance_log = self.variance_log_test
else:
variance_log = self.variance_log
if not os.path.isfile(variance_log): raise RuntimeError,"Cannot find theseus variance log: {0}".format(variance_log)
var_by_res = []
with open(variance_log) as f:
for i, line in enumerate(f):
# Skip header
if i==0: continue
line = line.strip()
if not line: continue # Skip blank lines
#print line
tokens = line.split()
# Different versions of theseus may have a RES card first, so need to check
if tokens[0]=="RES":
idxidx = 1
idxResName = 2
idxResSeq = 3
idxVariance = 4
idxStdDev = 5
idxRmsd = 6
idxCore = 7
else:
idxidx = 0
idxResName = 1
idxResSeq = 2
idxVariance = 3
idxStdDev = 4
idxRmsd = 5
idxCore = 6
# Core may or may not be there
isCore = False
if len(tokens) > idxCore and tokens[idxCore] == 'CORE': isCore = True
var_by_res.append( TheseusVariances( idx = int(tokens[idxidx]) - 1, # Theseus counts from 1, we count from 0,
resName = tokens[idxResName],
resSeq = int(tokens[idxResSeq]),
variance = float(tokens[idxVariance]),
stdDev = float(tokens[idxStdDev]),
rmsd = float(tokens[idxRmsd]),
core = isCore ) )
return var_by_res