#!/usr/bin/env python
import glob
import logging
import os
import platform
import re
import sys
import urllib
from ample.util import ample_util
from ample.util import exit_util
from ample.util import pdb_edit
logger = logging.getLogger(__name__)
[docs]def find_maxcluster(amoptd):
"""Return path to maxcluster binary.
If we can't find one in the path, we create a $HOME/.ample
directory and downlod it to there
"""
if amoptd['maxcluster_exe'] and ample_util.is_exe(amoptd['maxcluster_exe']):
return amoptd['maxcluster_exe']
if not amoptd['maxcluster_exe']:
if sys.platform.startswith("win"):
amoptd['maxcluster_exe']='maxcluster.exe'
else:
amoptd['maxcluster_exe']='maxcluster'
try:
maxcluster_exe = ample_util.find_exe(amoptd['maxcluster_exe'], dirs=[ amoptd['rcdir'] ] )
except ample_util.FileNotFoundError:
# Cannot find so we need to try and download it
rcdir = amoptd['rcdir']
logger.info("Cannot find maxcluster binary in path so attempting to download it directory: {0}".format( rcdir ) )
if not os.path.isdir( rcdir ):
logger.info("No ample rcdir found so creating in: {0}".format( rcdir ) )
os.mkdir( rcdir )
url = None
maxcluster_exe = os.path.join( rcdir, 'maxcluster' )
if sys.platform.startswith("linux"):
bit=platform.architecture()[0]
if bit=='64bit':
url='http://www.sbg.bio.ic.ac.uk/~maxcluster/maxcluster64bit'
elif bit=='32bit':
url='http://www.sbg.bio.ic.ac.uk/~maxcluster/maxcluster'
else:
msg="Unrecognised system type: {0} {1}".format(sys.platform,bit)
exit_util.exit_error(msg)
elif sys.platform.startswith("darwin"):
url = 'http://www.sbg.bio.ic.ac.uk/~maxcluster/maxcluster_i686_32bit.bin'
#OSX PPC: http://www.sbg.bio.ic.ac.uk/~maxcluster/maxcluster_PPC_32bit.bin
elif sys.platform.startswith("win"):
url = 'http://www.sbg.bio.ic.ac.uk/~maxcluster/maxcluster.exe'
maxcluster_exe = os.path.join( rcdir, 'maxcluster.exe' )
else:
msg="Unrecognised system type: {0}".format( sys.platform )
exit_util.exit_error(msg)
logger.info("Attempting to download maxcluster binary from: {0}".format( url ) )
try:
urllib.urlretrieve( url, maxcluster_exe )
except Exception, e:
msg="Error downloading maxcluster executable: {0}\n{1}".format(url,e)
exit_util.exit_error(msg)
# make executable
os.chmod(maxcluster_exe, 0o777)
return maxcluster_exe
[docs]class Maxcluster(object):
"""
# Extract the first chain from the nativePdb
# Create a residueSequenceMap and see if the residues match
# If not use keep_matching to create a nativePdb that has the correct residue sequence`
# Run Maxcluster to compare the models to the native
"""
def __init__(self,maxcluster_exe):
self.maxclusterExe = maxcluster_exe
return
[docs] def compareDirectory(self, nativePdbInfo=None,
resSeqMap=None,
modelsDirectory=None,
workdir=None ):
self.data = []
self.workdir = workdir
if not self.workdir:
self.workdir = os.getcwd()
#refModel = os.path.join( modelsDirectory, "S_00000001.pdb" )
nativePdb = self.prepareNative( nativePdbInfo=nativePdbInfo, resSeqMap=resSeqMap )
logfile = os.path.join( self.workdir, "maxclusterD.log" )
self.runCompareDirectory( nativePdb=nativePdb, modelsDirectory=modelsDirectory, logfile=logfile )
self.parseLogDirectory( logfile=logfile )
return
[docs] def compareModelList(self, nativePdbInfo=None, resSeqMap=None, models=None, workdir=None):
self.data = []
self.workdir = workdir
if not self.workdir:
self.workdir = os.getcwd()
#refModel = os.path.join( modelsDirectory, "S_00000001.pdb" )
nativePdb = self.prepareNative(nativePdbInfo=nativePdbInfo, resSeqMap=resSeqMap)
logfile = os.path.join(self.workdir, "maxclusterD.log")
self.run_compare_model_list(nativePdb=nativePdb, models=models, logfile=logfile)
self.parseLogDirectory(logfile=logfile)
return
[docs] def compareSingle(self, nativePdb=None, modelPdb=None, sequenceIndependant=True, rmsd=False, workdir=None ):
self.workdir = workdir
if not self.workdir:
self.workdir = os.getcwd()
cmd = [ self.maxclusterExe, "-e", nativePdb, "-p", modelPdb ]
if sequenceIndependant:
cmd.append( "-in" )
if rmsd:
cmd.append( "-rmsd" )
logfile = ample_util.filename_append( filename=modelPdb,
astr="maxcluster",
directory=self.workdir )
if rmsd:
logfile = os.path.splitext( logfile )[0] + "_rmsd.log"
else:
logfile = os.path.splitext( logfile )[0] + ".log"
self.maxclusterLogfile = logfile
#print "running cmd "," ".join( cmd )
retcode = ample_util.run_command( cmd, logfile=self.maxclusterLogfile, dolog=False )
if retcode != 0:
msg = "non-zero return code for maxcluster in runMaxcluster!"
#logging.critical( msg )
print msg
if rmsd:
data = self.parseLogSingleRmsd()
else:
data = self.parseLogSingleTm()
return data
[docs] def prepareNative(self, nativePdbInfo=None, resSeqMap=None ):
"""do stuff"""
# Find out how many chains and extract the first if > 1
if len( nativePdbInfo.models ) > 1:
raise RuntimeError,"More than 1 model."
# Check if > 1 chain
chainID=None
if len( nativePdbInfo.models[0].chains ) > 1:
chainID=nativePdbInfo.models[0].chains[0]
# Assume native is standardised
# Extract the chain if > 1
nativePdbChain = ample_util.filename_append( filename=nativePdbInfo.pdb,
astr="chain{0}".format(chainID) )
pdb_edit.extract_chain(nativePdbInfo.pdb, nativePdbChain, chainID)
nativePdb = nativePdbChain
else:
nativePdb = nativePdbInfo.pdb
if not resSeqMap.resSeqMatch():
# We need to create a copy of the native with numbering matching the model
nativeRenumber = ample_util.filename_append( filename=nativePdb,
astr="ren" )
pdb_edit.match_resseq( targetPdb=nativePdb, outPdb=nativeRenumber, resMap=resSeqMap )
nativePdb = nativeRenumber
return nativePdb
[docs] def parseLogDirectory(self, logfile=None ):
self.data = []
assert logfile
#INFO : 1000. 2XOV_clean_ren.pdb vs. /media/data/shared/TM/2XOV/models/S_00000444.pdb Pairs= 36, RMSD= 3.065, MaxSub=0.148, TM=0.192, MSI=0.148
for line in open( logfile, 'r' ):
if re.match( "INFO *: .* vs\. .* Pairs=", line ):
# Remove spaces after =
line = re.sub("= +", "=", line )
# Now remove commas
line = line.replace(",","")
fields = line.split()
d = {}
d['pdb'] = fields[5]
tmp = os.path.splitext( os.path.basename( fields[5] ) )[0]
# # Hack to make sure there isn't something like "1_" prepended to the name
# if not tmp.startswith("S"):
# for i,f in enumerate(tmp):
# if f=="S":
# tmp=tmp[i:]
# break
d['model_name'] = tmp
label, value = fields[6].split( "=" )
assert label == "Pairs"
d['pairs'] = int( value )
label, value = fields[7].split( "=" )
assert label == "RMSD"
d['rmsd'] = float( value )
label, value = fields[8].split( "=" )
assert label == "MaxSub"
d['maxsub'] = float( value )
label, value = fields[9].split( "=" )
assert label == "TM"
d['tm'] = float( value )
label, value = fields[10].split( "=" )
assert label == "MSI"
d['msi'] = float( value )
self.data.append( d )
return
[docs] def parseLogSingleTm(self, logfile=None):
if logfile is None:
logfile = self.maxclusterLogfile
assert logfile
d = {}
for line in open( logfile, 'r' ):
line = line.strip()
#"Iter 1: Pairs= 14, RMSD= 0.155, MAXSUB=0.855. Len= 15. gRMSD= 0.673, TM=0.858
if re.match( "Iter \d?: ?Pairs=", line ):
# Remove spaces after =
line = re.sub("= +", "=", line )
# Remove trailing . after numbers
line = re.sub("\. +", " ", line )
# Now remove commas
line = line.replace(",","")
fields = line.split()
label, value = fields[2].split( "=" )
assert label == "Pairs"
d['pairs'] = int( value )
label, value = fields[3].split( "=" )
assert label == "RMSD"
d['rmsd'] = float( value )
label, value = fields[4].split( "=" )
assert label == "MAXSUB"
d['maxsub'] = float( value )
label, value = fields[5].split( "=" )
assert label == "Len"
d['length'] = float( value )
# skip gRMSD
label, value = fields[7].split( "=" )
assert label == "TM"
d['tm'] = float( value )
return d
[docs] def parseLogSingleRmsd(self, logfile=None):
if logfile is None:
logfile = self.maxclusterLogfile
assert logfile
d = {}
#INFO : 1000. 2XOV_clean_ren.pdb vs. /media/data/shared/TM/2XOV/models/S_00000444.pdb Pairs= 36, RMSD= 3.065, MaxSub=0.148, TM=0.192, MSI=0.148
for line in open(logfile, 'r'):
line = line.strip()
if line.startswith("RMSD="):
# RMSD= 0.132 (Pairs= 8, rRMSD=0.034 ( -3.11)), URMSD= 0.049 (rURMSD=0.049)
# Remove spaces after =
line = re.sub("= +", "=", line)
# Remove trailing . after numbers
line = re.sub("\. +", " ", line)
# Now remove commas and brackets
line = line.replace(",","")
line = line.replace("("," ")
fields = line.split()
label, value = fields[0].split("=")
assert label == "RMSD"
d['rmsd'] = float( value )
label, value = fields[1].split("=")
assert label == "Pairs"
d['pairs'] = int(value)
return d
[docs] def tm(self,model):
""""""
for d in self.data:
if d['pdb'] == model: return d['tm']
#s = "\n".join(traceback.format_list(traceback.extract_stack()))
print "MaxCluster tm failed to find model name: {0}\n{1}".format(model,self.data)
return None
[docs] def rmsd(self,model):
""""""
for d in self.data:
if d['pdb'] == model: return d['rmsd']
#s = "\n".join(traceback.format_list(traceback.extract_stack()))
print "MaxCluster rmsd failed to find model name: {0}\n{1}".format(model,self.data)
return None
[docs] def maxsubSorted(self, reverse=True):
return sorted(self.data, key=lambda data: data['maxsub'], reverse=reverse)
[docs] def runCompareDirectory(self, nativePdb=None, modelsDirectory=None, logfile=None):
# Generate the list of models
pdblist = os.path.join(self.workdir, "models.list")
with open(pdblist, 'w') as f:
l = glob.glob(os.path.join(modelsDirectory, '*.pdb'))
if not len(l) > 0:
raise RuntimeError,"Could not find any pdb files in directory: {0}".format(modelsDirectory)
f.write( os.linesep.join( l ) )
# Run Maxcluster
cmd = [self.maxclusterExe, "-e", nativePdb, "-l", pdblist]
retcode = ample_util.run_command(cmd, logfile=logfile, dolog=True)
if retcode != 0:
msg = "non-zero return code for maxcluster in runMaxcluster!"
raise RuntimeError(msg)
return
[docs] def run_compare_model_list(self, nativePdb=None, models=None, logfile=None):
# Generate the list of models
pdblist = os.path.join(self.workdir, "models.list")
with open(pdblist, 'w') as f:
f.write(os.linesep.join(models))
# Run Maxcluster
cmd = [self.maxclusterExe, "-e", nativePdb, "-l", pdblist]
retcode = ample_util.run_command(cmd, logfile=logfile, dolog=True)
if retcode != 0:
msg = "non-zero return code for maxcluster in runMaxcluster!"
raise RuntimeError(msg)
return
[docs] def tmSorted(self, reverse=True ):
return sorted(self.data, key=lambda data: data['tm'], reverse=reverse)