"""Module coordinating the option checking"""
__author__ = "Jens Thomas, and Felix Simkovic"
__date__ = "01 Nov 2016"
__version__ = "1.0"
import glob
import logging
import os
import shutil
import sys
from ample.constants import AMPLE_PKL
from ample.ensembler.constants import *
from ample.modelling import rosetta_model
from ample.util import ample_util
from ample.util import contact_util
from ample.util import exit_util
from ample.util import maxcluster
from ample.util import mrbump_util
from ample.util import mtz_util
from ample.util import pdb_edit
from ample.util import sequence_util
logger = logging.getLogger(__name__)
def check_mandatory_options(optd):
    """Check that the mandatory options are present and mutually consistent.

    Description
    -----------
    We check these here rather than with argparse, as there doesn't seem
    to be an easy way to get argparse to handle overlapping required and
    mutually exclusive options.

    Parameters
    ----------
    optd : dict
        The AMPLE option dictionary. Only read, never modified.

    Notes
    -----
    Every failed check is reported through ``exit_util.exit_error``.
    The function itself always returns ``None``.
    """
    if not (optd['fasta'] or optd['restart_pkl']):
        exit_util.exit_error("One of -fasta or -restart_pkl option is required.")

    # Contacts and explicit restraints are alternative sources of restraints
    if (optd['contact_file'] or optd['bbcontacts_file']) and optd['restraints_file']:
        exit_util.exit_error("Only one option of -contact_file or -restraints_file allowed.")

    # A restart takes its data from the pkl file, so reflection data is only needed otherwise
    if not optd['restart_pkl'] and not (optd['mtz'] or optd['sf_cif']):
        exit_util.exit_error("A crystallographic data file must be supplied with the -mtz or -sf_cif options.")

    if optd['do_mr'] and (optd['mtz'] and optd['sf_cif']):
        exit_util.exit_error("Please supply a single crystallographic data file.")

    if optd['devel_mode'] and optd['quick_mode']:
        exit_util.exit_error("Only one of quick_mode or devel_mode is permitted")

    if optd['molrep_only'] and optd['phaser_only']:
        exit_util.exit_error("Only one of molrep_only or phaser_only is permitted")

    if optd['single_model'] and not (optd['truncation_scorefile'] and optd['truncation_scorefile_header']):
        exit_util.exit_error(
            "Truncating a single model requires -truncation_scorefile and -truncation_scorefile_header")
    return
def process_options(optd):
    """Process the initial options and set any additional options the run type needs.

    Description
    -----------
    Takes the options from the command-line/ample.ini file that determine the
    type of run being undertaken and sets any additional options required by
    that run type. The sections are handled in this order: FASTA, contact
    files, reflection file, modelling/ensemble import, NMR and fragment
    files, miscellaneous MR options, benchmark mode, external executables
    and finally a summary of the run written to the log.

    Parameters
    ----------
    optd : dict
        The AMPLE option dictionary. It is updated in place.

    Notes
    -----
    Fatal problems are reported through ``exit_util.exit_error``. A missing
    Gesamt executable (when it is the sub-clustering program) or a missing
    Scwrl executable (when ``use_scwrl`` is set) re-raises the
    ``ample_util.FileNotFoundError`` instead.
    """
    # Path for pickling results
    optd['results_path'] = os.path.join(optd['work_dir'], AMPLE_PKL)

    ###############################################################################
    #
    # FASTA processing
    #
    ###############################################################################
    # If no mr_sequence was given it defaults to the fasta
    if optd['mr_sequence'] is not None:
        if not os.path.exists(str(optd['mr_sequence'])):
            msg = 'Cannot find mr sequence file: {0}'.format(optd['mr_sequence'])
            exit_util.exit_error(msg)
    else:
        optd['mr_sequence'] = optd['fasta']

    # Process the fasta file and run all the checks on the sequence
    sequence_util.process_fasta(optd, canonicalise=True)

    # Not sure if name actually required - see make_fragments.pl
    if optd['name'] and len(optd['name']) != 4:
        msg = '-name argument is the wrong length, use 4 chars eg ABCD'
        exit_util.exit_error(msg)
    # Underscore required by rosetta make_fragments.pl
    optd['name'] += '_'

    ###############################################################################
    #
    # Contact file processing
    #
    ###############################################################################
    if optd['contact_file'] or optd['bbcontacts_file'] or not optd["no_contact_prediction"]:
        contact_util.ContactUtil.check_options(optd)
        optd['use_contacts'] = True

    ###############################################################################
    #
    # MTZ file processing
    #
    ###############################################################################
    try:
        mtz_util.processReflectionFile(optd)
    except Exception as e:
        msg = "Error processing reflection file: {0}".format(e)
        # Hand the traceback over so the original failure location is not lost
        exit_util.exit_error(msg, sys.exc_info()[2])

    ###############################################################################
    #
    # Modelling and ensemble options
    #
    ###############################################################################
    # Set default name for modelling directory
    optd['models_dir'] = os.path.join(optd['work_dir'], "models")

    # Every import route below switches off fragment and model generation
    if optd['ensembles']:
        # checks are made in ensembles.import_ensembles
        optd['import_ensembles'] = True
        optd['make_frags'] = False
        optd['make_models'] = False
    elif optd['cluster_dir']:
        if not os.path.isdir(optd['cluster_dir']):
            msg = "Import cluster cannot find directory: {0}".format(optd['cluster_dir'])
            exit_util.exit_error(msg)
        if not glob.glob(os.path.join(optd['cluster_dir'], "*.pdb")):
            msg = "Import cluster cannot find pdbs in directory: {0}".format(optd['cluster_dir'])
            exit_util.exit_error(msg)
        logger.info("Importing pre-clustered models from directory: {0}\n".format(optd['cluster_dir']))
        optd['cluster_method'] = 'import'
        optd['make_frags'] = False
        optd['make_models'] = False
    elif optd['ideal_helices']:
        optd['make_frags'] = False
        optd['make_models'] = False
    elif optd['homologs']:
        optd['make_frags'] = False
        optd['make_models'] = False
        if not os.path.isfile(str(optd['alignment_file'])):
            # We need to use gesamt or mustang to do the alignment
            if optd['homolog_aligner'] == 'gesamt':
                if not ample_util.is_exe(str(optd['gesamt_exe'])):
                    # Fall back to the gesamt shipped with CCP4
                    optd['gesamt_exe'] = os.path.join(os.environ['CCP4'], 'bin', 'gesamt' + ample_util.EXE_EXT)
                if not ample_util.is_exe(str(optd['gesamt_exe'])):
                    msg = 'Using homologs without an alignment file and cannot find gesamt_exe: {0}'.format(
                        optd['gesamt_exe'])
                    exit_util.exit_error(msg)
            elif optd['homolog_aligner'] == 'mustang':
                if not ample_util.is_exe(str(optd['mustang_exe'])):
                    msg = 'Using homologs without an alignment file and cannot find mustang_exe: {0}'.format(
                        optd['mustang_exe'])
                    exit_util.exit_error(msg)
            else:
                msg = 'Unknown homolog_aligner: {0}'.format(optd['homolog_aligner'])
                exit_util.exit_error(msg)
        if not os.path.isdir(str(optd['models'])):
            msg = "Homologs option requires a directory of pdb models to be supplied\n" + \
                "Please supply the models with the -models flag"
            exit_util.exit_error(msg)
        optd['import_models'] = True
    elif optd['models']:
        optd['import_models'] = True
        optd['make_frags'] = False
        optd['make_models'] = False
    elif optd['single_model']:
        optd['cluster_method'] = "skip"
        optd['make_frags'] = False
        optd['make_models'] = False
        optd['single_model_mode'] = True

    if optd['truncation_scorefile'] and optd['truncation_scorefile_header']:
        optd['truncation_method'] = "scores"

    # Check import flags
    if optd['import_ensembles'] and optd['import_models']:
        msg = "Cannot import both models and ensembles/clusters!"
        exit_util.exit_error(msg)

    # NMR Checks
    if optd['nmr_model_in']:
        logger.info("Using nmr_model_in file: {0}".format(optd['nmr_model_in']))
        if not os.path.isfile(optd['nmr_model_in']):
            msg = "nmr_model_in flag given, but cannot find file: {0}".format(optd['nmr_model_in'])
            exit_util.exit_error(msg)
        if optd['nmr_remodel']:
            optd['make_models'] = True
            if optd['nmr_remodel_fasta']:
                if not os.path.isfile(optd['nmr_remodel_fasta']):
                    msg = "Cannot find nmr_remodel_fasta file: {0}".format(optd['nmr_remodel_fasta'])
                    exit_util.exit_error(msg)
            else:
                optd['nmr_remodel_fasta'] = optd['fasta']
            msg = "NMR model will be remodelled with ROSETTA using the sequence from: {0}".format(
                optd['nmr_remodel_fasta'])
            logger.info(msg)
            if not (optd['frags_3mers'] and optd['frags_9mers']):
                optd['make_frags'] = True
                msg = "nmr_remodel - will be making our own fragment files"
                logger.info(msg)
            else:
                if not (os.path.isfile(optd['frags_3mers']) and os.path.isfile(optd['frags_9mers'])):
                    msg = "frags_3mers and frag_9mers files given, but cannot locate them:\n{0}\n{1}\n".format(
                        optd['frags_3mers'], optd['frags_9mers'])
                    exit_util.exit_error(msg)
                optd['make_frags'] = False
        else:
            optd['make_frags'] = False
            optd['make_models'] = False
            msg = "Running in NMR truncate only mode"
            logger.info(msg)
    elif optd['make_models']:
        if not os.path.isdir(optd['models_dir']):
            os.mkdir(optd['models_dir'])
        # If the user has given both fragment files we check they are ok and unset make_frags
        if optd['frags_3mers'] and optd['frags_9mers']:
            if not os.path.isfile(optd['frags_3mers']) or not os.path.isfile(optd['frags_9mers']):
                msg = "frags_3mers and frag_9mers files given, but cannot locate them:\n{0}\n{1}\n".format(
                    optd['frags_3mers'], optd['frags_9mers'])
                exit_util.exit_error(msg)
            optd['make_frags'] = False
        if optd['make_frags'] and (optd['frags_3mers'] or optd['frags_9mers']):
            msg = "make_frags set to true, but you have given the path to the frags_3mers or frags_9mers"
            exit_util.exit_error(msg)
        if not optd['make_frags'] and not (optd['frags_3mers'] and optd['frags_9mers']):
            msg = ("*** Missing fragment files! ***\n"
                   "Please supply the paths to the fragment files using the -frags_3mers and -frags_9mers flags.\n"
                   "These can be generated using the Robetta server: http://robetta.bakerlab.org\n"
                   "Please see the AMPLE documentation for further information.")
            exit_util.exit_error(msg)

    ###############################################################################
    #
    # Misc options
    #
    ###############################################################################
    # Missing domains
    if optd['missing_domain']:
        logger.info('Processing missing domain\n')
        if not os.path.exists(optd['domain_all_chains_pdb']):
            msg = 'Cannot find file domain_all_chains_pdb: {0}'.format(optd['domain_all_chains_pdb'])
            exit_util.exit_error(msg)

    # Molecular Replacement Options
    if optd['molrep_only']:
        # molrep_only wins if both *_only flags are set
        optd['phaser_only'] = False
        optd['mrbump_programs'] = ['molrep']
    elif optd['phaser_only']:
        optd['mrbump_programs'] = ['phaser']
    else:
        optd['mrbump_programs'] = ['molrep', 'phaser']

    if optd['phaser_rms'] != 'auto':
        try:
            optd['phaser_rms'] = float(optd['phaser_rms'])
        except ValueError as e:
            msg = "Error converting phaser_rms '{0}' to floating point: {1}".format(optd['phaser_rms'], e)
            exit_util.exit_error(msg)

    ###############################################################################
    #
    # Benchmark Mode
    #
    ###############################################################################
    if optd['native_pdb'] or optd['benchmark_mode']:
        if optd['native_pdb'] and not os.path.isfile(optd['native_pdb']):
            msg = "Cannot find crystal structure PDB: {0}".format(optd['native_pdb'])
            exit_util.exit_error(msg)
        # Supplying a native_pdb implies benchmark mode
        optd['benchmark_mode'] = True
        optd['benchmark_dir'] = os.path.join(optd['work_dir'], "benchmark")
        logger.info("*** AMPLE running in benchmark mode ***")
        # See if we can find TMscore
        if not optd['tmscore_exe']:
            optd['tmscore_exe'] = 'TMscore' + ample_util.EXE_EXT
        try:
            optd['tmscore_exe'] = ample_util.find_exe(optd['tmscore_exe'])
            optd['have_tmscore'] = True
        except ample_util.FileNotFoundError:
            logger.debug("Cannot find TMScore executable: {0}".format(optd['tmscore_exe']))
            # No TMscore so try and find Maxcluster
            optd['maxcluster_exe'] = maxcluster.find_maxcluster(optd)
            optd['have_tmscore'] = False

    ###############################################################################
    #
    # Program defaults
    #
    ###############################################################################
    if optd['shelxe_rebuild']:
        optd['shelxe_rebuild_arpwarp'] = True
        optd['shelxe_rebuild_buccaneer'] = True

    # Model building programs
    if optd['refine_rebuild_arpwarp'] or optd['shelxe_rebuild_arpwarp']:
        if 'warpbin' in os.environ and os.path.isfile(os.path.join(os.environ['warpbin'], "auto_tracing.sh")):
            logger.info('Using arpwarp script: {0}'.format(os.path.join(os.environ['warpbin'], "auto_tracing.sh")))
        else:
            logger.warning('Cannot find arpwarp script! Disabling use of arpwarp.')
            optd['refine_rebuild_arpwarp'] = False
            optd['shelxe_rebuild_arpwarp'] = False

    #
    # Check we can find all the required programs
    #
    # Maxcluster handled differently as we may need to download the binary
    if optd['subcluster_program'] == 'maxcluster':
        optd['maxcluster_exe'] = maxcluster.find_maxcluster(optd)
    elif optd['subcluster_program'] == 'gesamt':
        if not optd['gesamt_exe']:
            optd['gesamt_exe'] = os.path.join(os.environ['CCP4'], 'bin', 'gesamt' + ample_util.EXE_EXT)
        try:
            optd['gesamt_exe'] = ample_util.find_exe(optd['gesamt_exe'])
        except ample_util.FileNotFoundError:
            logger.info("Cannot find Gesamt executable: {0}".format(optd['gesamt_exe']))
            raise

    #
    # Ensemble options
    #
    if optd['cluster_method'] in ['spicker', 'spicker_qscore', 'spicker_tm']:
        if not optd['spicker_exe']:
            if optd['cluster_method'] == 'spicker_tm' and optd['nproc'] > 1:
                # We need to use the multicore version of SPICKER
                optd['spicker_exe'] = 'spicker_omp' + ample_util.EXE_EXT
            else:
                optd['spicker_exe'] = 'spicker' + ample_util.EXE_EXT
        try:
            optd['spicker_exe'] = ample_util.find_exe(optd['spicker_exe'])
        except ample_util.FileNotFoundError:
            msg = "Cannot find spicker executable: {0}".format(optd['spicker_exe'])
            exit_util.exit_error(msg)
    elif optd['cluster_method'] in ['fast_protein_cluster']:
        if not optd['fast_protein_cluster_exe']:
            optd['fast_protein_cluster_exe'] = 'fast_protein_cluster'
        try:
            optd['fast_protein_cluster_exe'] = ample_util.find_exe(optd['fast_protein_cluster_exe'])
        except ample_util.FileNotFoundError:
            msg = "Cannot find fast_protein_cluster executable: {0}".format(optd['fast_protein_cluster_exe'])
            exit_util.exit_error(msg)
    elif optd['cluster_method'] in ['import', 'random', 'skip']:
        # These methods need no external clustering executable
        pass
    else:
        msg = "Unrecognised cluster_method: {0}".format(optd['cluster_method'])
        exit_util.exit_error(msg)

    if not optd['theseus_exe']:
        optd['theseus_exe'] = 'theseus' + ample_util.EXE_EXT
    try:
        optd['theseus_exe'] = ample_util.find_exe(optd['theseus_exe'])
    except ample_util.FileNotFoundError:
        msg = "Cannot find theseus executable: {0}".format(optd['theseus_exe'])
        exit_util.exit_error(msg)

    if "subcluster_radius_thresholds" in optd and not optd["subcluster_radius_thresholds"]:
        optd["subcluster_radius_thresholds"] = SUBCLUSTER_RADIUS_THRESHOLDS

    # REM: This should really be disentangled and moved up to definition of all homologs options
    # REM: but could cause confusion with defaults down here.
    if "side_chain_treatments" in optd and not optd["side_chain_treatments"]:
        if optd["homologs"]:
            optd["side_chain_treatments"] = [POLYALA, RELIABLE, ALLATOM]
        else:
            optd["side_chain_treatments"] = SIDE_CHAIN_TREATMENTS
    else:
        # Build a real list: a lazy map() would be exhausted by the set() below on Python 3
        # NOTE(review): this branch raises KeyError if the key is absent altogether - confirm intended
        optd["side_chain_treatments"] = [sct.lower() for sct in optd["side_chain_treatments"]]
    unrecognised_sidechains = set(optd["side_chain_treatments"]) - set(ALLOWED_SIDE_CHAIN_TREATMENTS)
    if unrecognised_sidechains:
        msg = "Unrecognised side_chain_treatments: {0}".format(unrecognised_sidechains)
        logger.critical(msg)
        exit_util.exit_error(msg)

    #
    # SCWRL - we always check for SCWRL as if we are processing QUARK models we want to add sidechains to them
    #
    if not optd['scwrl_exe']:
        optd['scwrl_exe'] = 'Scwrl4' + ample_util.EXE_EXT
    try:
        optd['scwrl_exe'] = ample_util.find_exe(optd['scwrl_exe'])
    except ample_util.FileNotFoundError:
        logger.info("Cannot find Scwrl executable: {0}".format(optd['scwrl_exe']))
        # Only fatal if the user explicitly asked for Scwrl
        if optd['use_scwrl']:
            raise

    #
    # We use shelxe by default so if we can't find it we just warn and set use_shelxe to False
    #
    if optd['use_shelxe']:
        if not optd['shelxe_exe']:
            optd['shelxe_exe'] = 'shelxe' + ample_util.EXE_EXT
        try:
            optd['shelxe_exe'] = ample_util.find_exe(optd['shelxe_exe'])
        except ample_util.FileNotFoundError:
            msg = ("*** Cannot find shelxe executable in PATH - turning off use of SHELXE. ***\n"
                   "SHELXE is recommended for the best chance of success. We recommend you install shelxe from:\n"
                   "http://shelx.uni-ac.gwdg.de/SHELX/\n"
                   "and install it in your PATH so that AMPLE can use it.\n")
            logger.warning(msg)
            optd['use_shelxe'] = False

    #
    # If shelxe_rebuild is set we need use_shelxe to be set
    #
    if optd['shelxe_rebuild'] and not optd['use_shelxe']:
        msg = 'shelxe_rebuild is set but use_shelxe is False. Please make sure you have shelxe installed.'
        exit_util.exit_error(msg)

    # Output various information to the user
    logger.info('Running on %d processors' % optd['nproc'])
    if optd['make_frags']:
        if optd['use_homs']:
            logger.info('Making fragments (including homologues)')
        else:
            logger.info('Making fragments EXCLUDING HOMOLOGUES')
    else:
        logger.info('NOT making Fragments')

    if optd['make_models']:
        logger.info('\nMaking Rosetta Models')
    else:
        logger.info('NOT making Rosetta Models')

    # Print out what is being done; each message names the program its flags refer to
    if optd['refine_rebuild_arpwarp'] or optd['shelxe_rebuild_arpwarp']:
        logger.info('Rebuilding in ARP/wARP')
    else:
        logger.info('Not rebuilding in ARP/wARP')
    if optd['refine_rebuild_buccaneer'] or optd['shelxe_rebuild_buccaneer']:
        logger.info('Rebuilding in Buccaneer')
    else:
        logger.info('Not rebuilding in Buccaneer')

    # cluster queueing
    if optd['submit_qtype']:
        optd['submit_qtype'] = optd['submit_qtype'].upper()
    if optd['submit_cluster'] and not optd['submit_qtype']:
        msg = 'Must use -submit_qtype argument to specify queueing system (e.g. QSUB, LSF ) if submitting to a cluster.'
        exit_util.exit_error(msg)

    if optd['purge']:
        logger.info('*** Purge mode specified - all intermediate files will be deleted ***')
    return
def process_restart_options(optd):
    """Work out where a restarted job should resume from and set the flags accordingly.

    Description
    -----------
    Nothing is done unless ``restart_pkl`` is set. Otherwise the stage flags
    (``make_frags``, ``make_models``, ``make_ensembles``, ``import_models``)
    are all reset to False so that we do not restart from the earliest
    point, and the data in the dictionary is then inspected, most advanced
    stage first:

    - a ``native_pdb`` switches on ``benchmark_mode``
    - if there is a ``mrbump_dir``, the unfinished mrbump scripts are
      collected; if there are none ``do_mr`` is switched off
    - if ``do_mr`` is still set:
        - unfinished mrbump scripts: their old log files and job
          directories are deleted so they can be rerun
        - else existing ensembles: nothing further to set
        - else an existing models directory: ``make_ensembles = True``
        - else both fragment files given: ``make_models = True``

    Parameters
    ----------
    optd : dict
        The AMPLE option dictionary. It is updated in place.

    Returns
    -------
    dict
        The same dictionary object that was passed in.

    Raises
    ------
    RuntimeError
        If ``native_pdb`` is set but is not an existing file.
    """
    if not optd['restart_pkl']:
        return optd
    logger.info('Restarting from existing pkl file: {0}'.format(optd['restart_pkl']))

    # Reset all variables for doing stuff - otherwise we will always restart from the earliest point
    optd['make_ensembles'] = False
    # import_ensembles is deliberately left alone - needs thinking about, as we have to
    # avoid just reimporting models/ensembles
    optd['import_models'] = False  # Needs thinking about
    optd['make_models'] = False
    optd['make_frags'] = False

    # First see if we should benchmark this job. The user may not have supplied a native_pdb with the original
    # job and we only set benchmark mode on seeing the native_pdb
    if optd['native_pdb']:
        if not os.path.isfile(optd['native_pdb']):
            msg = "Cannot find native_pdb: {0}".format(optd['native_pdb'])
            logger.critical(msg)
            raise RuntimeError(msg)
        optd['benchmark_mode'] = True
        logger.info('Restart using benchmark mode')

    # We always check first to see if there are any mrbump jobs
    optd['mrbump_scripts'] = []
    if 'mrbump_dir' in optd:
        optd['mrbump_scripts'] = mrbump_util.unfinished_scripts(optd)
        if not optd['mrbump_scripts']:
            # An mrbump directory with nothing left to run: no MR to do
            optd['do_mr'] = False

    if optd['do_mr']:
        if optd['mrbump_scripts']:
            logger.info('Restarting from unfinished mrbump scripts: {0}'.format(optd['mrbump_scripts']))
            # Purge unfinished jobs
            for script_path in optd['mrbump_scripts']:
                directory, script = os.path.split(script_path)
                name = os.path.splitext(script)[0]
                # Hack to delete old job directories
                logfile = os.path.join(directory, name + '.log')
                if os.path.isfile(logfile):
                    os.unlink(logfile)
                jobdir = os.path.join(directory, 'search_' + name + '_mrbump')
                if os.path.isdir(jobdir):
                    shutil.rmtree(jobdir)
        elif optd.get('ensembles'):
            # Rerun from ensembles - check for data/ensembles are ok?
            logger.info('Restarting from existing ensembles: {0}'.format(optd['ensembles']))
        elif optd.get('models_dir') and os.path.isdir(optd['models_dir']):
            logger.info('Restarting from existing models: {0}'.format(optd['models_dir']))
            # Check the models; homologs are not required to all be the same
            allsame = not optd['homologs']
            if not pdb_edit.check_pdb_directory(optd['models_dir'], sequence=None, single=True, allsame=allsame):
                msg = "Error importing restart models: {0}".format(optd['models_dir'])
                exit_util.exit_error(msg)
            optd['make_ensembles'] = True
        elif optd['frags_3mers'] and optd['frags_9mers']:
            logger.info('Restarting from existing fragments: {0}, {1}'.format(optd['frags_3mers'], optd['frags_9mers']))
            optd['make_models'] = True
    return optd
def process_rosetta_options(optd):
    """Create the ROSETTA modeller if this run needs one.

    Description
    -----------
    ROSETTA is only needed when ``make_models`` or ``make_frags`` is set.
    Creating the ``rosetta_model.RosettaModel`` runs all the checks required
    on the ROSETTA options.

    Parameters
    ----------
    optd : dict
        The AMPLE option dictionary, passed on to ``RosettaModel``.

    Returns
    -------
    rosetta_model.RosettaModel or None
        The modeller, or None when ROSETTA is not needed or could not be set up.

    Notes
    -----
    Any exception raised while creating the modeller is reported through
    ``exit_util.exit_error``.
    """
    # only need Rosetta if making models or fragments
    if not (optd['make_models'] or optd['make_frags']):
        return None
    logger.info('Using ROSETTA so checking options')
    try:
        return rosetta_model.RosettaModel(optd=optd)
    except Exception as e:
        msg = "Error setting ROSETTA options: {0}".format(e)
        exit_util.exit_error(msg)
    return None
def restart_amoptd(optd):
    """Create an ample dictionary from a restart pkl file.

    Description
    -----------
    The dictionary stored in ``restart_pkl`` is used as the basis, and every
    option named in ``cmdline_flags`` is overwritten with the value from the
    dictionary passed in. Nothing is done unless ``restart_pkl`` is set.

    Parameters
    ----------
    optd : dict
        The AMPLE option dictionary for the current invocation.

    Returns
    -------
    dict
        ``optd`` itself when not restarting, otherwise the updated dictionary
        read from the pkl file.

    Notes
    -----
    The dictionary has to be returned: when restarting, the result is a
    different object, and rebinding the name ``optd`` in here does not change
    the caller's reference.
    """
    if not optd['restart_pkl']:
        return optd
    logger.info('Restarting from existing pkl file: {0}'.format(optd['restart_pkl']))

    # We use the old dictionary, but update it with any new values
    # NOTE(review): read_amoptd presumably unpickles the file - only restart from trusted pkl files
    optd_old = ample_util.read_amoptd(optd['restart_pkl'])

    # Now update any variables that were given on the command-line
    for flag in optd['cmdline_flags']:
        logger.debug("Restart updating amopt variable: {0} : {1}".format(flag, optd[flag]))
        optd_old[flag] = optd[flag]

    # The old dictionary, now updated, replaces the one we were given
    return optd_old