Source code for ample.util.argparse_util

# Module-level metadata: authors, date stamp and version string.
__author__ = "Jens Thomas, Felix Simkovic & Adam Simpkin"
__date__ = "10 June 2019"
__version__ = "1.0"

import argparse
import os
from ample.modelling.multimer_definitions import MULTIMER_MODES
from pyjob.factory import TASK_PLATFORMS


class BoolAction(argparse.Action):
    """Argparse action that stores a boolean taken from a string or a bare flag.

    When the flag is given without a value (the argument was declared with
    ``nargs='?'``) the destination is set to ``True``. Otherwise the value
    must be one of the recognised true/false spellings, which are compared
    case-insensitively and with surrounding whitespace ignored.
    """

    # Accepted spellings, stored lower-case; input is lower-cased before lookup.
    _TRUE_STRINGS = frozenset(['1', 't', 'true'])
    _FALSE_STRINGS = frozenset(['0', 'f', 'false'])

    def __call__(self, parser, namespace, values, option_string=None):
        """Convert ``values`` to a bool and store it on ``namespace``.

        :param parser: the parser invoking this action (unused).
        :param namespace: namespace object that receives the parsed value.
        :param values: ``None`` for a bare flag, otherwise the supplied value.
        :param option_string: the option string used on the command line (unused).
        :raises argparse.ArgumentError: if ``values`` is not a recognised
            true/false value.
        """
        if values is None:
            # Nothing specified: supplying the flag alone sets the variable to True.
            result = True
        elif isinstance(values, str):
            token = values.strip().lower()
            if token in self._TRUE_STRINGS:
                result = True
            elif token in self._FALSE_STRINGS:
                result = False
            else:
                raise argparse.ArgumentError(self, 'Unrecognised True/False value: {0}'.format(values))
        elif values in (True, False):
            # Already a boolean (or an equal value such as 0/1): normalise to bool.
            result = bool(values)
        else:
            raise argparse.ArgumentError(self, 'Unrecognised True/False value: {0}'.format(values))
        setattr(namespace, self.dest, result)
class FilePathAction(argparse.Action):
    """Argparse action that stores file or directory paths as absolute paths.

    AMPLE changes directory into a work directory, so relative paths to files
    would no longer resolve. Paths are therefore converted to absolute paths
    at parse time, relative to the directory the command was run from.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        """Make the supplied path(s) absolute and store them on ``namespace``.

        :param parser: the parser invoking this action (unused).
        :param namespace: namespace object that receives the parsed value.
        :param values: a single path string, or a list of path strings when
            the argument was declared with a multi-value ``nargs``. Any other
            value is stored unchanged.
        :param option_string: the option string used on the command line (unused).
        """
        if isinstance(values, str):
            values = os.path.abspath(values)
        elif isinstance(values, list):
            # Multi-value arguments (nargs='+', '*', N): convert each string entry.
            values = [os.path.abspath(item) if isinstance(item, str) else item for item in values]
        setattr(namespace, self.dest, values)
def add_core_options(parser=None):
    """Register the arguments required by all run types.

    :param parser: parser to extend; a fresh ``argparse.ArgumentParser`` is
        created when ``None``.
    :returns: the parser the arguments were added to.
    """
    target = argparse.ArgumentParser() if parser is None else parser

    # (option string, add_argument keyword arguments), in registration order.
    core_specs = (
        ('-config_file', {'action': FilePathAction, 'help': "user configuration file"}),
        ('-debug', {'action': BoolAction, 'nargs': '?', 'metavar': 'True/False', 'help': argparse.SUPPRESS}),
        (
            '-nproc',
            {
                'type': int,
                'default': 1,
                'help': "Number of processors [1]. For local, serial runs the jobs will be split across nproc processors. For cluster submission, this should be the number of processors on a node.",
            },
        ),
        (
            '-work_dir',
            {
                'action': FilePathAction,
                'help': "Path to the directory where the job will run (will be created if it doesn't exist)",
            },
        ),
    )
    for option, settings in core_specs:
        target.add_argument(option, **settings)
    return target
def add_cluster_submit_options(parser=None):
    """Register the options controlling submission to a cluster queuing system.

    :param parser: parser to extend; a fresh ``argparse.ArgumentParser`` is
        created when ``None``.
    :returns: the parser the option group was added to.
    """
    if parser is None:
        parser = argparse.ArgumentParser()
    group = parser.add_argument_group('Cluster queue submission options')
    add = group.add_argument
    # Keyword arguments shared by every optional-value True/False switch.
    switch = {'action': BoolAction, 'nargs': '?', 'metavar': 'True/False'}

    add('-submit_array', help='Submit cluster jobs as an array', **switch)

    # ------------------------------------------------------------------
    # TO BE DEPRECATED
    # ------------------------------------------------------------------
    add(
        '-submit_cluster',
        help='Submit jobs to a cluster - need to set -submit_qtype flag to specify the batch queue system.',
        **switch
    )
    add('-submit_pe_lsf', help='Cluster submission: string to set number of processors for LSF queueing system')
    add('-submit_pe_sge', help='Cluster submission: string to set number of processors for SGE queueing system')
    # ------------------------------------------------------------------

    add(
        '-submit_max_array',
        help='The maximum number of jobs to run concurrently with SGE array job submission',
        type=int,
    )
    add(
        '-submit_num_array_jobs',
        help='The number of jobs to run concurrently with SGE array job submission',
        type=int,
    )
    add('-submit_pe', help='Cluster submission: string to set parallel environment')
    add('-submit_queue', help='The queue to submit to on the cluster.')
    add('-submit_qtype', default='local', choices=TASK_PLATFORMS.keys(), help='Cluster submission queue type')
    return parser
def add_general_options(parser=None):
    """Register the core options plus the general AMPLE options on a parser.

    :param parser: parser to extend; a fresh ``argparse.ArgumentParser`` is
        created when ``None``.
    :returns: the parser the arguments were added to.
    """
    # Imported here rather than at module level; used only for the --version string.
    from ample.util import version

    if parser is None:
        parser = argparse.ArgumentParser()
    add_core_options(parser)

    # NOTE(review): -classic_mode, -dry_run and -top_model_only advertise
    # True/False but have no BoolAction, so the raw string is stored -
    # confirm that downstream code converts them.
    parser.add_argument(
        '-alignment_file',
        action=FilePathAction,
        help='Alignment file in fasta format. For homologues the first line of each sequence must be the pdb file name',
    )
    parser.add_argument(
        '-allow_his_tag',
        action=BoolAction,
        nargs='?',
        metavar='True/False',
        help='Allow HIS tags in the input sequence',
    )
    parser.add_argument(
        '-blast_dir',
        action=FilePathAction,
        help='Directory where ncbi blast is installed (binaries in expected in bin subdirectory)',
    )
    parser.add_argument(
        '-classic_mode',
        metavar='True/False',
        help='Preset options to run the original AMPLE clustering/truncation options (1 cluster, 3 subclustering radii, 3 sidechains)',
    )
    parser.add_argument(
        '-ccp4i2_xml',
        action=FilePathAction,
        help='Path to CCP4I2 XML file - if not None indicates we are running under CCP4I2',
    )
    parser.add_argument(
        '-coiled_coil',
        action=BoolAction,
        nargs='?',
        metavar='True/False',
        help='Turn on Coiled-Coil mode for solving Coiled-Coil structures',
    )
    parser.add_argument(
        '-devel_mode', metavar='devel_mode', help='Preset options to run in development mode - takes longer'
    )
    parser.add_argument('-dry_run', metavar='True/False', help='Check if input files and supplied options are valid.')
    parser.add_argument(
        '-early_terminate',
        action=BoolAction,
        nargs='?',
        metavar='True/False',
        help='Stop the run as soon as a success has been found.',
    )
    parser.add_argument('-ensembles', help='Path to directory containing existing ensembles')
    parser.add_argument('-fasta', action=FilePathAction, help='protein fasta file. (required)')
    parser.add_argument('-fast_protein_cluster_exe', help='path to fast_protein_cluster executable')
    parser.add_argument('-F', metavar='flag for F', help='Flag for F column in the MTZ file')
    parser.add_argument('-FREE', metavar='flag for FREE', help='Flag for FREE column in the MTZ file')
    parser.add_argument(
        '-ideal_helices',
        action=BoolAction,
        nargs='?',
        metavar='True/False',
        help='Attempt to solve the structure using ideal polyalanine helices (8 helices: from 5-40 residues)',
    )
    parser.add_argument(
        '-helical_ensembles',
        action=BoolAction,
        nargs='?',
        metavar='True/False',
        help='Attempt to solve the structure using helical ensembles (minimal set of 12 ensembles used by default)',
    )
    parser.add_argument(
        '-helical_ensembles_set',
        choices=['full', 'minimal'],
        nargs='?',
        default='minimal',
        help='Choose the set of helical ensembles to be used: full - 64 ensembles, minimal - 12 ensembles',
    )
    parser.add_argument(
        '-improve_template', metavar='improve_template', help='Path to a template to improve - NMR, homolog'
    )
    parser.add_argument('-LGA', metavar='path_to_LGA dir', help=argparse.SUPPRESS)
    parser.add_argument(
        '-make_models',
        action=BoolAction,
        nargs='?',
        metavar='True/False',
        help='run rosetta modeling, set to False to import pre-made models (required if making models locally default True)',
    )
    parser.add_argument('-max_array_jobs', help='Maximum number of array jobs to run')
    parser.add_argument(
        '-models',
        metavar='models',
        type=os.path.abspath,
        help='Path to a folder of PDB decoys, or a tarred and gzipped/bziped, or zipped collection of decoys',
    )
    parser.add_argument(
        '-mr_sequence',
        action=FilePathAction,
        help="sequence file for crystal content (if different from what's given by -fasta)",
    )
    parser.add_argument('-mtz', action=FilePathAction, metavar='MTZ in', help='The MTZ file with the reflection data.')
    parser.add_argument('-name', metavar='job_name', help='4-letter identifier for job [ampl]')
    parser.add_argument(
        '-native_pdb',
        action=FilePathAction,
        metavar='native_pdb',
        help='Path to the crystal structure PDB for benchmarking.',
    )
    parser.add_argument(
        '-native_mtz',
        action=FilePathAction,
        # Was 'native_pdb' (copy-paste from the option above), which made the
        # usage/help output show the wrong placeholder for this option.
        metavar='native_mtz',
        help='Path to the native MTZ containing FC and PHIC calculated phases for benchmarking.',
    )
    parser.add_argument('-nmr_model_in', action=FilePathAction, metavar='nmr_model_in', help='PDB with NMR models')
    parser.add_argument('-nmr_process', type=int, help='number of times to process the NMR models')
    parser.add_argument(
        '-nmr_remodel', action=BoolAction, nargs='?', metavar='True/False', help='Remodel the NMR structures'
    )
    parser.add_argument(
        '-nmr_remodel_fasta',
        action=FilePathAction,
        help='The FASTA sequence to be used for remodelling the NMR ensemble if different from the default FASTA sequence',
    )
    parser.add_argument(
        '-purge',
        metavar='purge_level',
        type=int,
        choices=[0, 1, 2],
        help='Delete intermediate files and failed MRBUMP results: 0 - None, 1 - Some, 2 - All possible',
    )
    parser.add_argument('-psipred_ss2', metavar='PSIPRED_FILE', help='Psipred secondary structure prediction file')
    parser.add_argument(
        '-quick_mode',
        action=BoolAction,
        nargs='?',
        metavar='True/False',
        help='Preset options to run quickly, but less thoroughly',
    )
    parser.add_argument('-restart_pkl', help='Rerun a job using the pickled ample dictionary')
    parser.add_argument(
        '-run_dir',
        action=FilePathAction,
        metavar='run_directory',
        help='Directory where the AMPLE work directory will be created [current dir]',
        default=os.getcwd(),
    )
    parser.add_argument(
        '-rvapi_document', action=FilePathAction, help='Path to an existing rvapi document (for running under jscofe)'
    )
    parser.add_argument('-scwrl_exe', metavar='path to scwrl', help='Path to Scwrl4 executable')
    parser.add_argument(
        '-show_gui', action=BoolAction, nargs='?', metavar='True/False', help='Pop up and display a stand-alone GUI'
    )
    parser.add_argument(
        '-single_model', action=FilePathAction, help='Single structure model to be used to create ensembles'
    )
    parser.add_argument(
        '-sf_cif', action=FilePathAction, help='Path to a structure factor CIF file (instead of MTZ file)'
    )
    parser.add_argument('-SIGF', help='Flag for SIGF column in the MTZ file')
    parser.add_argument('-top_model_only', metavar='True/False', help='Only process the top model in each ensemble')
    parser.add_argument('--version', action='version', version='%(prog)s {0}'.format(version.__version__))
    parser.add_argument(
        '-webserver_uri', help='URI of the webserver directory - also indicates we are running as a webserver'
    )
    return parser
def add_contact_options(parser=None):
    """Register the contact prediction / restraint related options.

    :param parser: parser to extend; a fresh ``argparse.ArgumentParser`` is
        created when ``None``.
    :returns: the parser the option group was added to.
    """
    if parser is None:
        parser = argparse.ArgumentParser()
    contact_group = parser.add_argument_group("Contact Restraints Options")
    contact_group.add_argument(
        '-bbcontacts_file', action=FilePathAction, help='Additional bbcontacts file. Requires normal contactfile'
    )
    contact_group.add_argument(
        '-bbcontacts_format', help='Residue contact file format. For available formats refer to the AMPLE documentation'
    )
    contact_group.add_argument('-contact_file', action=FilePathAction, help='Residue contact file')
    contact_group.add_argument(
        '-contact_format', help='Residue contact file format. For available formats refer to the AMPLE documentation'
    )
    contact_group.add_argument(
        '-disulfide_constraints_file',
        action=FilePathAction,
        help='Disulfide residue constraints for ab initio modelling',
    )
    contact_group.add_argument(
        '-distance_to_neighbour', type=int, help="Min. distance between residue pairs for contact (default=5)"
    )
    contact_group.add_argument(
        '-energy_function', help='Rosetta energy function for contact restraint conversion (default=FADE)'
    )
    contact_group.add_argument(
        '-native_cutoff', type=float, help='Distance cutoff for reference contacts in native structure (default=8A)'
    )
    # nargs='?' lets the bare flag be used as a switch (BoolAction then stores
    # True). Without it argparse demands exactly one value and the bare flag
    # fails with "expected one argument". An explicit True/False value is
    # still accepted.
    contact_group.add_argument(
        '--no-contact-prediction',
        action=BoolAction,
        nargs='?',
        metavar='True/False',
        default=False,
        help="Do not predict contacts",
    )
    contact_group.add_argument(
        '-restraints_factor',
        type=float,
        help='Factor (* Sequence length) determining number of contact restraints to use (default=1.0)',
    )
    contact_group.add_argument(
        '-restraints_file', action=FilePathAction, help='Residue restraints for ab initio modelling'
    )
    contact_group.add_argument(
        '-restraints_weight', type=float, help="Additional energy weighting of restraints in Rosetta"
    )
    contact_group.add_argument(
        '-subselect_mode',
        help="Long-range decoy satisfaction subselection mode - one of [{0}]".format(
            " | ".join(["linear", "scaled", "cutoff"])
        ),
    )
    return parser
def add_mr_options(parser=None):
    """Register the MRBUMP / molecular replacement options.

    :param parser: parser to extend; a fresh ``argparse.ArgumentParser`` is
        created when ``None``.
    :returns: the parser the option group was added to.
    """
    if parser is None:
        parser = argparse.ArgumentParser()
    group = parser.add_argument_group('MRBUMP/Molecular Replacement Options')
    add = group.add_argument
    # Keyword arguments shared by every optional-value True/False switch.
    switch = {'action': BoolAction, 'nargs': '?', 'metavar': 'True/False'}

    add('-arpwarp_cycles', help='The number of ArpWarp cycles to run', type=int)
    add('-buccaneer_cycles', help='The number of Bucanner rebuilding cycles to run', type=int)
    add('-do_mr', help='Run or skip the Molecular Replacement step', **switch)
    add('-domain_termini_distance', help='distance between termini for insert domains')
    add('-existing_mr_solution', help='Existing MR solution to give to MRBUMP', action=FilePathAction)
    add('-early_terminate_SHELXE_CC', help='SHELXE_CC criteria for when a job has succeeeded', type=float)
    add('-early_terminate_SHELXE_ACL', help='SHELXE_ACL criteria for when a job has succeeeded', type=int)
    add('-molrep_only', help='Only use Molrep for Molecular Replacement step in MRBUMP', **switch)
    add('-mrbump_dir', help='Path to a directory of MRBUMP jobs (see restart_pkl)', action=FilePathAction)
    add(
        '-mr_keys',
        help='Additional keywords for MRBUMP - are passed through without editing',
        action='append',
        nargs='+',
    )
    add(
        '-mr_sg_all',
        help='Try all possible space groups in PHASER Molecular Replacement step in MRBUMP',
        metavar='True/False',
    )
    add(
        '-nmasu',
        help='Manually specify the number of molecules in the asymmetric unit - sets the NMASu MRBUMP flag',
        type=int,
    )
    add(
        '-phaser_kill',
        help='Time in minutes after which phaser will be killed (0 to leave running)',
        type=int,
        metavar='phaser_kill',
    )
    add('-phaser_only', help='Only use Phaser for Molecular Replacement step in MRBUMP', **switch)
    add('-phaser_rms', help='RMS value for phaser', metavar='phaser_rms')
    add(
        '-refine_rebuild_arpwarp',
        help='True to use ARPWARP to rebuild the REFMAC-refined MR result.',
        metavar='True/False',
    )
    add(
        '-refine_rebuild_buccaneer',
        help='True to use Buccaneer to rebuild the REFMAC-refined MR result.',
        metavar='True/False',
    )
    add('-shelx_cycles', help='The number of SHELXE cycles to run when rebuilding.')
    add('-shelxe_exe', help='Path to the SHELXE executable', metavar='path to shelxe executable')
    add('-shelxe_max_resolution', help='Maximum permitted resolution for rebuilding with SHELXE')
    add('-shelxe_rebuild', help='Rebuild SHELXE traced pdb with buccaneer and arpwarp', **switch)
    add('-shelxe_rebuild_arpwarp', help='Rebuild SHELXE traced pdb with arpwarp', **switch)
    add('-shelxe_rebuild_buccaneer', help='Rebuild SHELXE traced pdb with buccaneer', **switch)
    add('-use_scwrl', help='Remodel sidechains of the decoy models using Scwrl4', **switch)
    add('-use_shelxe', help='True to use SHELXE', **switch)
    return parser
def add_rosetta_options(parser=None):
    """Register the ROSETTA modelling options.

    :param parser: parser to extend; a fresh ``argparse.ArgumentParser`` is
        created when ``None``.
    :returns: the parser the option group was added to.
    """
    if parser is None:
        parser = argparse.ArgumentParser()
    rosetta_group = parser.add_argument_group('ROSETTA Modelling Options')
    rosetta_group.add_argument(
        '-all_atom',
        action=BoolAction,
        nargs='?',
        metavar='True/False',
        # Closing parenthesis was missing from the help text.
        help="Do all-atom Rosetta modelling (adds \"-return_full_atom true\" to rosetta arguments)",
    )
    rosetta_group.add_argument(
        '-frags_3mers', action=FilePathAction, help='Path to file with pre-existing Rosetta 3mer fragments'
    )
    rosetta_group.add_argument(
        # Help previously said "3mer" (copied from -frags_3mers).
        '-frags_9mers', action=FilePathAction, help='Path to file with pre-existing Rosetta 9mer fragments'
    )
    rosetta_group.add_argument(
        '-make_frags',
        action=BoolAction,
        nargs='?',
        metavar='True/False',
        help='set True to generate Rosetta 3mers and 9mers locally, False to import fragments',
    )
    rosetta_group.add_argument(
        '-multimer_modelling', help='Generate multimeric models. Accepted values: {}'.format(MULTIMER_MODES)
    )
    rosetta_group.add_argument(
        '-nmodels', metavar='number of models', type=int, help='number of models to make (default: 1000)'
    )
    rosetta_group.add_argument('-nr', metavar='nr', help='Path to the NR non-redundant sequence database')
    rosetta_group.add_argument(
        '-rg_reweight',
        metavar='radius of gyration reweight',
        type=float,
        help='Set the Rosetta -rg_reweight flag to specify the radius of gyration reweight.',
    )
    rosetta_group.add_argument(
        '-rosetta_executable', action=FilePathAction, help='Path to ROSETTA executable for modelling'
    )
    rosetta_group.add_argument('-rosetta_db', action=FilePathAction, help='Path to the Rosetta database directory')
    rosetta_group.add_argument('-rosetta_dir', action=FilePathAction, help='The Rosetta install directory')
    rosetta_group.add_argument(
        '-rosetta_fragments_exe', action=FilePathAction, help='Location of the Rosetta make_fragments.pl script'
    )
    rosetta_group.add_argument(
        '-rosetta_flagsfile', action=FilePathAction, help='Location of file with Rosetta modelling commands'
    )
    rosetta_group.add_argument('-rosetta_version', type=float, help='The version number of Rosetta')
    rosetta_group.add_argument(
        '-transmembrane',
        action=BoolAction,
        nargs='?',
        metavar='True/False',
        help='Do Rosetta modelling for transmembrane proteins (Ovchinnikov protocol)',
    )
    rosetta_group.add_argument(
        '-transmembrane_old',
        action=BoolAction,
        nargs='?',
        metavar='True/False',
        help='Do Rosetta modelling for transmembrane proteins (Yarov-Yarovoy protocol)',
    )
    rosetta_group.add_argument(
        '-transmembrane_octopusfile', action=FilePathAction, help='Octopus transmembrane topology predicition file'
    )
    rosetta_group.add_argument(
        '-transmembrane_spanfile', action=FilePathAction, help='Span file for modelling transmembrane proteins'
    )
    rosetta_group.add_argument(
        '-transmembrane_lipofile', action=FilePathAction, help='Lips4 file for modelling transmembrane proteins'
    )
    rosetta_group.add_argument(
        '-use_homs',
        action=BoolAction,
        nargs='?',
        metavar='True/False',
        help="Select ROSETTA fragments from homologous models",
    )
    return parser
def add_ensembler_options(parser=None):
    """Register the ensemble-generation options.

    The lists of side-chain treatments and truncation methods shown in the
    help text are taken from the ``ample.ensembler`` modules when they can be
    imported; otherwise hard-coded fallback values are used.

    :param parser: parser to extend; a fresh ``argparse.ArgumentParser`` is
        created when ``None``.
    :returns: the parser the option group was added to.
    """
    # sphinx-argparse ignores Mock imports and thus cannot find iotbx.pdb when
    # generating the docs, hence the ImportError fallback below.
    try:
        from ample.ensembler.constants import ALLOWED_SIDE_CHAIN_TREATMENTS, SPICKER_RMSD, SPICKER_TM
        from ample.ensembler.truncation_util import TRUNCATION_METHODS
    except ImportError:
        SPICKER_RMSD = 'spicker'
        SPICKER_TM = 'spicker_tm'
        truncation_methods = ['percent']
        allowed_side_chain_treatments = ['polyala', 'reliable', 'allatom', 'unmod']
    else:
        truncation_methods = [t.value for t in TRUNCATION_METHODS]
        allowed_side_chain_treatments = ALLOWED_SIDE_CHAIN_TREATMENTS[:]

    if parser is None:
        parser = argparse.ArgumentParser()
    group = parser.add_argument_group('Ensemble Options')
    add = group.add_argument
    # Keyword arguments shared by every optional-value True/False switch.
    switch = {'action': BoolAction, 'nargs': '?', 'metavar': 'True/False'}

    add('-cluster_dir', help='Path to directory of pre-clustered models to import', action=FilePathAction)
    add(
        '-cluster_method',
        help='How to cluster the models for ensembling. Options: ' + '|'.join([SPICKER_RMSD, SPICKER_TM]),
    )
    add('-ensembler_timeout', help='Time in seconds before timing out ensembling', type=int)
    add('-gesamt_exe', help='Path to the gesamt executable', metavar='gesamt_exe', action=FilePathAction)
    add('-homologs', help='Generate ensembles from homologs models (requires -alignment_file)', **switch)
    add(
        '-homolog_aligner',
        help='Program to use for structural alignment of homologs (gesamt|mustang)',
        metavar='homolog_aligner',
    )
    add('-ensemble_max_models', help='Maximum number of models permitted in an ensemble')
    add('-mustang_exe', help='Path to the mustang executable', metavar='mustang_exe', action=FilePathAction)
    add(
        '-num_clusters',
        help='The number of Spicker clusters of the original decoys that will be sampled [1]',
        type=int,
    )
    add('-percent', help='percent interval for truncation', metavar='percent_truncation')
    add(
        '-percent_fixed_intervals',
        help='list of integer percentage intervals for truncation',
        type=int,
        nargs='+',
    )
    add('-score_matrix', help='Path to score matrix for spicker', action=FilePathAction)
    add(
        '-score_matrix_file_list',
        help='File with list of ordered model names for the score_matrix',
        action=FilePathAction,
    )
    add(
        '-side_chain_treatments',
        help='The side chain treatments to use. Options: ' + '|'.join(allowed_side_chain_treatments),
        nargs='+',
        type=str,
    )
    add('-spicker_exe', help='Path to spicker executable', action=FilePathAction)
    add(
        '-subcluster_radius_thresholds',
        help='The radii to use for subclustering the truncated ensembles',
        nargs='+',
        type=float,
    )
    add('-subcluster_program', help='Program for subclustering models [gesamt]')
    add('-theseus_exe', help='Path to theseus executable', metavar='Theseus exe', action=FilePathAction)
    add(
        '-thin_clusters',
        help='Create ensembles from 10 clusters with 1 + 3A subclustering and polyAlanine sidechains',
        **switch
    )
    add('-truncation_method', help='How to truncate the models for ensembling: ' + '|'.join(truncation_methods))
    add('-truncation_pruning', help='Whether to remove isolated residues (single)')
    add(
        '-truncation_scorefile',
        help="CSV file containing per residue scores - COLUMN ONE MUST BE RESIDUE INDEX STARTING FROM 1",
        action=FilePathAction,
    )
    add('-truncation_scorefile_header', help="column headers to be used to create ensembles", nargs='+')
    return parser
def process_command_line(args=None, contacts=True, modelling=True, mol_rep=True):
    """Build the main AMPLE parser and parse the command line with it.

    :param args: optional list of command-line arguments, for use when called
        from within python (e.g. for testing); passed straight to
        ``parse_args``.
    :param contacts: when true, include the contact restraint options.
    :param modelling: when true, include the ROSETTA modelling options.
    :param mol_rep: when true, include the molecular replacement options.
    :returns: dictionary of the parsed options.
    """
    cli = argparse.ArgumentParser(
        prefix_chars="-",
        description="AMPLE: Ab initio Modelling of Proteins for moLEcular replacement",
    )

    # Option sets that are always present, followed by the switchable ones.
    installers = [add_general_options, add_cluster_submit_options, add_ensembler_options]
    if contacts:
        installers.append(add_contact_options)
    if mol_rep:
        installers.append(add_mr_options)
    if modelling:
        installers.append(add_rosetta_options)

    for install in installers:
        install(cli)

    namespace = cli.parse_args(args)
    return vars(namespace)