# Source code for ample.util.config_util

'''
30.01.2016

@author: hlfsimko
'''
import logging
import multiprocessing
import os
import traceback

from ample.constants import AMPLE_CONFIG_FILE
from ample.ensembler.constants import POLYALA, RELIABLE, ALLATOM
from ample.util import version

try:
    from configparser import ConfigParser as SafeConfigParser
except ImportError:
    from ConfigParser import SafeConfigParser

# Module-level logger shared by every class in this file.
logger = logging.getLogger(__name__)

##############################################################
# Maps each configuration-file section name to the option names that
# belong to it. The mapping has to be kept because the AMPLE settings
# dictionary is flat, whereas ConfigParser groups options into sections;
# without it an option could not be written back to the right section.
# Only the static, well-known options are listed here. Sections and
# options found in a configuration file at runtime are added to this
# table when the file is read.

_SECTIONS_REFERENCE = {"AMPLE_info": ["ample_version",
                                      "ccp4_version",
                                      "cmdline_flags"],

                       "Databases": ['nr',
                                     'rosetta_db'],

                       "Executables": ['blast_dir',
                                       'cluster_exe',
                                       'fast_protein_cluster_exe',
                                       'gesamt_exe',
                                       'maxcluster_exe',
                                       'mustang_exe',
                                       'rosetta_dir',
                                       'rosetta_fragments_exe',
                                       'rosetta_AbinitioRelax',
                                       'scwrl_exe',
                                       'shelxe_exe',
                                       'spicker_exe',
                                       'theseus_exe'],

                       "Files": ['alignment_file',
                                 'ample_log',
                                 'bbcontacts_file',
                                 'cluster_dir',
                                 'config_file',
                                 'contact_file',
                                 'disulfide_constraints_file',
                                 'domain_all_chains_pdb',
                                 'ensembles',
                                 'ensembles_directory',
                                 'ensemble_ok',
                                 'fasta',
                                 'frags_3mers',
                                 'frags_9mers',
                                 'models',
                                 'models_dir',
                                 'mrbump_dir',
                                 'mr_sequence',
                                 'mtz',
                                 'native_pdb',
                                 'native_mtz',
                                 'nmr_model_in',
                                 'nmr_remodel_fasta',
                                 'out_config_file',
                                 'psipred_ss2',
                                 'restart_pkl',
                                 'restraints_file',
                                 'results_path',
                                 'score_matrix',
                                 'score_matrix_file_list',
                                 'sf_cif',
                                 'single_model',
                                 'transmembrane_octopusfile',
                                 'transmembrane_lipofile',
                                 'transmembrane_spanfile',
                                 'truncation_scorefile',
                                 'work_dir'],
                       # Data held in the options dictionary that is not part of
                       # AMPLE's configuration; these keys are never written to
                       # a configuration file.
                       "No_config": ["benchmark_results",
                                     "ensembles_data",
                                     "fasta_length",
                                     "mrbump_results",
                                     "sequence",
                                     "truncation_variances",
                                     "truncation_levels",
                                     "truncation_nresidues"],
                       # Fallback section for options that are not listed under
                       # exactly one of the sections in this table.
                       "Unspecified": [],
                       }

class DebugDict(dict):
    """A dictionary that logs whenever a watched key is read or written.

    Positional arguments are forwarded to ``dict``. The only keyword
    argument that is interpreted is ``watchkeys``: an iterable of keys (or
    a single string key) whose accesses should be logged. Any other
    keyword arguments are ignored.

    Each logged access reports the value stored under the ``'name_label'``
    key (``None`` when absent), the key, the value, and the call stack
    that led to the access.
    """

    def __init__(self, *args, **kwargs):
        # Unpack the positional arguments: passing the tuple itself makes
        # dict() treat every argument as a (key, value) pair and fail.
        dict.__init__(self, *args)
        watchkeys = kwargs.get('watchkeys')
        if watchkeys is None:
            watchkeys = []
        elif isinstance(watchkeys, str):
            # A bare string would otherwise be matched by substring.
            watchkeys = [watchkeys]
        elif not isinstance(watchkeys, list):
            watchkeys = list(watchkeys)
        self.watchkeys = watchkeys

    def _log_access(self, action, key, val):
        """Log a GET/SET of a watched key together with the calling stack."""
        logger.info("AMOPT {0} {1}['{2}'] = {3}".format(
            action, dict.get(self, 'name_label'), key, val))
        # Drop the two innermost frames (this helper and the dunder method
        # that called it) so the trace ends at the code touching the dict.
        frames = traceback.format_list(traceback.extract_stack())[:-2]
        logger.info("AMOPT STACK:\n{0}".format(os.linesep.join(frames)))

    def __getitem__(self, key):
        val = dict.__getitem__(self, key)
        if key in self.watchkeys:
            self._log_access("GET", key, val)
        return val

    def __setitem__(self, key, val):
        if key in self.watchkeys:
            self._log_access("SET", key, val)
        dict.__setitem__(self, key, val)
class AMPLEConfigOptions(object):
    """Container for AMPLE's run-time options.

    Options are collected in the flat dictionary ``self.d`` from a
    configuration file and from the parsed command line (command-line
    values that are not ``None`` take precedence), then adjusted by any
    enabled preset mode. The options can be written back out as a
    configuration file with :meth:`write_config_file`.
    """

    def __init__(self):
        # Can't use defaultdict as need lambda function to return None,
        # which won't pickle.
        # For debugging: self.d = DebugDict(watchkeys=['models'])
        self.d = {}

        self.cmdline_opts = {}
        self.debug = False

        # Preset modes. Each attribute name matches the option that enables
        # it; its dictionary holds the option values the preset applies.

        # The original AMPLE clustering/truncation mode used in all work
        # prior to January 2017
        self.classic_mode = {
            'percent': 5,
            'num_clusters': 1,
            'subcluster_radius_thresholds': [1, 2, 3],
            'side_chain_treatments': [POLYALA, RELIABLE, ALLATOM],
        }

        # Test use scrwl
        self.devel_mode = {
            'benchmark_mode': True,
            'early_terminate': False,
            'shelxe_rebuild': True,
            'shelxe_rebuild_arpwarp': True,
            'shelxe_rebuild_buccaneer': True,
            'refine_rebuild_arpwarp': False,
            'refine_rebuild_buccaneer': False,
            # 'mr_keys' : [ [ 'PKEY', 'KILL','TIME','360' ] ],
        }

        self.quick_mode = {
            'ensemble_max_models': 10,
            'nmodels': 200,
            'percent': 20,
            'shelx_cycles': 5,
            'refine_rebuild_arpwarp': False,
            'refine_rebuild_buccaneer': False,
            'phaser_kill': 15,
        }

        self.webserver_uri = {
            # Need to sort out the ArpWarp licence details
            'shelxe_rebuild_arpwarp': False,
            'shelxe_rebuild_buccaneer': True,
            'cluster_method': 'spicker_tm',
            'nproc': 1,
            'purge': True,
            'submit_cluster': True,
            'submit_max_array': 10,
            'submit_qtype': "SGE",
            'submit_queue': "all.q",
        }

    def populate(self, cmdline_opts):
        """Fill ``self.d`` from the configuration file and the command line.

        Parameters
        ----------
        cmdline_opts : object
            Parsed command-line options; converted to a dictionary with
            ``vars()``.

        Raises
        ------
        RuntimeError
            If the selected configuration file does not exist.
        """
        # Convert Namespace to Dictionary
        self.cmdline_opts = cmdline_opts = vars(cmdline_opts)

        # Identify which config file to use; a missing 'config_file' entry
        # selects the default file, exactly like an explicit None.
        config_file = self._get_config_file(cmdline_opts.get('config_file'))

        # Read the configuration file first so that the command line can
        # override it.
        self._read_config_file(config_file)
        self._read_cmdline_opts(cmdline_opts)

        # Set further options
        self._process_options()
        return

    def _get_config_file(self, cmd_file=None):
        """Return the configuration file path to use.

        ``cmd_file`` is made absolute when given; otherwise the
        ``AMPLE_CONFIG_FILE`` constant is used.

        Raises
        ------
        RuntimeError
            If the resulting path is not an existing file.
        """
        config_file = os.path.abspath(cmd_file) if cmd_file else AMPLE_CONFIG_FILE
        if not os.path.isfile(config_file):
            msg = "Cannot find configuration file: {0} - terminating...".format(
                config_file)
            logger.critical(msg)
            raise RuntimeError(msg)
        logger.debug("Using configuration file: {0}".format(config_file))
        return config_file

    def _process_options(self):
        """
        Handle any top-level options that affect the overall running of AMPLE.

        Notes
        -----
        Any specific processing of options should be handled in
        ample/util/options_processor.py/process_options

        See Also
        --------
        options_processor
        """
        self.d['ample_version'] = version.__version__

        if "rcdir" in self.d and not self.d["rcdir"]:
            self.d["rcdir"] = os.path.join(os.path.expanduser("~"), ".ample")

        if "run_dir" in self.d and not self.d["run_dir"]:
            self.d["run_dir"] = os.getcwd()

        # Set full file paths. Iterate over a snapshot because the
        # dictionary is updated inside the loop; items() also works on
        # Python 3, where iteritems() no longer exists.
        for k, v in list(self.d.items()):
            if k in _SECTIONS_REFERENCE["Files"] and v:
                self.d[k] = os.path.abspath(v)

        # Use the maximum number of processors unless overridden by the user
        if self.d['nproc'] is None:
            if self.d['submit_cluster']:
                self.d['nproc'] = 1
            else:
                self.d['nproc'] = multiprocessing.cpu_count()

        # Check if using any preset options.
        # NOTE(review): no `thin_clusters` preset dictionary is defined in
        # __init__ as visible here, so enabling that option trips the
        # assertion in _preset_options - confirm whether it is set elsewhere.
        for mode in ('classic_mode', 'devel_mode', 'quick_mode',
                     'thin_clusters', 'webserver_uri'):
            if self.d[mode]:
                self._preset_options(mode)
        return

    def _preset_options(self, mode):
        """Apply the preset dictionary stored in the attribute named ``mode``.

        An option the user gave on the command line is left untouched and a
        warning is logged; every other option in the preset is written
        into ``self.d``.
        """
        assert hasattr(self, mode), "Unknown mode: {0}".format(mode)
        logger.info("Using preset mode: {0}".format(mode))
        for k, v in getattr(self, mode).items():
            if 'cmdline_flags' in self.d and k in self.d['cmdline_flags']:
                # The user's explicit command-line value wins over the preset.
                if self.d[k] == v:
                    msg = 'WARNING! {0} flag {1} => {2} was duplicated on the command line!'.format(
                        mode, k, v)
                else:
                    msg = "WARNING! Overriding {0} setting: {1} => {2} with {3}".format(
                        mode, k, v, self.d[k])
                logger.critical(msg)
            elif k in self.d:
                logger.debug("{0} overriding default setting: {1} => {2} with {3}".format(
                    mode, k, v, self.d[k]))
                self.d[k] = v
            else:
                logger.debug("{0} setting: {1} => {2}".format(mode, k, v))
                self.d[k] = v
        return

    def _read_config_file(self, config_file):
        """Load ``config_file`` into ``self.d``, converting value types.

        Every section and option that is read is also recorded in the
        module-level ``_SECTIONS_REFERENCE`` table.
        """
        config = SafeConfigParser()
        # We need to make sure that the keys aren't converted to lower case
        # on reading
        config.optionxform = str
        config.read(config_file)

        path_sections = ("databases", "executables", "files")
        for section in config.sections():
            known_options = _SECTIONS_REFERENCE.setdefault(section, [])
            is_path_section = section.lower() in path_sections

            # Determine the type of each value. The literal checks come
            # first so that e.g. "None" in a path section stays None.
            for k, v in config.items(section):
                lowered = v.lower()
                if lowered == "none":
                    self.d[k] = None
                elif lowered == "true":
                    self.d[k] = True
                elif lowered == "false":
                    self.d[k] = False
                elif is_path_section:
                    self.d[k] = os.path.abspath(v)
                elif v.isdigit():
                    # isdigit() rejects signed values, so "-5" falls
                    # through to the float branch below.
                    self.d[k] = int(v)
                elif self._isfloat(v):
                    self.d[k] = float(v)
                else:
                    self.d[k] = v

                # Avoid piling up duplicates when files are read repeatedly.
                if k not in known_options:
                    known_options.append(k)
        return

    def _read_cmdline_opts(self, cmdline_opts):
        """Merge the command-line option dictionary into ``self.d``.

        The strings "true", "false" and "none" (any case) are converted to
        ``True``, ``False`` and ``None``. A value of ``None`` never
        overwrites an existing entry. The names of all options that
        arrived with a non-``None`` value are stored under
        ``'cmdline_flags'``.
        """
        literals = {"true": True, "false": False, "none": None}
        cmdline_flags = []
        for k, v in cmdline_opts.items():
            # Recorded before conversion: an explicit "none" string still
            # counts as a flag the user supplied.
            if v is not None:
                cmdline_flags.append(k)
            if isinstance(v, str) and v.lower() in literals:
                v = literals[v.lower()]

            if k not in self.d:
                self.d[k] = v
            elif v is not None:
                logger.debug(
                    "Cmdline setting {0}: {1} => {2}".format(k, self.d[k], v))
                self.d[k] = v

        self.d['cmdline_flags'] = cmdline_flags
        return

    def _isfloat(self, value):
        """Return True if ``float(value)`` succeeds, otherwise False."""
        try:
            float(value)
        except (TypeError, ValueError):
            return False
        return True

    def prettify_parameters(self):
        """Return the parameters nicely formatted as a single string,
        one sorted ``key : value`` line per option, suitable for writing
        out to a file"""
        lines = ["{0} : {1}\n".format(k, v) for k, v in sorted(self.d.items())]
        return 'Parameters Used in this Run\n\n' + "".join(lines)

    def write_config_file(self, config_file=None):
        """Write the current options to a configuration file.

        Parameters
        ----------
        config_file : str, optional
            Destination path. When ``None`` the file is
            ``<work_dir>/<name>.ini``, built from ``self.d``.

        The destination is stored in ``self.d['out_config_file']``.
        """
        config = SafeConfigParser()
        # We need to make sure that the keys aren't converted to lower case
        # on writing
        config.optionxform = str
        self._update_config(config)
        if config_file is None:
            # Can be None for testing
            config_file = os.path.join(
                self.d['work_dir'], self.d['name'] + ".ini")
        # Write config to job specific directory
        self.d["out_config_file"] = config_file
        with open(config_file, "w") as out:
            config.write(out)
        # Report success only once the file has really been written.
        logger.info("AMPLE configuration written to: {0}".format(config_file))
        return

    def _update_config(self, config_parser):
        """Copy the options in ``self.d`` into ``config_parser``.

        Each option goes into the section that lists it in
        ``_SECTIONS_REFERENCE`` (compared case-insensitively); options
        listed in no section, or in more than one, go to "Unspecified".
        """
        # Add all sections to the configparser
        for section in sorted(_SECTIONS_REFERENCE):
            if section.lower() == "no_config":
                continue
            if not config_parser.has_section(section):
                config_parser.add_section(section)

        # Place all entries in our dictionary in the corresponding section
        # in the configparser
        commented_sections = ("ample_info", "files", "unspecified")
        for option in sorted(self.d):
            # Extract the section in which the entry needs to go
            wanted = option.lower()
            sections = [name for name, entries in _SECTIONS_REFERENCE.items()
                        if any(entry.lower() == wanted for entry in entries)]

            # Make sure we only have each option assigned to a single section
            section = sections[0] if len(sections) == 1 else "Unspecified"
            lowered = section.lower()

            if lowered == "no_config":
                continue

            # We do not want to re-use files or at least not by default.
            # Comment those specifically out to avoid any errors
            name = "#" + option if lowered in commented_sections else option
            config_parser.set(section, name, str(self.d[option]))
        return