'''
30.01.2016
@author: hlfsimko
'''
import logging
import multiprocessing
import os
import traceback
from ample.constants import AMPLE_CONFIG_FILE
from ample.ensembler.constants import POLYALA, RELIABLE, ALLATOM
from ample.util import version
try:
from configparser import ConfigParser as SafeConfigParser
except ImportError:
from ConfigParser import SafeConfigParser
logger = logging.getLogger(__name__)
##############################################################
# The sections and options within need to be stored
# otherwise we cannot manage interplay between
# ConfigParser and AMPLE settings dictionary.
# Some default non-dynamic parts are stored below to avoid errors
_SECTIONS_REFERENCE = {"AMPLE_info": ["ample_version",
"ccp4_version",
"cmdline_flags"],
"Databases": ['nr',
'rosetta_db'],
"Executables": ['blast_dir',
'cluster_exe',
'fast_protein_cluster_exe',
'gesamt_exe',
'maxcluster_exe',
'mustang_exe',
'rosetta_dir',
'rosetta_fragments_exe',
'rosetta_AbinitioRelax',
'scwrl_exe',
'shelxe_exe',
'spicker_exe',
'theseus_exe'],
"Files": ['alignment_file',
'ample_log',
'bbcontacts_file',
'cluster_dir',
'config_file',
'contact_file',
'disulfide_constraints_file',
'domain_all_chains_pdb',
'ensembles',
'ensembles_directory',
'ensemble_ok',
'fasta',
'frags_3mers',
'frags_9mers',
'models',
'models_dir',
'mrbump_dir',
'mr_sequence',
'mtz',
'native_pdb',
'native_mtz',
'nmr_model_in',
'nmr_remodel_fasta',
'out_config_file',
'psipred_ss2',
'restart_pkl',
'restraints_file',
'results_path',
'score_matrix',
'score_matrix_file_list',
'sf_cif',
'single_model',
'transmembrane_octopusfile',
'transmembrane_lipofile',
'transmembrane_spanfile',
'truncation_scorefile',
'work_dir'],
# Data stored in amopt.d but not really part of AMPLE's configuration
"No_config": ["benchmark_results",
"ensembles_data",
"fasta_length",
"mrbump_results",
"sequence",
"truncation_variances",
"truncation_levels",
"truncation_nresidues"],
# In case we haven't specified anything or it is new
"Unspecified": [],
}
class DebugDict(dict):
    """A dictionary that logs whenever a watched key is read or written.

    Parameters
    ----------
    *args, **kwargs
        Forwarded to :class:`dict`, except for the ``watchkeys`` keyword.
    watchkeys : iterable of keys or str, optional
        Keys whose item access (``d[key]``) and item assignment
        (``d[key] = val``) are logged together with the call stack.
        A single string is treated as one key. Defaults to no keys.

    Notes
    -----
    Only ``__getitem__`` and ``__setitem__`` are intercepted. The label used
    in the log messages is read from the ``'name_label'`` entry of the
    dictionary itself (``None`` when that entry is absent).
    """

    def __init__(self, *args, **kwargs):
        # 'watchkeys' is our own option and must not end up as a dict entry
        watchkeys = kwargs.pop('watchkeys', None)
        dict.__init__(self, *args, **kwargs)
        if watchkeys is None:
            watchkeys = []
        elif isinstance(watchkeys, str):
            # list('models') would split the name into single characters
            watchkeys = [watchkeys]
        elif not isinstance(watchkeys, list):
            watchkeys = list(watchkeys)
        self.watchkeys = watchkeys

    def __getitem__(self, key):
        """Return the value for `key`, logging the access if `key` is watched."""
        val = dict.__getitem__(self, key)
        if key in self.watchkeys:
            logger.info("AMOPT GET {0}['{1}'] = {2}".format(dict.get(self, 'name_label'), key, val))
            # Drop the last frame so the trace ends at the caller, not here
            frames = traceback.format_list(traceback.extract_stack())[:-1]
            logger.info("AMOPT STACK:\n{0}".format(os.linesep.join(frames)))
        return val

    def __setitem__(self, key, val):
        """Store `val` under `key`, logging the assignment if `key` is watched."""
        if key in self.watchkeys:
            logger.info("AMOPT SET {0}['{1}'] = {2}".format(dict.get(self, 'name_label'), key, val))
            # Drop the last frame so the trace ends at the caller, not here
            frames = traceback.format_list(traceback.extract_stack())[:-1]
            logger.info("AMOPT STACK:\n{0}".format(os.linesep.join(frames)))
        dict.__setitem__(self, key, val)
class AMPLEConfigOptions(object):
    """Collect AMPLE's options from a configuration file and the command line.

    All options end up in the dictionary ``self.d``. Values are first read
    from a configuration file, then updated with any command-line values that
    are not ``None``, and finally adjusted by the preset modes that are
    switched on (see :meth:`_process_options`).
    """

    def __init__(self):
        self.d = {}  # Can't use defaultdict as need lambda function to return None, which won't pickle
        # For debugging, swap in: self.d = DebugDict(watchkeys=['models'])
        self.cmdline_opts = {}
        self.debug = False

        # The original AMPLE clustering/truncation mode used in all work prior to January 2017
        self.classic_mode = {
            'percent': 5,
            'num_clusters': 1,
            'subcluster_radius_thresholds': [1, 2, 3],
            'side_chain_treatments': [POLYALA, RELIABLE, ALLATOM],
        }

        # Preset applied when the 'devel_mode' option is set
        self.devel_mode = {
            'benchmark_mode': True,
            'early_terminate': False,
            'shelxe_rebuild': True,
            'shelxe_rebuild_arpwarp': True,
            'shelxe_rebuild_buccaneer': True,
            'refine_rebuild_arpwarp': False,
            'refine_rebuild_buccaneer': False,
            #'mr_keys' : [ [ 'PKEY', 'KILL','TIME','360' ] ],
        }

        # Preset applied when the 'quick_mode' option is set
        self.quick_mode = {
            'ensemble_max_models': 10,
            'nmodels': 200,
            'percent': 20,
            'shelx_cycles': 5,
            'refine_rebuild_arpwarp': False,
            'refine_rebuild_buccaneer': False,
            'phaser_kill': 15,
        }

        # Preset applied when the 'webserver_uri' option is set
        self.webserver_uri = {
            'shelxe_rebuild_arpwarp': False,  # Need to sort out the ArpWarp licence details
            'shelxe_rebuild_buccaneer': True,
            'cluster_method': 'spicker_tm',
            'nproc': 1,
            'purge': True,
            'submit_cluster': True,
            'submit_max_array': 10,
            'submit_qtype': "SGE",
            'submit_queue': "all.q",
        }

    def populate(self, cmdline_opts):
        """Fill ``self.d`` from the configuration file and the command line.

        Parameters
        ----------
        cmdline_opts : object
            Anything accepted by :func:`vars` (e.g. an ``argparse.Namespace``).
            It must provide a ``config_file`` attribute.

        Raises
        ------
        RuntimeError
            If the configuration file cannot be found.
        """
        # Convert Namespace to Dictionary
        self.cmdline_opts = cmdline_opts = vars(cmdline_opts)
        # The configuration file is read first so the command line can override it
        config_file = self._get_config_file(cmdline_opts['config_file'])
        self._read_config_file(config_file)
        self._read_cmdline_opts(cmdline_opts)
        # Set further options
        self._process_options()

    def _get_config_file(self, cmd_file=None):
        """Return the path of the configuration file to use.

        Parameters
        ----------
        cmd_file : str, optional
            Path given by the user. When empty or ``None`` the module-level
            ``AMPLE_CONFIG_FILE`` is used instead.

        Returns
        -------
        str
            Absolute path of `cmd_file`, or ``AMPLE_CONFIG_FILE``.

        Raises
        ------
        RuntimeError
            If the selected path is not an existing file.
        """
        config_file = os.path.abspath(cmd_file) if cmd_file else AMPLE_CONFIG_FILE
        if not os.path.isfile(config_file):
            msg = "Cannot find configuration file: {0} - terminating...".format(config_file)
            logger.critical(msg)
            raise RuntimeError(msg)
        logger.debug("Using configuration file: {0}".format(config_file))
        return config_file

    def _process_options(self):
        """
        Handle any top-level options that affect the overall running of AMPLE.

        Notes
        -----
        Any specific processing of options should be handled in ample/util/options_processor.py/process_options

        See Also
        --------
        options_processor
        """
        self.d['ample_version'] = version.__version__

        # Fill in defaults only when the option exists but is empty
        if "rcdir" in self.d and not self.d["rcdir"]:
            self.d["rcdir"] = os.path.join(os.path.expanduser("~"), ".ample")
        if "run_dir" in self.d and not self.d["run_dir"]:
            self.d["run_dir"] = os.getcwd()

        # Set full file paths. Iterate over a snapshot because entries of
        # self.d are reassigned inside the loop.
        file_options = _SECTIONS_REFERENCE["Files"]
        for k, v in list(self.d.items()):
            if k in file_options and v:
                self.d[k] = os.path.abspath(v)

        # Use the maximum number of processors unless overridden by the user
        if self.d['nproc'] is None:
            if self.d['submit_cluster']:
                self.d['nproc'] = 1
            else:
                self.d['nproc'] = multiprocessing.cpu_count()

        # Check if using any preset options.
        # NOTE(review): no 'thin_clusters' preset dictionary is defined in
        # __init__, so enabling that option trips the assertion in
        # _preset_options - confirm which values the preset should carry.
        for mode in ('classic_mode', 'devel_mode', 'quick_mode', 'thin_clusters', 'webserver_uri'):
            if self.d[mode]:
                self._preset_options(mode)

    def _preset_options(self, mode):
        """Apply the preset dictionary stored in the attribute named `mode`.

        Options that were given on the command line are left untouched and a
        message is logged instead; every other option in the preset is
        written to ``self.d``.

        Parameters
        ----------
        mode : str
            Name of an attribute of this object that holds a preset dictionary.
        """
        assert hasattr(self, mode), "Unknown mode: {0}".format(mode)
        logger.info("Using preset mode: {0}".format(mode))
        for k, v in getattr(self, mode).items():
            if 'cmdline_flags' in self.d and k in self.d['cmdline_flags']:
                # The user's explicit command-line value wins over the preset
                if self.d[k] == v:
                    msg = 'WARNING! {0} flag {1} => {2} was duplicated on the command line!'.format(
                        mode, k, v)
                else:
                    msg = "WARNING! Overriding {0} setting: {1} => {2} with {3}".format(
                        mode, k, v, self.d[k])
                logger.critical(msg)
            elif k in self.d:
                logger.debug("{0} overriding default setting: {1} => {2} with {3}".format(
                    mode, k, v, self.d[k]))
                self.d[k] = v
            else:
                logger.debug("{0} setting: {1} => {2}".format(mode, k, v))
                self.d[k] = v

    @staticmethod
    def _convert_value(section, value):
        """Convert one raw configuration-file string to a Python value.

        Parameters
        ----------
        section : str
            Name of the section the value was read from.
        value : str
            Raw option value.

        Returns
        -------
        None, bool, int, float or str
            ``None``/``True``/``False`` for the corresponding words (any
            case); an absolute path for values in the ``Databases``,
            ``Executables`` and ``Files`` sections; otherwise an ``int`` or
            ``float`` when the text parses as one, else the unchanged string.
        """
        lowered = value.lower()
        if lowered == "none":
            return None
        if lowered == "true":
            return True
        if lowered == "false":
            return False
        # Path-like sections are checked before the numeric conversions
        if section.lower() in ("databases", "executables", "files"):
            return os.path.abspath(value)
        # int is tried first so that signed integers such as "-1" stay integers
        for cast in (int, float):
            try:
                return cast(value)
            except ValueError:
                continue
        return value

    def _read_config_file(self, config_file):
        """Read `config_file` into ``self.d`` and record each option's section.

        Every option name is registered in the module-level
        ``_SECTIONS_REFERENCE`` under the section it was read from.

        Parameters
        ----------
        config_file : str
            Path of the configuration file to read.
        """
        config = SafeConfigParser()
        # We need to make sure that the keys aren't converted to lower case on reading
        config.optionxform = str
        config.read(config_file)
        for section in config.sections():
            known_options = _SECTIONS_REFERENCE.setdefault(section, [])
            for k, v in config.items(section):
                self.d[k] = self._convert_value(section, v)
                # Avoid piling up duplicates when several files are read
                if k not in known_options:
                    known_options.append(k)

    def _read_cmdline_opts(self, cmdline_opts):
        """Merge command-line options into ``self.d``.

        The strings "true", "false" and "none" (any case) are converted to
        ``True``, ``False`` and ``None``. A value of ``None`` never replaces
        an option that is already present. The names of all options whose
        incoming value was not ``None`` are stored in
        ``self.d['cmdline_flags']``.

        Parameters
        ----------
        cmdline_opts : dict
            Option names mapped to their command-line values.
        """
        cmdline_flags = []
        for k, v in cmdline_opts.items():
            if v is not None:
                cmdline_flags.append(k)
            if isinstance(v, str):
                lowered = v.lower()
                if lowered == "true":
                    v = True
                elif lowered == "false":
                    v = False
                elif lowered == "none":
                    v = None
            if k not in self.d:
                self.d[k] = v
            elif v is not None:
                logger.debug(
                    "Cmdline setting {0}: {1} => {2}".format(k, self.d[k], v))
                self.d[k] = v
        self.d['cmdline_flags'] = cmdline_flags

    def _isfloat(self, value):
        """Return True if ``float(value)`` succeeds, False otherwise."""
        try:
            float(value)
        except (TypeError, ValueError, OverflowError):
            return False
        return True

    def prettify_parameters(self):
        """Return the parameters as one string with a ``key : value`` line per
        option, sorted by key, suitable for writing out to a file"""
        lines = ["{0} : {1}\n".format(k, self.d[k]) for k in sorted(self.d)]
        return 'Parameters Used in this Run\n\n' + "".join(lines)

    def write_config_file(self, config_file=None):
        """Write the current options to a configuration file.

        Parameters
        ----------
        config_file : str, optional
            Destination path. When ``None`` the file is written to
            ``<work_dir>/<name>.ini`` using the ``work_dir`` and ``name``
            entries of ``self.d``.

        Notes
        -----
        The destination is stored in ``self.d['out_config_file']`` after the
        file content has been assembled.
        """
        config = SafeConfigParser()
        # We need to make sure that the keys aren't converted to lower case on writing
        config.optionxform = str
        self._update_config(config)
        if config_file is None:
            # Can be None for testing
            config_file = os.path.join(
                self.d['work_dir'], self.d['name'] + ".ini")
        # Write config to job specific directory
        self.d["out_config_file"] = config_file
        with open(config_file, "w") as out:
            config.write(out)
        # Report only once the file has actually been written
        logger.info("AMPLE configuration written to: {0}".format(config_file))

    def _update_config(self, config_parser):
        """Copy the options in ``self.d`` into `config_parser`.

        Each option goes into the section that lists it in
        ``_SECTIONS_REFERENCE`` (matched case-insensitively), or into
        ``Unspecified`` when it is listed in no section or in several.
        Options belonging to ``No_config`` are skipped.

        Parameters
        ----------
        config_parser : ConfigParser
            Parser to which the sections and options are added.
        """
        # Add all sections to the configparser
        for section in sorted(_SECTIONS_REFERENCE.keys()):
            if section.lower() == "no_config":
                continue
            config_parser.add_section(section)

        # Place all entries in our dictionary in the corresponding section in
        # the configparser
        for option in sorted(self.d.keys()):
            option_lower = option.lower()
            # Extract the section in which the entry needs to go
            sections = [name for (name, entries) in _SECTIONS_REFERENCE.items()
                        if any(entry.lower() == option_lower for entry in entries)]
            # Make sure we only have each option assigned to a single section
            section = sections[0] if len(sections) == 1 else "Unspecified"
            section_lower = section.lower()

            if section_lower == "no_config":
                continue
            # We do not want to re-use files or at least not by default.
            # Comment those specifically out to avoid any errors
            if section_lower in ("ample_info", "files", "unspecified"):
                config_parser.set(section, "#" + option, str(self.d[option]))
            else:
                config_parser.set(section, option, str(self.d[option]))