Source code for ample.util.mrbump_util

#!/usr/bin/env ccp4-python

import copy
import cPickle
import glob
import logging
import os
import shutil
import sys

# Hack to make sure we can find the modules we need
# Only applies when this file is executed directly as a script: the parent of
# the directory containing this file is taken as the root, and its "scripts"
# sub-directory is placed first on the module search path.
if __name__ == "__main__":
    root = os.sep.join(os.path.abspath(__file__).split(os.sep)[:-2])
    sys.path.insert(0, os.path.join(root, "scripts"))

# Our imports
from ample.util import ample_util
from ample.util import mrbump_cmd
from ample.util import printTable

# MRBUMP imports
if not "CCP4" in os.environ.keys(): raise RuntimeError('CCP4 not found')
mrbumpd = os.path.join(os.environ['CCP4'], "share", "mrbump", "include", "parsers")
#mrbumpd = "/opt/mrbump-trunk/include/parsers"
sys.path.insert(0, mrbumpd)
import parse_arpwarp
import parse_buccaneer
import parse_phaser

# TOP_KEEP is the slice length used when purging jobs: that many of the best
# results per ranking are retained, everything else is archived and deleted.
# MRBUMP_RUNTIME is the default job_time passed to write_mrbump_files().
TOP_KEEP = 3 # How many of the top shelxe/phaser results to keep for the gui
MRBUMP_RUNTIME = 172800 # allow 48 hours for each mrbump job

# We need a null logger so that we can be used without requiring a logger
class NullHandler(logging.Handler):
    """Logging handler that silently discards every record.

    Attached to the module logger below so that importing this module never
    requires the application to have configured logging first.
    """

    def emit(self, record):
        """Ignore ``record``; nothing is written anywhere."""
        pass


# Module-level logger; the null handler keeps it quiet until the application
# installs real handlers.
logger = logging.getLogger(__name__)
logger.addHandler(NullHandler())
#logger.basicConfig()

class ResultsSummary(object):
    """
    Summarise the results for a series of MRBUMP runs.

    Each result is a plain dictionary (see :meth:`createDict` for the keys this
    class creates itself). ``self.results`` holds the current list of result
    dictionaries and ``self.success`` records whether any of them satisfied
    ``jobSucceeded`` the last time :meth:`extractResults` ran.
    """

    # Every key of a freshly created result dictionary (all values start as None)
    _RESULT_KEYS = (
        # our additional keys
        'ensemble_name', 'MR_program', 'name', 'Search_directory',
        # END
        'MR_directory', 'Solution_Type',
        'PHASER_LLG', 'PHASER_TFZ', 'PHASER_RFZ', 'PHASER_time', 'PHASER_killed',
        'PHASER_pdbout', 'PHASER_mtzout', 'PHASER_logfile', 'PHASER_version', 'PHASER_error',
        'MOLREP_score', 'MOLREP_time', 'MOLREP_pdbout', 'MOLREP_logfile', 'MOLREP_version',
        'REFMAC_Rfact', 'REFMAC_Rfree', 'REFMAC_pdbout', 'REFMAC_mtzout', 'REFMAC_logfile',
        'REFMAC_version',
        'BUCC_final_Rfact', 'BUCC_final_Rfree', 'BUCC_pdbout', 'BUCC_mtzout', 'BUCC_logfile',
        'BUCC_version',
        'ARP_final_Rfact', 'ARP_final_Rfree', 'ARP_pdbout', 'ARP_mtzout', 'ARP_logfile',
        'ARP_version',
        'SHELXE_CC', 'SHELXE_ACL', 'SHELXE_MCL', 'SHELXE_NC', 'SHELXE_wMPE', 'SHELXE_os',
        'SHELXE_time', 'SHELXE_pdbout', 'SHELXE_phsout', 'SHELXE_mtzout', 'SHELXE_logfile',
        'SHELXE_version',
        'SXRBUCC_version', 'SXRBUCC_final_Rfact', 'SXRBUCC_final_Rfree', 'SXRBUCC_pdbout',
        'SXRBUCC_mtzout', 'SXRBUCC_logfile',
        'SXRARP_version', 'SXRARP_final_Rfact', 'SXRARP_final_Rfree', 'SXRARP_pdbout',
        'SXRARP_mtzout', 'SXRARP_logfile',
    )

    # Byte and unicode string types, valid on both Python 2 and Python 3
    _STRING_TYPES = (type(""), type(u""))

    def __init__(self, results=None, results_pkl=None):
        """
        Parameters
        ----------
        results : list
           Result dictionaries to start from. Only used when ``results_pkl``
           is not given or is not an existing file.
        results_pkl : file
           A pickled AMPLE results dictionary; its 'mrbump_results' entry is
           used when it is present and non-empty.
        """
        self.results = []
        self.pname = "archive"  # name of the sub-directory holding archived job pickles
        self.pdir = None  # full path of that directory, set by _extractOld
        self.success = False

        # Extract mrbump results from a pickled results file if given one.
        if results_pkl and os.path.isfile(results_pkl):
            # NOTE(review): unpickling runs arbitrary code - only load trusted files
            with open(results_pkl) as f:
                resd = cPickle.load(f)
            mkey = 'mrbump_results'
            if mkey in resd and len(resd[mkey]):
                self.results = resd[mkey]
        elif results:
            self.results = results

    def analyseResult(self, result):
        """Add values parsed from the PHASER, Buccaneer and ARP/wARP output to ``result``.

        ``result`` is updated in place; files that do not exist are skipped.
        """
        mrDir = result["MR_directory"]

        if result["MR_program"] == "PHASER":
            if result["PHASER_pdbout"]:
                phaserP = parse_phaser.PhaserPdbParser(result["PHASER_pdbout"])
                result["PHASER_LLG"] = phaserP.LLG
                result["PHASER_TFZ"] = phaserP.TFZ

            phaserLog = os.path.join(mrDir, "{0}_loc0_ALL_{1}_UNMOD.log".format(result["MR_program"].lower(),
                                                                                result['ensemble_name']))
            if os.path.isfile(phaserLog):
                phaserP = parse_phaser.PhaserLogParser(phaserLog, noLLG=True)
                result["PHASER_time"] = phaserP.time
                result["PHASER_killed"] = phaserP.killed

        # Buccaneer rebuild of the SHELXE trace
        buccaneerLog = os.path.join(mrDir,
                                    "build/shelxe/rebuild/buccaneer",
                                    "buccaneer.log")
        if os.path.isfile(buccaneerLog):
            bp = parse_buccaneer.BuccaneerLogParser()
            bp.parse(buccaneerLog)
            result["SXRBUCC_final_Rfree"] = bp.finalRfree
            result["SXRBUCC_final_Rfact"] = bp.finalRfact

        # ARP/wARP rebuild of the SHELXE trace
        arpLog = os.path.join(mrDir,
                              "build/shelxe/rebuild/arpwarp",
                              "arpwarp.log")
        if os.path.isfile(arpLog):
            ap = parse_arpwarp.ArpwarpLogParser()
            ap.parse(arpLog)
            result["SXRARP_final_Rfact"] = ap.finalRfact
            result["SXRARP_final_Rfree"] = ap.finalRfree

    def createDict(self):
        """Return a new result dictionary with every known key set to None."""
        return dict.fromkeys(self._RESULT_KEYS)

    def _extractOld(self, mrbump_dir):
        """Recreate a list of the jobs that have been purged

        Sets ``self.pdir`` to the archive directory inside ``mrbump_dir``
        (creating it if required) and returns a dictionary mapping ensemble
        name to the result dictionary unpickled from each ``*.pkl`` file there.
        """
        old_results = {}
        self.pdir = os.path.join(mrbump_dir, self.pname)
        if not os.path.isdir(self.pdir):
            os.mkdir(self.pdir)
        for p in glob.glob(os.path.join(self.pdir, "*.pkl")):
            with open(p) as f:
                d = cPickle.load(f)
            old_results[d['ensemble_name']] = d
        return old_results

    def extractResults(self, mrbump_dir, purge=False):
        """Collect, sort and return the results found in ``mrbump_dir``.

        Parameters
        ----------
        mrbump_dir : str
           Directory containing the MRBUMP job scripts and search directories
        purge : bool
           If True, previously archived results are reloaded, and the
           directories of jobs that are not among the best are deleted after
           their dictionaries have been archived (see :meth:`_purgeFailed`).

        Returns
        -------
        list
           The sorted result dictionaries (also stored in ``self.results``)

        Raises
        ------
        RuntimeError
           If ``mrbump_dir`` is empty or is not a directory
        """
        if not mrbump_dir or not os.path.isdir(mrbump_dir):
            raise RuntimeError("Cannot find mrbump_dir: {0}".format(mrbump_dir))
        old_results = self._extractOld(mrbump_dir) if purge else {}
        self._extractResults(mrbump_dir, archived_ensembles=list(old_results.keys()))

        if purge:
            self._purgeFailed()
            self.results += list(old_results.values())

        self.sortResults()
        self.success = any(jobSucceeded(r) for r in self.results)
        return self.results

    def _extractResults(self, mrbump_dir, archived_ensembles=None):
        """
        Find the results from running MRBUMP and store them in ``self.results``.

        Ensembles named in ``archived_ensembles`` are skipped. Jobs without a
        directory, unfinished jobs and jobs without a results file are recorded
        as failures via :meth:`_processFailed`.
        """
        mrbump_dir = os.path.abspath(mrbump_dir)
        if not os.path.isdir(mrbump_dir):
            logger.warning("extractResults - is not a valid directory: {0}".format(mrbump_dir))
            return []

        def script_names(extension):
            """Names (without extension) of the job scripts with this extension."""
            return [os.path.splitext(os.path.basename(e))[0]
                    for e in glob.glob(os.path.join(mrbump_dir, "*" + extension))]

        # Get a list of the ensembles (could get this from the amopt dictionary)
        # For now we just use the submission scripts: .sh (.bat on Windows),
        # falling back to the legacy .sub extension
        ext = '.bat' if sys.platform.startswith("win") else '.sh'
        ensembles = script_names(ext)
        if not ensembles:
            ensembles = script_names('.sub')
        if not ensembles:
            logger.warning("Could not extract any results from directory: {0}".format(mrbump_dir))
            return []

        archived = set(archived_ensembles or ())
        results = []
        failed = {}  # dict mapping failures to what went wrong - need to process at the end
        for ensemble in ensembles:
            # Skip ones that we've archived
            if ensemble in archived:
                continue

            # Check job directory
            jobDir = os.path.join(mrbump_dir, 'search_' + ensemble + '_mrbump')
            if not os.path.isdir(jobDir):
                jobDir = os.path.join(mrbump_dir, 'search_' + ensemble)
            if not os.path.isdir(jobDir):
                # As we call this every time we monitor a job running, we don't want to log this all the time
                failed[ensemble] = "no_job_directory"
                continue

            logger.debug(" -- checking directory for results: {0}".format(jobDir))
            # Check if finished
            if not os.path.exists(os.path.join(jobDir, "results", "finished.txt")):
                logger.debug("Found unfinished job: {0}".format(jobDir))
                failed[ensemble] = "unfinished"
                continue

            # Prefer the pickled results table over the text one
            resultsDict = os.path.join(jobDir, "results", "resultsTable.pkl")
            resultsTable = os.path.join(jobDir, "results", "resultsTable.dat")
            if os.path.isfile(resultsDict):
                results += self.processMrbumpPkl(resultsDict)
            elif os.path.isfile(resultsTable):
                results += self.parseTableDat(resultsTable)
            else:
                logger.debug(" -- Could not find results files: {0} or {1}".format(resultsDict, resultsTable))
                failed[ensemble] = "missing-results-file"

        # Process the failed results
        if failed:
            results += self._processFailed(mrbump_dir, failed)
        if not results:
            logger.warning("Could not extract any results from directory: {0}".format(mrbump_dir))
        self.results = results
        return

    def _getUnfinishedResult(self, result):
        """Hook called by :meth:`parseTableDat` for table rows that cannot be parsed.

        The module called this method without ever defining it, so a damaged
        results table ended in an AttributeError. This default implementation
        deliberately does nothing; subclasses may override it.
        """
        return result

    @staticmethod
    def _set_if_file(result, key, path):
        """Store ``path`` under ``result[key]`` only if the file exists."""
        if os.path.isfile(path):
            result[key] = path

    def parseTableDat(self, tfile):
        """Read a resultsTable file and return a list of MrBump results objects

        The first non-blank line is the header; every later non-blank line
        yields one result dictionary. A header with an unknown column stops
        parsing and returns a single result whose Solution_Type is
        "problem-header-file.dat"; a data line with the wrong number of
        fields yields a result whose Solution_Type is "corrupted-data-tfile.dat".
        """
        # Extract the various components from the path
        tfile = os.path.abspath(tfile)
        paths = tfile.split(os.sep)
        jobDir = os.path.abspath(os.sep.join(paths[:-2]))
        search_name = paths[-3]
        # remove search_..._mrbump: 'search_All_atom_trunc_0.005734_rad_1_mrbump'
        if search_name.endswith('_mrbump'):
            ensemble = search_name[7:-7]
        else:
            ensemble = search_name[7:]

        # List of all the possible column titles and the result keys they map to
        title2key = {
            'Model_Name': 'name',
            'MR_Program': 'MR_program',
            'Solution_Type': 'Solution_Type',
            'final_Rfact': 'final_Rfact',
            'final_Rfree': 'final_Rfree',
            'SHELXE_CC': 'SHELXE_CC',
            'SHELXE_ACL': 'SHELXE_ACL',
            'Bucc_final_Rfact': 'BUCC_final_Rfact',
            'Bucc_final_Rfree': 'BUCC_final_Rfree',
            'ARP_final_Rfact': 'ARP_final_Rfact',
            'ARP_final_Rfree': 'ARP_final_Rfree',
        }

        results = []
        header = None
        with open(tfile) as table:
            for line in table:
                line = line.strip()
                if not line:
                    # Blank lines carry no data; don't report them as corrupted rows
                    continue

                # Create a result object for each line in the output file
                result = self.createDict()
                result['ensemble_name'] = ensemble
                result['Search_directory'] = jobDir

                if not header:
                    # Processing header
                    header = line.split()
                    unknown = [f for f in header if f not in title2key]
                    if unknown:
                        logger.critical("jobDir {0}: Problem with field {1} in headerline: {2}".format(jobDir, unknown[0], line))
                        result['Solution_Type'] = "problem-header-file.dat"
                        self._getUnfinishedResult(result)
                        results.append(result)
                        return results
                    continue

                fields = line.split()
                if len(fields) != len(header):
                    logger.debug("jobDir {0}: Problem getting dataline: {1}".format(jobDir, line))
                    result['Solution_Type'] = "corrupted-data-tfile.dat"
                    self._getUnfinishedResult(result)
                    results.append(result)
                    continue

                # The headers tell us which key each column belongs to
                for title, v in zip(header, fields):
                    if v == '--':
                        v = None  # non-valid values in table are indicated by --
                    result[title2key[title]] = v

                # processMrbumpPkl files these statistics under the REFMAC keys,
                # which are the ones sorting and reporting look at; mirror them here
                if result.get('final_Rfact') is not None:
                    result['REFMAC_Rfact'] = result['final_Rfact']
                if result.get('final_Rfree') is not None:
                    result['REFMAC_Rfree'] = result['final_Rfree']

                program = result["MR_program"]
                prog = program.lower()
                dirName = result['name'][:-6]
                mrDir = os.path.join(jobDir, 'data', dirName, 'unmod', 'mr', prog)
                result["MR_directory"] = mrDir
                stem = "{0}_loc0_ALL_{1}_UNMOD".format(prog, result['ensemble_name'])

                # See which pdb files were created
                mrPdb = os.path.join(mrDir, "refine", stem + ".1.pdb")
                if program == 'PHASER':
                    self._set_if_file(result, 'PHASER_pdbout', mrPdb)
                elif prog == 'molrep':
                    self._set_if_file(result, 'MOLREP_pdbout', mrPdb)

                self._set_if_file(result, "REFMAC_pdbout",
                                  os.path.join(mrDir, 'refine', "refmac_" + stem + ".pdb"))
                self._set_if_file(result, "SHELXE_pdbout",
                                  os.path.join(mrDir, 'build', 'shelxe', "shelxe_" + stem + ".pdb"))
                self._set_if_file(result, "SXRBUCC_pdbout",
                                  os.path.join(mrDir, 'build', 'shelxe', 'rebuild', 'buccaneer',
                                               "buccSX_output.pdb"))
                self._set_if_file(result, "SXRARP_pdbout",
                                  os.path.join(mrDir, 'build', 'shelxe', 'rebuild', 'arpwarp',
                                               "refmacSX_output_warpNtrace.pdb"))

                self.analyseResult(result)
                results.append(result)

        return results

    def processMrbumpPkl(self, resultsPkl):
        """Return the result dictionaries stored in an MRBUMP results pickle.

        The pickle is expected to map model name -> MR program -> dictionary.
        Entries whose values (other than Search_directory) are all empty are
        skipped because the job did not run.
        """
        # NOTE(review): unpickling runs arbitrary code - only load trusted files
        with open(resultsPkl) as f:
            rD = cPickle.load(f)
        if not rD:
            return []

        results = []
        for name, d1 in rD.items():
            for mrprog, d2 in d1.items():
                # Check if all the entries are None - means job didn't run.
                # Should probably think of a better way to spot that (Search_directory is always set)
                if not any(v for k, v in d2.items() if k != 'Search_directory'):
                    continue
                d = copy.copy(d2)
                d.pop('SearchModel_filename', None)
                d['name'] = name
                # name is e.g.: loc0_ALL_c1_tl100_r2_allatom_UNMOD
                d['ensemble_name'] = name[9:-6]
                # Add MR program as dictionary entry
                d['MR_program'] = mrprog
                # Hack for old versions
                if 'JobDirectory' in d:
                    d['MR_directory'] = d.pop('JobDirectory')
                    d['Search_directory'] = os.sep.join(d['MR_directory'].split(os.sep)[:-5])
                if 'final_Rfree' in d:
                    d['REFMAC_Rfree'] = d.pop('final_Rfree')
                    d['REFMAC_Rfact'] = d.pop('final_Rfact', None)
                results.append(d)
        return results

    def _processFailed(self, mrbump_dir, failed):
        """Generate dictionaries for failed results

        ``failed`` maps ensemble name to the reason, which is stored as the
        Solution_Type of the generated result.
        """
        results = []
        for ensemble, reason in failed.items():
            d = self.createDict()
            # name hard-coded
            d['name'] = "loc0_ALL_" + ensemble + "_UNMOD"
            d['ensemble_name'] = ensemble
            d['Search_directory'] = os.path.join(mrbump_dir, 'search_' + ensemble + '_mrbump')
            d['Solution_Type'] = reason
            results.append(d)

        logger.debug("Added {0} MRBUMP result failures".format(len(failed)))
        return results

    def _purgeFailed(self):
        """Remove the MRBUMP directories of any jobs that don't pass the keep criteria and archive their job dictionaries"""
        # Skip any that are unfinished
        completed = [r for r in self.results if not job_unfinished(r)]
        if not completed:
            return

        # Keep the top TOP_KEEP SHELXE_CC and PHASER_TFZ - these could be the same jobs and we may not even
        # have TOP_KEEP completed
        to_keep = []
        for priority in ('SHELXE_CC', 'PHASER_TFZ'):
            for r in self.sortResults(completed, prioritise=priority)[:TOP_KEEP]:
                if r not in to_keep:
                    to_keep.append(r)

        # Remove the directories and archive the dictionaries
        for r in completed:
            if r in to_keep:
                continue
            pkl = os.path.join(self.pdir, "{0}.pkl".format(r['ensemble_name']))
            with open(pkl, 'w') as f:
                cPickle.dump(r, f)
            # The recorded directory may never have existed (e.g. failed jobs)
            search_dir = r.get('Search_directory')
            if search_dir and os.path.isdir(search_dir):
                shutil.rmtree(search_dir)

    def results_table(self, results):
        """Return ``results`` as a list of rows, the first row being the column keys.

        Values missing from a result are reported as None.
        """
        keys = ['ensemble_name', 'MR_program', 'Solution_Type']
        keys += _resultsKeys(results)
        resultsTable = [keys]
        for r in results:
            resultsTable.append([r.get(k) for k in keys])
        return resultsTable

    def sortResults(self, results=None, prioritise=None):
        """
        Sort results in place, best first, and return the sorted list.

        The first score from the list below that has a valid value in at least
        one result is used as the sort key: SHELXE_CC (highest first),
        BUCC_final_Rfact, ARP_final_Rfree, REFMAC_Rfree (lowest first) and
        finally PHASER_TFZ (highest first). If no result has any valid score
        the order is left unchanged.

        Parameters
        ----------
        results : list
           Result dictionaries to sort; defaults to ``self.results``
        prioritise : str
           If "PHASER_TFZ", only PHASER_TFZ is considered as the sort key

        Returns
        -------
        list
           The list that was sorted
        """
        # Backwards compatibility: the signature used to be sortResults(prioritise=None)
        if isinstance(results, self._STRING_TYPES):
            results, prioritise = None, results
        if results is None:
            results = self.results

        def any_valid(key, is_valid):
            """True if any result holds a non-empty value for key that passes is_valid."""
            for r in results:
                v = r.get(key)
                if v and is_valid(float(v)):
                    return True
            return False

        def positive(v):
            return v > 0.0

        def below_one(v):
            return v < 1.0

        # (key, validity test, higher is better), in order of preference
        criteria = []
        if prioritise != "PHASER_TFZ":
            criteria += [('SHELXE_CC', positive, True),
                         ('BUCC_final_Rfact', below_one, False),
                         ('ARP_final_Rfree', below_one, False),
                         ('REFMAC_Rfree', below_one, False)]
        criteria.append(('PHASER_TFZ', positive, True))

        for key, is_valid, higher_is_better in criteria:
            if not any_valid(key, is_valid):
                continue
            # Results without a value always sort to the end
            missing = float(0) if higher_is_better else float('inf')

            def sortf(r, key=key, missing=missing):
                return missing if r.get(key) is None else float(r[key])

            results.sort(key=sortf, reverse=higher_is_better)
            break
        return results

    def summariseResults(self, mrbump_dir):
        """Return a string summarising the results"""
        results = self.extractResults(mrbump_dir)
        if len(results):
            return self.summaryString()
        return "\n!!! No results found in directory: {0}\n".format(mrbump_dir)

    def summaryString(self):
        """Return a string suitable for printing the sorted results"""
        resultsTable = self.results_table(self.results)

        # Format the results
        table = printTable.Table()
        summary = table.pprint_table(resultsTable)

        r = "\n\nOverall Summary:\n\n"
        r += summary

        # Nothing to point at when there are no results at all
        if not self.results:
            return r + '\n\n'

        # Hack need to think of a better way to do this when there are no valid results
        top = self.results[0]
        k = None
        for p in ['Search_directory', 'MR_directory']:
            if p in top:
                k = p
        assert k, "Missing search directory key in results dictionary"
        if top[k]:
            r += '\nBest Molecular Replacement results so far are in:\n\n'
            r += top[k]
        r += '\n\n'

        return r

    def topFiles(self, num_results=3):
        """Return a list of dictionaries listing the top num_results PDB and MTZ files

        Parameters
        ----------
        num_results : int
           How many of the top results to return

        Returns
        -------
        topf : list
           A list of dictionaries, one per result that has both a PDB and an
           MTZ file recorded, with name, type, info, pdb and mtz keys.
           None is returned when no result has such a pair of files.

        """
        # list of PDB, MTZ, Explanation of file type - ordered by their desirability
        poss = [('SXRARP', 'SXRARP_pdbout', 'SXRARP_mtzout', 'ARPWARP rebuild of SHELXE trace of MR result'),
                ('SXRBUCC', 'SXRBUCC_pdbout', 'SXRBUCC_mtzout', 'BUCCANEER rebuild of SHELXE trace of MR result'),
                ('SHELXE', 'SHELXE_pdbout', 'SHELXE_mtzout', 'SHELXE trace of MR result'),
                ('ARP', 'ARP_pdbout', 'ARP_mtzout', 'ARPWARP rebuild of MR result'),
                ('BUCC', 'BUCC_pdbout', 'BUCC_mtzout', 'BUCCANEER rebuild of MR result'),
                ('REFMAC', 'REFMAC_pdbout', 'REFMAC_mtzout', 'REFMAC-refined MR result')]
        topf = []
        for result in self.results[:num_results]:
            for stype, pdb_key, mtz_key, source in poss:
                # Don't check the paths exist as the unittests use files that don't actually exist
                if result.get(pdb_key) and result.get(mtz_key):
                    topf.append({'name': result['ensemble_name'],
                                 'type': stype,
                                 'info': source,
                                 'pdb': result[pdb_key],
                                 'mtz': result[mtz_key]})
                    break  # Stop as soon as we find one
        return topf if topf else None
            
def _resultsKeys(results):
    keys = []
    # Build up list of keys we want to print based on what we find in the results
    if any([True for r in results if r['PHASER_LLG']]):
        keys += ['PHASER_LLG']
    if any([True for r in results if r['PHASER_TFZ']]):
        keys += ['PHASER_TFZ']
    if any([True for r in results if r['REFMAC_Rfree'] and r['REFMAC_Rfree'] < 1.0]):
        keys += ['REFMAC_Rfact', 'REFMAC_Rfree']
    if any([True for r in results if r['BUCC_final_Rfact'] and r['BUCC_final_Rfact'] < 1.0]):
        keys += ['BUCC_final_Rfact', 'BUCC_final_Rfree']
    if any([True for r in results if r['ARP_final_Rfact'] and r['ARP_final_Rfact'] < 1.0]):
        keys += ['ARP_final_Rfact', 'ARP_final_Rfree']
    if any([True for r in results if r['SHELXE_CC']]):
        keys += ['SHELXE_CC', 'SHELXE_ACL']
    if any([True for r in results if r['SXRBUCC_final_Rfact']]):
        keys += ['SXRBUCC_final_Rfact', 'SXRBUCC_final_Rfree']
    if any([True for r in results if r['SXRARP_final_Rfact']]):
        keys += ['SXRARP_final_Rfact', 'SXRARP_final_Rfree']
    return keys

def checkSuccess(script_path):
    """Check if a job ran successfully.

    Parameters
    ----------
    script_path : str
       Path to the MrBUMP script

    Returns
    -------
    bool
       True if success

    Notes
    -----
    The pickled results table of the job is read from
    ``search_<script name>_mrbump/results/resultsTable.pkl`` next to the
    script. Success means that the best result satisfies ``jobSucceeded``;
    False is returned when the file is missing or holds no results.

    """
    directory, script = os.path.split(script_path)
    scriptname = os.path.splitext(script)[0]
    rfile = os.path.join(directory, 'search_' + scriptname + '_mrbump',
                         'results', 'resultsTable.pkl')
    if not os.path.isfile(rfile):
        return False

    # Results summary object to parse the results file
    mrbR = ResultsSummary()

    # Put into order and take top one. The results are sorted through the
    # summary object's own list so that they really are ordered before the
    # best one is picked.
    mrbR.results = mrbR.processMrbumpPkl(rfile)
    mrbR.sortResults()
    if not mrbR.results:
        return False
    return jobSucceeded(mrbR.results[0])

def finalSummary(amoptd):
    """Return a final summary of the job as a printable string.

    Parameters
    ----------
    amoptd : dict
       AMPLE options dictionary. 'mrbump_results' must be present; when it is
       empty 'mrbump_dir' is used for the message that is returned instead.
       If 'ensembles_data' is present and none of 'ideal_helices', 'homologs'
       or 'single_model_mode' is set, each MRBUMP result is merged with the
       ensemble entry of the same name and two ensemble columns are added.

    Returns
    -------
    str
       The formatted table, followed by the MR directory of the first result
       when there is one.
    """
    mrbump_data = amoptd['mrbump_results']
    if not mrbump_data:
        return "Could not find any MRBUMP results in directory: {0}!".format(amoptd['mrbump_dir'])

    # Absent flags are treated the same as flags that are switched off
    special_mode = (amoptd.get('ideal_helices') or amoptd.get('homologs') or
                    amoptd.get('single_model_mode'))
    if 'ensembles_data' in amoptd and not special_mode:
        results = []
        # Merge dictionaries together
        ensembles_data = amoptd['ensembles_data']
        for mrb in mrbump_data:
            d = copy.copy(mrb)
            for ed in ensembles_data:
                if ed['name'] == d['ensemble_name']:
                    d.update(ed)
                    results.append(d)
        keys = ['ensemble_name', 'Solution_Type', 'MR_program']
        keys += _resultsKeys(results)
        keys += ['subcluster_num_models', 'num_residues']
    else:
        results = mrbump_data
        keys = ['name', 'Solution_Type', 'MR_program']
        keys += _resultsKeys(results)

    # Header row first, then one row per result; missing values show as None
    resultsTable = [keys]
    for result in results:
        resultsTable.append([result.get(k) for k in keys])

    # Format the results
    table = printTable.Table()
    summary = table.pprint_table(resultsTable)

    r = "\n\nOverall Summary:\n\n"
    r += summary
    if len(results) and "MR_directory" in results[0]:
        r += '\nBest Molecular Replacement results so far are in:\n\n'
        r += str(results[0]["MR_directory"])
        r += '\n\n'
    return r

def jobSucceeded(job_dict):
    """Return True if the scores in ``job_dict`` indicate a successful job.

    A job succeeds when any one of the following holds (missing or empty
    values never satisfy a criterion):

    * SHELXE_CC >= 25.0 and SHELXE_ACL >= 10
    * BUCC_final_Rfree, ARP_final_Rfree or REFMAC_Rfree <= 0.4
    * PHASER_LLG >= 120 and PHASER_TFZ >= 8.0

    Scores may be numbers or numeric strings.
    """
    PHASER_TFZ = 8.0
    PHASER_LLG = 120
    RFREE = 0.4
    SHELXE_CC = 25.0
    SHELXE_ACL = 10

    def score(key):
        """The score as a float, or None when it is absent or empty."""
        value = job_dict.get(key)
        return float(value) if value else None

    def at_least(key, threshold):
        value = score(key)
        return value is not None and value >= threshold

    def at_most(key, threshold):
        value = score(key)
        return value is not None and value <= threshold

    if at_least('SHELXE_CC', SHELXE_CC) and at_least('SHELXE_ACL', SHELXE_ACL):
        return True
    for rfree_key in ('BUCC_final_Rfree', 'ARP_final_Rfree', 'REFMAC_Rfree'):
        if at_most(rfree_key, RFREE):
            return True
    return at_least('PHASER_LLG', PHASER_LLG) and at_least('PHASER_TFZ', PHASER_TFZ)

def job_unfinished(job_dict):
    """Return True if the job has not produced a result yet.

    That is the case when the dictionary has no 'Solution_Type' key or when
    its Solution_Type is "unfinished" or "no_job_directory".
    """
    if 'Solution_Type' not in job_dict:
        return True
    return job_dict['Solution_Type'] in ("unfinished", "no_job_directory")

def unfinished_scripts(amoptd):
    """See if there are any unfinished mrbump jobs in a mrbump directory and return a list of the scripts

    Parameters
    ----------
    amoptd : dict
       AMPLE options dictionary; 'mrbump_dir' names the directory to inspect.
       As a side effect its 'mrbump_results' entry is replaced by the results
       extracted from that directory.

    Returns
    -------
    list
       Paths of the job scripts of all unfinished jobs; empty when the
       directory is not set, does not exist or holds no results.
    """
    mrbump_dir = amoptd.get('mrbump_dir')
    if mrbump_dir is None or not os.path.isdir(mrbump_dir):
        return []

    amoptd['mrbump_results'] = ResultsSummary().extractResults(mrbump_dir)
    if not len(amoptd['mrbump_results']):
        return []

    return [os.path.join(mrbump_dir, r['ensemble_name'] + ample_util.SCRIPT_EXT)
            for r in amoptd['mrbump_results'] if job_unfinished(r)]

def write_mrbump_files(ensemble_pdbs, amoptd, job_time=MRBUMP_RUNTIME, ensemble_options=None, directory=None):
    """Write the MRBUMP job files for all the ensembles.

    Arguments:
    ensemble_pdbs -- list of the ensembles, each a single pdb file.
    amoptd -- dictionary with job options.
    job_time -- maximum permissible runtime (mainly used for batch queueing systems).
    ensemble_options -- dictionary with ensemble-specific keywords e.g. ensemble_options[ensemble_name] = {'ncopies' : ncopies}
    directory -- working directory to write files to.

    Returns the list of job script paths, one per ensemble.
    Raises RuntimeError if no job scripts were created.
    """
    if not directory:
        directory = os.getcwd()

    job_scripts = []
    for ensemble_pdb in ensemble_pdbs:
        name = os.path.splitext(os.path.basename(ensemble_pdb))[0]  # Get name from pdb path

        # Get any options specific to this ensemble. These are looked up afresh
        # for every ensemble so that the options of one ensemble are never
        # carried over to a later ensemble that has none of its own.
        keyword_options = {}
        if ensemble_options and name in ensemble_options:
            keyword_options = ensemble_options[name]

        # Generate dictionary with all the options for this job and write to keyword file
        keyword_dict = mrbump_cmd.keyword_dict(ensemble_pdb, name, amoptd, keyword_options)
        keyword_file = os.path.join(directory, name + '.mrbump')
        keyword_str = mrbump_cmd.mrbump_keyword_file(keyword_dict)
        with open(keyword_file, 'w') as f:
            f.write(keyword_str)

        script = write_jobscript(name,
                                 keyword_file,
                                 amoptd,
                                 directory=directory,
                                 job_time=job_time)
        job_scripts.append(script)

    if not job_scripts:
        msg = "No job scripts created!"
        logger.critical(msg)
        raise RuntimeError(msg)

    return job_scripts

def write_jobscript(name, keyword_file, amoptd, directory=None, job_time=86400, extra_options=None):
    """
    Create the script to run MrBump for this PDB.

    Arguments:
    name -- job name; the script is written to <directory>/<name><ample_util.SCRIPT_EXT>.
    keyword_file -- path to the MRBUMP keyword file for this job.
    amoptd -- dictionary with job options; 'mtz' and 'mr_sequence' are used.
    directory -- directory to write the script to (defaults to the current directory).
    job_time -- accepted for interface compatibility; not used when writing the script.
    extra_options -- accepted for interface compatibility; not used.

    Returns the absolute path of the script, which is made executable.
    """
    if not directory:
        directory = os.getcwd()

    # Next the script to run mrbump
    script_path = os.path.abspath(os.path.join(directory, name + ample_util.SCRIPT_EXT))
    with open(script_path, "w") as job_script:
        # Header
        if not sys.platform.startswith("win"):
            script_header = '#!/bin/sh\n'
            # The script runs under /bin/sh, so use the POSIX '[' test: the
            # bash-only '[[' is not understood by shells such as dash
            script_header += '[ ! -d "$CCP4_SCR" ] && mkdir -p "$CCP4_SCR"\n\n'
            job_script.write(script_header)

        # Get the mrbump command-line
        jobcmd = mrbump_cmd.mrbump_cmd(name, amoptd['mtz'], amoptd['mr_sequence'], keyword_file)
        job_script.write(jobcmd)

    # Make executable
    os.chmod(script_path, 0o777)
    logger.debug("Wrote MRBUMP script: {0}".format(script_path))

    return script_path


# Command-line use: summarise the MRBUMP results found in the directory given
# as the only argument, or in the current directory when it is not given.
if __name__ == "__main__":
    if len(sys.argv) != 2:
        mrbump_dir = os.getcwd()
    else:
        mrbump_dir = os.path.join(os.getcwd(), sys.argv[1])

    logging.basicConfig()
    logging.getLogger().setLevel(logging.DEBUG)

    r = ResultsSummary()
    # Single-argument call form prints the same text under Python 2 and 3
    print(r.summariseResults(mrbump_dir))