Source code for ample.ensembler.cluster_util

"""Cluster utility module"""

__author__ = "Jens Thomas, and Felix Simkovic"
__date__ = "23 Feb 2016"
__version__ = "1.0"

import logging
import random

from ample.ensembler._ensembler import Cluster

logger = logging.getLogger(__name__)


[docs]def import_cluster(cluster_models): """Import a cluster Parameters ---------- cluster_models : list, tuple A list of models in a single cluster Returns ------- list A list containing the cluster information """ cluster = Cluster() cluster.method = "import" cluster.num_clusters = 1 cluster.index = 1 cluster.models = cluster_models return [cluster]
[docs]def random_cluster(cluster_method, max_cluster_size, models, num_clusters): """Cluster decoys using madness Parameters ---------- cluster_method : str The method to be used to cluster the decoys max_cluster_size : int The maximum number of decoys per cluster models : list A list containing structure decoys num_clusters : int The number of clusters to produce Returns ------- list A list containing the clusters Raises ------ RuntimeError Cannot ramdonly cluster so few decoys RuntimeError Cannot find random clusters """ if len(models) <= max_cluster_size + 50: # completely arbitary number raise RuntimeError('Cannot randomly cluster so few models') i = 0 clusters = [] while len(clusters) < num_clusters: if i > num_clusters * 3: raise RuntimeError('Cannot find random clusters') cmodels = set(random.sample(models, max_cluster_size)) if cmodels in clusters: logger.debug('Found duplicate cluster') continue # Data on the models cluster = Cluster() cluster.method = cluster_method cluster.num_clusters = num_clusters cluster.index = i + 1 cluster.models = list(cmodels) # convert list back to set clusters.append(cluster) i += 1 return clusters