package org.baderlab.csplugins.enrichmentmap.task;
import java.util.Collection;
import java.util.Map;
import java.util.Set;
import java.util.Timer;
import java.util.TimerTask;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.function.Consumer;
import org.baderlab.csplugins.enrichmentmap.model.EMCreationParameters;
import org.baderlab.csplugins.enrichmentmap.model.EMCreationParameters.SimilarityMetric;
import org.baderlab.csplugins.enrichmentmap.model.EMDataSet;
import org.baderlab.csplugins.enrichmentmap.model.EnrichmentMap;
import org.baderlab.csplugins.enrichmentmap.model.GeneSet;
import org.baderlab.csplugins.enrichmentmap.model.GenesetSimilarity;
import org.baderlab.csplugins.enrichmentmap.model.SimilarityKey;
import org.baderlab.csplugins.enrichmentmap.util.DiscreteTaskMonitor;
import org.cytoscape.work.AbstractTask;
import org.cytoscape.work.TaskMonitor;
import com.google.common.collect.Sets;
/**
 * Computes the similarity between every pair of gene sets of interest in parallel
 * and passes the resulting map to a consumer.
 * <p>
 * Three cases:
 * - single edges
 * - multiple edges
 * - compound edges
 * <p>
 * When the "create distinct edges" parameter is set, one similarity is computed per
 * data set (keyed by the data set name). Otherwise a single "compound" similarity is
 * computed over the unioned gene sets (keyed with a null data set name).
 */
public class ComputeSimilarityTaskParallel extends AbstractTask {

    /** Maximum time to wait for all similarity batches to complete. */
    private static final long TIMEOUT_HOURS = 3;

    /** How often (in milliseconds) the cancellation flag is polled. */
    private static final long CANCEL_POLL_MILLIS = 1000;

    private final EnrichmentMap map;
    private final Consumer<Map<SimilarityKey,GenesetSimilarity>> consumer;

    /**
     * @param map the enrichment map whose gene sets of interest are compared
     * @param consumer receives the computed similarities once all batches have finished;
     *                 not called if the task is cancelled or fails
     */
    public ComputeSimilarityTaskParallel(EnrichmentMap map, Consumer<Map<SimilarityKey,GenesetSimilarity>> consumer) {
        this.map = map;
        this.consumer = consumer;
    }

    /**
     * Submits one batch per gene set to a fixed thread pool (one thread per available
     * processor), waits for the batches to finish and hands the result to the consumer.
     *
     * @param tm the task monitor used to report progress
     * @throws InterruptedException if the calling thread is interrupted while waiting for the batches
     * @throws IllegalStateException if the batches do not finish within the timeout
     */
    @Override
    public void run(TaskMonitor tm) throws InterruptedException {
        int cpus = Runtime.getRuntime().availableProcessors();
        ExecutorService executor = Executors.newFixedThreadPool(cpus);
        Timer timer = new Timer();

        Map<SimilarityKey,GenesetSimilarity> similarities;
        boolean finished = false;
        try {
            boolean distinct = map.getParams().getCreateDistinctEdges();
            similarities = startComputeSimilarities(tm, executor, distinct, !distinct);

            // Support cancellation: poll the cancelled flag and abort the pool when it is set.
            timer.scheduleAtFixedRate(new TimerTask() {
                @Override
                public void run() {
                    if(cancelled) {
                        executor.shutdownNow();
                    }
                }
            }, 0, CANCEL_POLL_MILLIS);

            executor.shutdown();
            finished = executor.awaitTermination(TIMEOUT_HOURS, TimeUnit.HOURS);
        } finally {
            // Always release the timer thread, and make sure no worker outlives this task
            // when we leave because of an exception, an interrupt or a timeout.
            timer.cancel();
            if(!finished) {
                executor.shutdownNow();
            }
        }

        if(cancelled)
            return;

        // Never hand a partially filled map to the consumer.
        if(!finished)
            throw new IllegalStateException("Computing geneset similarities did not finish within " + TIMEOUT_HOURS + " hours");

        consumer.accept(similarities);
    }

    /**
     * Queues the similarity computations on the executor and returns immediately.
     * The returned map is filled in by the worker threads; it is only complete once
     * the executor has terminated.
     *
     * @param tm the task monitor used to report progress
     * @param executor the executor that runs the batches
     * @param distinct if true, compute one similarity per data set
     * @param compound if true, compute one similarity over the unioned gene sets
     * @return the (concurrently populated) map of similarities
     */
    private Map<SimilarityKey,GenesetSimilarity> startComputeSimilarities(TaskMonitor tm, ExecutorService executor, boolean distinct, boolean compound) {
        Set<String> names = map.getAllGeneSetOfInterestNames();
        Map<String,Set<Integer>> unionedGenesets = compound ? map.unionAllGeneSetsOfInterest() : null;

        DiscreteTaskMonitor taskMonitor = discreteTaskMonitor(tm, names.size());
        EMCreationParameters params = map.getParams();
        String edgeType = params.getEnrichmentEdgeType();

        Map<SimilarityKey,GenesetSimilarity> similarities = new ConcurrentHashMap<>();
        Collection<EMDataSet> dataSets = map.getDataSetList();

        for(final String geneset1Name : names) {
            // Compute similarities in batches, creating a Runnable for every similarity pair would create too many objects
            executor.execute(() -> {
                for(final String geneset2Name : names) {
                    // shutdownNow() interrupts the workers; stop early so cancellation takes effect promptly.
                    if(Thread.currentThread().isInterrupted())
                        return;

                    if(geneset1Name.equalsIgnoreCase(geneset2Name))
                        continue; // don't compare two identical gene sets

                    if(distinct) {
                        for(EMDataSet dataset : dataSets) {
                            SimilarityKey key = new SimilarityKey(geneset1Name, geneset2Name, edgeType, dataset.getName());
                            if(similarities.containsKey(key))
                                continue; // already computed for this pair

                            Map<String,GeneSet> genesets = dataset.getGeneSetsOfInterest().getGeneSets();
                            GeneSet geneset1 = genesets.get(geneset1Name);
                            GeneSet geneset2 = genesets.get(geneset2Name);
                            if(geneset1 != null && geneset2 != null) {
                                // returns null if the similarity coefficient doesn't pass the cutoff
                                GenesetSimilarity similarity = computeGenesetSimilarity(params, geneset1Name, geneset2Name, geneset1.getGenes(), geneset2.getGenes(), dataset.getName());
                                if(similarity != null) {
                                    similarities.put(key, similarity);
                                }
                            }
                        }
                    }

                    if(compound) {
                        SimilarityKey key = new SimilarityKey(geneset1Name, geneset2Name, edgeType, null);
                        if(!similarities.containsKey(key)) {
                            Set<Integer> geneset1 = unionedGenesets.get(geneset1Name);
                            Set<Integer> geneset2 = unionedGenesets.get(geneset2Name);
                            // Same guard as the distinct case: skip pairs with a missing gene set.
                            if(geneset1 != null && geneset2 != null) {
                                // returns null if the similarity coefficient doesn't pass the cutoff
                                GenesetSimilarity similarity = computeGenesetSimilarity(params, geneset1Name, geneset2Name, geneset1, geneset2, "compound");
                                if(similarity != null) {
                                    similarities.put(key, similarity);
                                }
                            }
                        }
                    }
                }
                taskMonitor.inc();
            });
        }

        return similarities;
    }

    /**
     * Creates the progress monitor used while computing similarities.
     *
     * @param tm the underlying task monitor
     * @param size the number of batches that will be reported
     */
    private static DiscreteTaskMonitor discreteTaskMonitor(TaskMonitor tm, int size) {
        DiscreteTaskMonitor taskMonitor = new DiscreteTaskMonitor(tm, size);
        taskMonitor.setTitle("Computing Geneset Similarities...");
        taskMonitor.setStatusMessageTemplate("Computing Geneset Similarity: {0} of {1} tasks");
        return taskMonitor;
    }

    /**
     * Computes the similarity coefficient of two gene sets according to the similarity
     * metric configured in the parameters (Jaccard, overlap, or a weighted combination of both).
     * <p>
     * A ratio whose denominator is zero (e.g. both gene sets are empty) is treated as 0
     * instead of producing NaN, because NaN would never be rejected by a cutoff comparison.
     *
     * @param params supplies the similarity metric and the combined constant
     * @param intersection the intersection of genes1 and genes2
     * @param union the union of genes1 and genes2
     * @param genes1 the first gene set
     * @param genes2 the second gene set
     * @return the similarity coefficient
     */
    public static double computeSimilarityCoeffecient(EMCreationParameters params, Set<?> intersection, Set<?> union, Set<?> genes1, Set<?> genes2) {
        // Note: Do not call intersection.size() or union.size() more than once on a Guava SetView!
        // It is a potentially slow operation that needs to be recalculated each time it is called.
        SimilarityMetric metric = params.getSimilarityMetric();

        if(metric == SimilarityMetric.JACCARD) {
            return ratio(intersection.size(), union.size());
        }
        if(metric == SimilarityMetric.OVERLAP) {
            return ratio(intersection.size(), Math.min(genes1.size(), genes2.size()));
        }

        // It must be combined. Compute a combination of the overlap and Jaccard coefficients. We need both the Jaccard and the Overlap.
        int intersectionSize = intersection.size(); // do not call size() more than once on the same SetView
        double jaccard = ratio(intersectionSize, union.size());
        double overlap = ratio(intersectionSize, Math.min(genes1.size(), genes2.size()));
        double k = params.getCombinedConstant();
        return (k * overlap) + ((1 - k) * jaccard);
    }

    /** Returns numerator / denominator as a double, or 0 when the denominator is 0. */
    private static double ratio(int numerator, int denominator) {
        return denominator == 0 ? 0.0 : (double) numerator / (double) denominator;
    }

    /**
     * Computes the similarity between two gene sets.
     *
     * @param params supplies the similarity metric, the cutoff and the edge type
     * @param geneset1Name name of the first gene set
     * @param geneset2Name name of the second gene set
     * @param geneset1 genes of the first gene set
     * @param geneset2 genes of the second gene set
     * @param dataset the data set name recorded on the similarity
     * @return the similarity, or null if the coefficient is below the similarity cutoff
     */
    static GenesetSimilarity computeGenesetSimilarity(EMCreationParameters params, String geneset1Name, String geneset2Name, Set<Integer> geneset1, Set<Integer> geneset2, String dataset) {
        Set<Integer> intersection = Sets.intersection(geneset1, geneset2);
        Set<Integer> union = Sets.union(geneset1, geneset2);

        double coefficient = computeSimilarityCoeffecient(params, intersection, union, geneset1, geneset2);
        if(coefficient < params.getSimilarityCutoff())
            return null;

        String edgeType = params.getEnrichmentEdgeType();
        return new GenesetSimilarity(geneset1Name, geneset2Name, coefficient, edgeType, intersection, dataset);
    }
}