/**
** EnrichmentMap Cytoscape Plugin
**
** Copyright (c) 2008-2009 Bader Lab, Donnelly Centre for Cellular and Biomolecular
** Research, University of Toronto
**
** Contact: http://www.baderlab.org
**
** Code written by: Ruth Isserlin
** Authors: Daniele Merico, Ruth Isserlin, Oliver Stueker, Gary D. Bader
**
** This library is free software; you can redistribute it and/or modify it
** under the terms of the GNU Lesser General Public License as published
** by the Free Software Foundation; either version 2.1 of the License, or
** (at your option) any later version.
**
** This library is distributed in the hope that it will be useful, but
** WITHOUT ANY WARRANTY, WITHOUT EVEN THE IMPLIED WARRANTY OF
** MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. The software and
** documentation provided hereunder is on an "as is" basis, and
** University of Toronto
** has no obligations to provide maintenance, support, updates,
** enhancements or modifications. In no event shall the
** University of Toronto
** be liable to any party for direct, indirect, special,
** incidental or consequential damages, including lost profits, arising
** out of the use of this software and its documentation, even if
** University of Toronto
** has been advised of the possibility of such damage.
** See the GNU Lesser General Public License for more details.
**
** You should have received a copy of the GNU Lesser General Public License
** along with this library; if not, write to the Free Software Foundation,
** Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
**
**/
// $Id$
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
// $HeadURL$
package org.baderlab.csplugins.enrichmentmap.task;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import org.baderlab.csplugins.enrichmentmap.model.EMDataSet;
import org.baderlab.csplugins.enrichmentmap.model.EMDataSet.Method;
import org.baderlab.csplugins.enrichmentmap.model.EnrichmentMap;
import org.baderlab.csplugins.enrichmentmap.model.EnrichmentResult;
import org.baderlab.csplugins.enrichmentmap.model.GSEAResult;
import org.baderlab.csplugins.enrichmentmap.model.GeneSet;
import org.baderlab.csplugins.enrichmentmap.model.Ranking;
import org.baderlab.csplugins.enrichmentmap.util.DiscreteTaskMonitor;
import org.baderlab.csplugins.enrichmentmap.util.NullTaskMonitor;
import org.cytoscape.work.AbstractTask;
import org.cytoscape.work.TaskMonitor;
/**
* Task to create a subset of the geneset in the total gmt file that contains
* only the genesets with pvalue and q-value less than threshold values
* specified by the user.
*/
public class InitializeGenesetsOfInterestTask extends AbstractTask {
private EnrichmentMap map;
// TEMPORARY - this flag exists to turn off throwing of exception if a gene set is missing
private boolean throwIfMissing = true;
public InitializeGenesetsOfInterestTask(EnrichmentMap map) {
this.map = map;
}
public void setThrowIfMissing(boolean throwIfMissing) {
this.throwIfMissing = throwIfMissing;
}
/**
* filter the genesets, restricting them to only those passing the user
* specified thresholds.
*
* @return true if successful and false otherwise.
*/
public boolean initializeSets(TaskMonitor tm) {
if(tm == null)
tm = new NullTaskMonitor();
DiscreteTaskMonitor taskMonitor = new DiscreteTaskMonitor(tm, map.getDataSetCount());
//create subset of genesets that contains only the genesets of interest with pvalue and qbalue less than values specified by the user.
//Go through each Dataset populating the Gene set of interest in each dataset object
Map<String, EMDataSet> datasets = map.getDataSets();
// count how many experiments (DataSets) contain the geneset
Optional<Integer> minExperiments = map.getParams().getMinExperiments();
Map<String,Integer> occurrences = minExperiments.isPresent() ? new HashMap<>() : null;
for(String datasetName : datasets.keySet()) {
taskMonitor.inc();
EMDataSet dataset = datasets.get(datasetName);
// all these maps use the geneset name as key
Map<String,EnrichmentResult> enrichmentResults = dataset.getEnrichments().getEnrichments();
Map<String,GeneSet> genesets = dataset.getSetOfGeneSets().getGeneSets();
Map<String,GeneSet> genesetsOfInterest = dataset.getGeneSetsOfInterest().getGeneSets();
// If there are no genesets associated with this dataset then get the complete set assumption being that the gmt file applies to all datasets.
if(genesets == null || genesets.isEmpty()) {
genesets = map.getAllGeneSets();
}
//if there are no enrichment Results then do nothing
if(enrichmentResults == null || enrichmentResults.isEmpty()) {
return false;
}
//iterate through the GSEA Results to figure out which genesets we want to use
for(String genesetName : enrichmentResults.keySet()) {
EnrichmentResult result = enrichmentResults.get(genesetName);
// update rank at max for leading edge calculation
if(dataset.getMethod() == Method.GSEA) {
Ranking ranks = dataset.getExpressionSets().getRanksByName(datasetName);
updateRankAtMax((GSEAResult)result, ranks);
}
GeneSet geneset = genesets.get(genesetName);
if(geneset != null) {
// while we are checking, update the size of the genesets based on post filtered data
result.setGsSize(geneset.getGenes().size());
if(result.geneSetOfInterest(map.getParams())) {
if(occurrences != null) {
occurrences.merge(genesetName, 1, (v,d) -> v + 1);
}
genesetsOfInterest.put(genesetName, geneset);
}
}
else if(throwIfMissing) { // TEMPORARY
throw new IllegalThreadStateException("The Geneset: " + genesetName + " is not found in the GMT file.");
}
}
}
// Remove gene-sets that don't pass the minimum occurrence cutoff
if(occurrences != null) {
for(EMDataSet dataset : datasets.values()) {
Map<String,GeneSet> genesetsOfInterest = dataset.getGeneSetsOfInterest().getGeneSets();
genesetsOfInterest.keySet().removeIf(geneset ->
occurrences.getOrDefault(geneset, 0) < minExperiments.get()
);
}
}
// MKTODO clear all the genesets that are not "of interest" just to free up memory
return true;
}
private void updateRankAtMax(GSEAResult current_result, Ranking ranks) {
//update the current geneset to reflect score at max
if(ranks != null) {
Set<Integer> allranks = ranks.getAllRanks();
Integer largestRank = Collections.max(allranks);
//get the max at rank for this geneset
int currentRankAtMax = current_result.getRankAtMax();
if(currentRankAtMax != -1) {
//check the ES score. If it is negative we need to adjust the rank to count from the end of the list
double NES = current_result.getNES();
int genekey = -1;
//what gene corresponds to that rank
if(NES < 0) {
//it is possible that some of the proteins in the rank list won't be rank 2gene
//conversion because some of the genes might not be in the genesets
//so the size of the list can't be used to trace up from the bottom of the
//ranks. Instead we need to get the max rank used.
currentRankAtMax = largestRank - currentRankAtMax;
//reset the rank at max to reflect that it is counted from the bottom of the list.
current_result.setRankAtMax(currentRankAtMax);
}
//check to see if this rank is in the conversion map
if(ranks.containsRank(currentRankAtMax))
genekey = ranks.getGene(currentRankAtMax);
else {
//if is possible that the gene associated with the max is not found in
//our gene 2 rank conversions because the rank by GSEA are off by 1 or two
//indexes (maybe a bug on their side).
//so depending on the NES score we need to fiddle with the rank to find the
//next protein that is the actual gene they are referring to
while(genekey == -1 && (currentRankAtMax <= largestRank && currentRankAtMax > 0)) {
if(NES < 0)
currentRankAtMax = currentRankAtMax + 1;
else
currentRankAtMax = currentRankAtMax - 1;
if(ranks.containsRank(currentRankAtMax))
genekey = ranks.getGene(currentRankAtMax);
}
}
if(genekey > -1) {
//what is the score for that gene
double scoreAtMax = ranks.getRank(genekey).getScore();
current_result.setScoreAtMax(scoreAtMax);
//update the score At max in the EnrichmentResults as well
}
}
} // end of determining the leading edge
}
@Override
public void run(TaskMonitor taskMonitor) throws Exception {
taskMonitor.setTitle("Initializing subset of genesets and results of interest");
initializeSets(taskMonitor);
taskMonitor.setStatusMessage("");
}
}