package org.baderlab.csplugins.enrichmentmap.model;
import static com.google.common.base.Strings.isNullOrEmpty;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;
import org.baderlab.csplugins.enrichmentmap.model.EMDataSet.Method;
import org.baderlab.csplugins.enrichmentmap.util.NetworkUtil;
import org.cytoscape.model.CyNetwork;
import org.cytoscape.model.CyNetworkManager;
import org.cytoscape.service.util.CyServiceRegistrar;
import com.google.common.collect.BiMap;
import com.google.common.collect.HashBiMap;
/***
* An Enrichment Map object contains the minimal information needed to build an enrichment map.
*/
public class EnrichmentMap {
private transient CyServiceRegistrar serviceRegistrar;
private long networkID;
/** Parameters used to create this map */
private final EMCreationParameters params;
private Map<String, EMDataSet> dataSets = new HashMap<>();
/** The set of genes defined in the Enrichment map. */
private BiMap<Integer, String> genes = HashBiMap.create();
/** Post analysis signature genesets associated with this map.*/
private Map<String, EMSignatureDataSet> signatureDataSets = new HashMap<>();
private int NumberOfGenes = 0;
private boolean isLegacy = false;
private boolean isDistinctExpressionSets = false;
private final Object lock = new Object();
/**
* Class Constructor Given - EnrichmentnMapParameters create a new
* enrichment map. The parameters contain all cut-offs and file names for the analysis
*/
public EnrichmentMap(EMCreationParameters params, CyServiceRegistrar serviceRegistrar) {
this.params = params;
this.serviceRegistrar = serviceRegistrar;
}
public EMDataSet createDataSet(String name, Method method, DataSetFiles files) {
if (dataSets.containsKey(name))
throw new IllegalArgumentException("DataSet with name " + name + " already exists in this enrichment map");
if (isNullOrEmpty(files.getEnrichmentFileName1()) && isNullOrEmpty(files.getGMTFileName())
&& isNullOrEmpty(files.getExpressionFileName()))
throw new IllegalArgumentException("At least one of the required files must be given");
EMDataSet ds = new EMDataSet(this, name, method, files);
dataSets.put(name, ds);
initializeFiles(ds);
return ds;
}
/**
* Method to transfer files specified in the parameters to the objects they correspond to.
*/
private void initializeFiles(EMDataSet ds) {
DataSetFiles files = ds.getDataSetFiles();
if (!isNullOrEmpty(files.getGMTFileName()))
ds.getSetOfGeneSets().setFilename(files.getGMTFileName());
// expression files
if (!isNullOrEmpty(files.getExpressionFileName()))
ds.getExpressionSets().setFilename(files.getExpressionFileName());
// enrichment results files
if (!isNullOrEmpty(files.getEnrichmentFileName1()))
ds.getEnrichments().setFilename1(files.getEnrichmentFileName1());
if (files.getEnrichmentFileName2() != null && !files.getEnrichmentFileName2().isEmpty())
ds.getEnrichments().setFilename2(files.getEnrichmentFileName2());
//phenotypes
if (!isNullOrEmpty(files.getPhenotype1()))
ds.getEnrichments().setPhenotype1(files.getPhenotype1());
if (!isNullOrEmpty(files.getPhenotype2()))
ds.getEnrichments().setPhenotype2(files.getPhenotype2());
//rank files - dataset1
if (!isNullOrEmpty(files.getRankedFile())) {
if (ds.getMethod() == Method.GSEA) {
ds.getExpressionSets().createNewRanking(Ranking.GSEARanking);
ds.getExpressionSets().getRanksByName(Ranking.GSEARanking).setFilename(files.getRankedFile());
} else {
ds.getExpressionSets().createNewRanking(ds.getName());
ds.getExpressionSets().getRanksByName(ds.getName()).setFilename(files.getRankedFile());
}
}
}
public void setServiceRegistrar(CyServiceRegistrar registrar) {
this.serviceRegistrar = registrar;
}
public boolean containsGene(String gene) {
return genes.containsValue(gene);
}
public String getGeneFromHashKey(Integer hash) {
return genes.get(hash);
}
public Integer getHashFromGene(String gene) {
// MKTOD should I toUpperCase?
return genes.inverse().get(gene);
}
public Collection<String> getAllGenes() {
return Collections.unmodifiableCollection(genes.values());
}
public Optional<Integer> addGene(String gene) {
gene = gene.toUpperCase();
Map<String,Integer> geneToHash = genes.inverse();
Integer hash = geneToHash.get(gene);
if(hash != null)
return Optional.of(hash);
Integer newHash = ++NumberOfGenes;
genes.put(newHash, gene);
return Optional.of(newHash);
}
@Deprecated // this is here to support legacy session loading
public void addGene(String gene, int id) {
genes.put(id, gene);
if(id > NumberOfGenes)
NumberOfGenes = id;
}
public int getNumberOfGenes() {
return NumberOfGenes;
}
public void setNumberOfGenes(int numberOfGenes) {
NumberOfGenes = numberOfGenes;
}
/**
* Given a set of genesets Go through the genesets and extract all the genes
* Return - hashmap of genes to hash keys (used to create an expression file
* when it is not present so user can use expression viewer to navigate
* genes in a geneset without have to generate their own dummy expression file)
*/
public Map<String, Integer> getGeneSetsGenes(Collection<GeneSet> currentGeneSets) {
Map<String, Integer> geneSetsGenes = new HashMap<>();
for (GeneSet geneSet : currentGeneSets) {
// Compare the HashSet of dataset genes to the HashSet of the current Geneset
// only keep the genes from the geneset that are in the dataset genes
for (Integer geneKey : geneSet.getGenes()) {
// Get the current geneName
if (genes.containsKey(geneKey)) {
String name = genes.get(geneKey);
geneSetsGenes.put(name, geneKey);
}
}
}
return geneSetsGenes;
}
/**
* Filter all the genesets by the dataset genes If there are multiple sets
* of genesets make sure to filter by the specific dataset genes
*/
public void filterGenesets() {
for (EMDataSet ds : dataSets.values()) {
// only filter the genesets if dataset genes are not null or empty
if (ds.getDataSetGenes() != null && !ds.getDataSetGenes().isEmpty()) {
ds.getSetOfGeneSets().filterGeneSets(ds.getDataSetGenes());
}
}
}
public String getName() {
final String undefined = "-- UNDEFINED --";
if(serviceRegistrar == null)
return undefined;
CyNetworkManager networkManager = serviceRegistrar.getService(CyNetworkManager.class);
if(networkManager == null)
return undefined;
CyNetwork net = networkManager.getNetwork(networkID);
if(net == null)
return undefined;
return NetworkUtil.getName(net);
}
/*
* Return a hash of all the genesets in the set of genesets regardless of which dataset it comes from.
*/
@Deprecated
public Map<String, GeneSet> getAllGeneSets() {
// Go through each dataset and get the genesets from each
Map<String, GeneSet> allGeneSets = new HashMap<>();
synchronized (lock) {
// If a GeneSet appears in more than one DataSet, then its totally arbitrary which version of it gets picked
// If a GeneSet appears in an enrichment file it will override the one with the same name in the global GMT file
for (EMDataSet ds : dataSets.values()) {
allGeneSets.putAll(ds.getSetOfGeneSets().getGeneSets());
}
if (signatureDataSets != null) {
for (EMSignatureDataSet sds : signatureDataSets.values())
allGeneSets.putAll(sds.getGeneSetsOfInterest().getGeneSets());
}
}
return allGeneSets;
}
/*
* Return a hash of all the genesets but not inlcuding the signature genesets.
*/
@Deprecated
public Map<String, GeneSet> getEnrichmentGenesets() {
//go through each dataset and get the genesets from each
Map<String, GeneSet> allGeneSets = new HashMap<>();
for (EMDataSet ds : dataSets.values()) {
Map<String, GeneSet> geneSets = ds.getSetOfGeneSets().getGeneSets();
allGeneSets.putAll(geneSets);
}
return allGeneSets;
}
@Deprecated
public Map<String, GeneSet> getAllGeneSetsOfInterest() {
//go through each dataset and get the genesets from each
Map<String, GeneSet> allGeneSets = new HashMap<>();
for (EMDataSet ds : dataSets.values())
allGeneSets.putAll(ds.getGeneSetsOfInterest().getGeneSets());
// if there are post analysis genesets, add them to the set of all genesets
if (signatureDataSets != null) {
for (EMSignatureDataSet sds : signatureDataSets.values())
allGeneSets.putAll(sds.getGeneSetsOfInterest().getGeneSets());
}
return allGeneSets;
}
// MKTODO write a JUnit
public Map<String, Set<Integer>> unionAllGeneSetsOfInterest() {
Map<String, Set<Integer>> allGeneSets = new HashMap<>();
for (EMDataSet ds : getDataSetList()) {
Map<String, GeneSet> geneSets = ds.getGeneSetsOfInterest().getGeneSets();
geneSets.forEach((name, gs) -> {
allGeneSets.computeIfAbsent(name, k -> new HashSet<>()).addAll(gs.getGenes());
});
}
return allGeneSets;
}
// MKTODO write a JUnit
public Set<String> getAllGeneSetOfInterestNames() {
Set<String> names = new HashSet<>();
for (EMDataSet ds : getDataSetList()) {
Map<String, GeneSet> geneSets = ds.getGeneSetsOfInterest().getGeneSets();
names.addAll(geneSets.keySet());
}
return names;
}
public String findGeneSetDescription(String genesetName) {
for(EMDataSet ds : dataSets.values()) {
GeneSet gs = ds.getGeneSetsOfInterest().getGeneSets().get(genesetName);
if(gs != null)
return gs.getDescription();
}
return null;
}
public Map<String, EMDataSet> getDataSets() {
return dataSets;
}
/**
* Returns all the DataSets in a predictable order.
*/
public List<EMDataSet> getDataSetList() {
List<EMDataSet> list = new ArrayList<>(dataSets.values());
list.sort(Comparator.naturalOrder());
return list;
}
public void setDataSets(Map<String, EMDataSet> dataSets) {
this.dataSets = dataSets;
}
public int getDataSetCount() {
return dataSets.size();
}
public EMDataSet getDataSet(String dataSetName) {
return dataSets.get(dataSetName);
}
/**
* Returns all the DataSet names in a predictable order.
*/
public List<String> getDataSetNames() {
return getDataSetList().stream().map(EMDataSet::getName).collect(Collectors.toList());
}
public EMCreationParameters getParams() {
return params;
}
public long getNetworkID() {
return networkID;
}
public void setNetworkID(long networkID) {
this.networkID = networkID;
}
public static Set<Long> getNodesUnion(Collection<AbstractDataSet> dataSets) {
return getUnion(dataSets, AbstractDataSet::getNodeSuids);
}
public static Set<Long> getNodesIntersection(Collection<? extends AbstractDataSet> dataSets) {
return getIntersection(dataSets, AbstractDataSet::getNodeSuids);
}
/**
* Returns the SUIDs for all the gene-sets in the given collection of DataSets.
* Each returned gene-set is contained in at least one of the given DataSets.
*
* Note, this will only return distinct edges, not compound edges.
*/
public static Set<Long> getEdgesUnion(Collection<AbstractDataSet> dataSets) {
return getUnion(dataSets, AbstractDataSet::getEdgeSuids);
}
/**
* Returns the SUIDs for all the gene-sets in the given collection of DataSets.
* Each returned gene-set is contained all of the given DataSets.
*
* Note, this will only return distinct edges, not compound edges.
*/
public static Set<Long> getEdgesIntersection(Collection<? extends AbstractDataSet> dataSets) {
return getIntersection(dataSets, AbstractDataSet::getEdgeSuids);
}
private static Set<Long> getUnion(Collection<? extends AbstractDataSet> dataSets, Function<AbstractDataSet,Set<Long>> suidSupplier) {
if (dataSets.isEmpty())
return Collections.emptySet();
Set<Long> suids = new HashSet<>();
for (AbstractDataSet ds : dataSets) {
suids.addAll(suidSupplier.apply(ds));
}
return suids;
}
private static Set<Long> getIntersection(Collection<? extends AbstractDataSet> dataSets, Function<AbstractDataSet,Set<Long>> suidSupplier) {
if (dataSets.isEmpty())
return Collections.emptySet();
Iterator<? extends AbstractDataSet> iter = dataSets.iterator();
AbstractDataSet first = iter.next();
Set<Long> suids = new HashSet<>(suidSupplier.apply(first));
while (iter.hasNext()) {
AbstractDataSet dataset = iter.next();
suids.retainAll(suidSupplier.apply(dataset));
}
return suids;
}
public Set<String> getAllRankNames() {
Set<String> allRankNames = new HashSet<>();
//go through each Dataset
for (EMDataSet ds : dataSets.values()) {
// there could be duplicate ranking names for two different datasets. Add the dataset to the ranks name
Set<String> allNames = ds.getExpressionSets().getAllRanksNames();
for (String name : allNames)
allRankNames.add(name + "-" + ds.getName());
}
return allRankNames;
}
public Map<String, Ranking> getAllRanks() {
Map<String, Ranking> allranks = new HashMap<>();
for (EMDataSet dataset : dataSets.values())
allranks.putAll(dataset.getExpressionSets().getRanks());
return allranks;
}
/**
* Returns true if every data set contains exactly one Ranks object.
*/
public boolean isSingleRanksPerDataset() {
for(EMDataSet dataset : dataSets.values()) {
if(dataset.getExpressionSets().getRanks().size() != 1) {
return false;
}
}
return true;
}
/**
* Returns the total number of expressions in the map.
*/
public int totalExpressionCount() {
int count = 0;
for(EMDataSet dataset : dataSets.values()) {
count += dataset.getExpressionSets().getNumConditions() - 2;
}
return count;
}
public Ranking getRanksByName(String ranksName) {
// break the ranks file up by "-"
// check to see if the rank file is dataset specific
// needed for encoding the same ranking file name from two different dataset in the interface
String ds = "";
String rank = "";
if (ranksName.split("-").length == 2) {
ds = ranksName.split("-")[1];
rank = ranksName.split("-")[0];
}
for (Iterator<String> k = dataSets.keySet().iterator(); k.hasNext();) {
String nextDs = k.next();
if (!ds.equalsIgnoreCase("") && !rank.equalsIgnoreCase("")) {
// check that this is the right dataset
if (ds.equalsIgnoreCase(nextDs)
&& (dataSets.get(nextDs)).getExpressionSets().getAllRanksNames().contains(rank))
return dataSets.get(nextDs).getExpressionSets().getRanksByName(rank);
} else if ((dataSets.get(nextDs)).getExpressionSets().getAllRanksNames().contains(ranksName)) {
return dataSets.get(nextDs).getExpressionSets().getRanksByName(ranksName);
}
}
return null;
}
public EMSignatureDataSet getSignatureDataSet(String name) {
return signatureDataSets.get(name);
}
public void setSignatureDataSets(Collection<EMSignatureDataSet> newValue) {
synchronized (lock) {
signatureDataSets.clear();
if (newValue != null && !newValue.isEmpty()) {
for (EMSignatureDataSet sigDataSet: newValue)
addSignatureDataSet(sigDataSet);
}
}
}
public Map<String, EMSignatureDataSet> getSignatureDataSets() {
return new HashMap<>(signatureDataSets);
}
public boolean hasSignatureDataSets() {
return !signatureDataSets.isEmpty();
}
public List<EMSignatureDataSet> getSignatureSetList() {
List<EMSignatureDataSet> list = new ArrayList<>(signatureDataSets.values());
list.sort(Comparator.naturalOrder());
return list;
}
public void addSignatureDataSet(EMSignatureDataSet sigDataSet) {
synchronized (lock) {
signatureDataSets.put(sigDataSet.getName(), sigDataSet);
}
}
public void removeSignatureDataSet(EMSignatureDataSet sigDataSet) {
synchronized (lock) {
signatureDataSets.remove(sigDataSet.getName());
}
}
public void setDistinctExpressionSets(boolean d) {
this.isDistinctExpressionSets = d;
}
public boolean isDistinctExpressionSets() {
return isDistinctExpressionSets;
}
public void setLegacy(boolean legacy) {
this.isLegacy = legacy;
}
/**
* Files loaded by LegacySessionLoader should set this flag to true
*/
public boolean isLegacy() {
return isLegacy;
}
@Override
public String toString() {
return getName();
}
}