package org.baderlab.csplugins.enrichmentmap.parsers;
import static org.junit.Assert.assertEquals;
import static org.mockito.Mockito.mock;
import java.util.Map;
import org.baderlab.csplugins.enrichmentmap.TestUtils;
import org.baderlab.csplugins.enrichmentmap.model.EMDataSet;
import org.baderlab.csplugins.enrichmentmap.model.EMDataSet.Method;
import org.baderlab.csplugins.enrichmentmap.model.DataSetFiles;
import org.baderlab.csplugins.enrichmentmap.model.EnrichmentMap;
import org.baderlab.csplugins.enrichmentmap.model.EnrichmentMapParameters;
import org.baderlab.csplugins.enrichmentmap.model.EnrichmentResult;
import org.baderlab.csplugins.enrichmentmap.model.GSEAResult;
import org.baderlab.csplugins.enrichmentmap.model.GenericResult;
import org.baderlab.csplugins.enrichmentmap.model.LegacySupport;
import org.cytoscape.service.util.CyServiceRegistrar;
import org.cytoscape.work.TaskMonitor;
import org.jukito.JukitoRunner;
import org.junit.Test;
import org.junit.runner.RunWith;
import com.google.inject.Provider;
/**
 * Exercises the reader tasks of the parsers package (GMT, expression, and the generic, GSEA, EDB,
 * BiNGO and DAVID enrichment-result parsers) against fixture files below {@code src/test/resources}.
 *
 * <p>Every test follows the same recipe: obtain fresh {@link EnrichmentMapParameters} from the
 * injected provider, point the {@code LegacySupport.DATASET1} files at a fixture, build an
 * {@link EnrichmentMap} plus its data set, run one reader task, and compare what was loaded with
 * the expected values hard-coded in the test. All floating-point comparisons use a delta of
 * {@code 0.0}, i.e. they require exact equality.
 */
@RunWith(JukitoRunner.class)
public class FileReaderTest {

    /** Relative path prefix shared by all fixture files used in this class. */
    private static final String RESOURCE_DIR = "src/test/resources/org/baderlab/csplugins/enrichmentmap/";

    /** Genes registered with the map before an expression file is loaded. */
    private static final String[] REGISTERED_GENES = {"GLS", "PSMA1", "ZP1", "ZYX"};

    private final CyServiceRegistrar serviceRegistrar = TestUtils.mockServiceRegistrar();
    private final TaskMonitor taskMonitor = mock(TaskMonitor.class);

    // ------------------------------------------------------------------ shared set-up helpers

    /** Returns the file settings of the default data set ({@code LegacySupport.DATASET1}). */
    private static DataSetFiles dataSet1Files(EnrichmentMapParameters params) throws Exception {
        return params.getFiles().get(LegacySupport.DATASET1);
    }

    /** Creates a new enrichment map from the creation parameters held by {@code params}. */
    private EnrichmentMap createMap(EnrichmentMapParameters params) throws Exception {
        return new EnrichmentMap(params.getCreationParameters(), serviceRegistrar);
    }

    /**
     * Creates the default data set ({@code LegacySupport.DATASET1}) on {@code map}, using the
     * method and the file settings configured in {@code params}.
     */
    private static EMDataSet createDataSet1(EnrichmentMap map, EnrichmentMapParameters params) throws Exception {
        Method method = EnrichmentMapParameters.stringToMethod(params.getMethod());
        DataSetFiles files = dataSet1Files(params);
        return map.createDataSet(LegacySupport.DATASET1, method, files);
    }

    /** Returns the enrichment results stored on the default data set of {@code map}. */
    private static Map<String, EnrichmentResult> enrichmentsOf(EnrichmentMap map) throws Exception {
        return map.getDataSet(LegacySupport.DATASET1).getEnrichments().getEnrichments();
    }

    /**
     * Runs the common part of every expression test: configures {@code fileName} (relative to
     * {@link #RESOURCE_DIR}) as the expression file, registers {@link #REGISTERED_GENES} with a
     * fresh map, runs an {@link ExpressionFileReaderTask}, and verifies that the number of
     * registered genes is the same before and after the load.
     *
     * @return the map the expression data was loaded into
     */
    private EnrichmentMap loadExpressionFile(Provider<EnrichmentMapParameters> empFactory, String fileName)
            throws Exception {
        EnrichmentMapParameters params = empFactory.get();
        dataSet1Files(params).setExpressionFileName(RESOURCE_DIR + fileName);
        EnrichmentMap map = createMap(params);
        EMDataSet dataset = createDataSet1(map, params);

        // in order to load expression data the genes have to be registered with the map;
        // start from an empty gene list and register the genes by hand
        assertEquals(0, map.getNumberOfGenes());
        for (String gene : REGISTERED_GENES) {
            map.addGene(gene);
        }
        assertEquals(REGISTERED_GENES.length, map.getNumberOfGenes());

        new ExpressionFileReaderTask(dataset).run(taskMonitor);

        // loading the expression file must leave the number of registered genes unchanged
        assertEquals(REGISTERED_GENES.length, map.getNumberOfGenes());
        return map;
    }

    // ------------------------------------------------------------------ assertion helpers

    /** Checks the summary values of the expression data loaded into the default data set. */
    private static void assertExpression(EnrichmentMap map, int numGenes, int numConditions,
            double minExpression, double maxExpression) throws Exception {
        assertEquals(numGenes, map.getDataSet(LegacySupport.DATASET1).getExpressionSets().getNumGenes());
        assertEquals(numConditions, map.getDataSet(LegacySupport.DATASET1).getExpressionSets().getNumConditions());
        assertEquals(minExpression, map.getDataSet(LegacySupport.DATASET1).getExpressionSets().getMinExpression(), 0.0);
        assertEquals(maxExpression, map.getDataSet(LegacySupport.DATASET1).getExpressionSets().getMaxExpression(), 0.0);
    }

    /**
     * Checks every value of the {@link GSEAResult} stored under {@code name}: p-value, FDR
     * q-value, ES, NES, rank at max and gene-set size.
     */
    private static void assertGSEAResult(Map<String, EnrichmentResult> enrichments, String name,
            double pvalue, double fdrqvalue, double es, double nes, int rankAtMax, int gsSize) {
        GSEAResult result = (GSEAResult) enrichments.get(name);
        assertEquals(pvalue, result.getPvalue(), 0.0);
        assertEquals(fdrqvalue, result.getFdrqvalue(), 0.0);
        assertEquals(es, result.getES(), 0.0);
        assertEquals(nes, result.getNES(), 0.0);
        assertEquals(rankAtMax, result.getRankAtMax());
        assertEquals(gsSize, result.getGsSize());
    }

    /** Checks p-value, FDR q-value and gene-set size of the {@link GenericResult} stored under {@code name}. */
    private static void assertGenericResult(Map<String, EnrichmentResult> enrichments, String name,
            double pvalue, double fdrqvalue, int gsSize) {
        GenericResult result = (GenericResult) enrichments.get(name);
        assertEquals(pvalue, result.getPvalue(), 0.0);
        assertEquals(fdrqvalue, result.getFdrqvalue(), 0.0);
        assertEquals(gsSize, result.getGsSize());
    }

    /**
     * Checks p-value, FDR q-value and NES of the {@link GenericResult} stored under {@code name}.
     * In the 5-column generic test the phenotype is verified through the NES value
     * (expected to be {@code 1.0} or {@code -1.0}).
     */
    private static void assertGenericResultWithNES(Map<String, EnrichmentResult> enrichments, String name,
            double pvalue, double fdrqvalue, double nes) {
        GenericResult result = (GenericResult) enrichments.get(name);
        assertEquals(pvalue, result.getPvalue(), 0.0);
        assertEquals(fdrqvalue, result.getFdrqvalue(), 0.0);
        assertEquals(nes, result.getNES(), 0.0);
    }

    // ------------------------------------------------------------------ gene-set (GMT) reader

    /** Loads a GMT file and checks the number of gene sets and genes that end up in the map. */
    @Test
    public void testGMTFileReader(Provider<EnrichmentMapParameters> empFactory) throws Exception {
        EnrichmentMapParameters params = empFactory.get();
        dataSet1Files(params).setGMTFileName(RESOURCE_DIR + "Genesetstestfile.gmt");
        EnrichmentMap map = createMap(params);
        EMDataSet dataset = createDataSet1(map, params);

        new GMTFileReaderTask(dataset).run(taskMonitor);

        // the file must load as 10 gene sets with a total of 75 genes
        assertEquals(10, map.getAllGeneSets().size());
        assertEquals(75, map.getNumberOfGenes());
    }

    // ------------------------------------------------------------------ expression reader

    /** Loads a plain GCT expression file. */
    @Test
    public void testExpression1ReaderNormal(Provider<EnrichmentMapParameters> empFactory) throws Exception {
        EnrichmentMap map = loadExpressionFile(empFactory, "Expressiontestfile.gct");
        assertExpression(map, 4, 59, 0.008720342, 5.131481026);
    }

    /** Loads the "_comments" variant of the GCT fixture; the expected values match the plain GCT test. */
    @Test
    public void testExpression1ReaderCommentLines(Provider<EnrichmentMapParameters> empFactory) throws Exception {
        EnrichmentMap map = loadExpressionFile(empFactory, "Expressiontestfile_comments.gct");
        assertExpression(map, 4, 59, 0.008720342, 5.131481026);
    }

    /** Loads a rank (.rnk) file through the expression reader. */
    @Test
    public void testExpression1ReaderRnk(Provider<EnrichmentMapParameters> empFactory) throws Exception {
        EnrichmentMap map = loadExpressionFile(empFactory, "ExpressionTestFile.rnk");
        assertExpression(map, 4, 3, 0.47536945, 0.5418719);
    }

    /** Loads the "_edbrnk" rank fixture; the expected values match the plain rank-file test. */
    @Test
    public void testExpression1ReaderEDBRnk(Provider<EnrichmentMapParameters> empFactory) throws Exception {
        EnrichmentMap map = loadExpressionFile(empFactory, "ExpressionTestFile_edbrnk.rnk");
        assertExpression(map, 4, 3, 0.47536945, 0.5418719);
    }

    // ------------------------------------------------------------------ enrichment-result readers

    /** Parses a 5-column generic enrichment file and checks p-value, FDR and phenotype of each row. */
    @Test
    public void testGenericFileReader_5columns(Provider<EnrichmentMapParameters> empFactory) throws Exception {
        EnrichmentMapParameters params = empFactory.get();
        dataSet1Files(params).setEnrichmentFileName1(RESOURCE_DIR + "generic_enr_5col.txt");
        EnrichmentMap map = createMap(params);
        EMDataSet dataset = createDataSet1(map, params);

        // nothing has been read yet
        assertEquals(0, enrichmentsOf(map).size());

        new ParseGenericEnrichmentResults(dataset).run(taskMonitor);

        Map<String, EnrichmentResult> results = enrichmentsOf(map);
        assertEquals(4, results.size());

        //                                      name          p-value   FDR   NES (phenotype)
        assertGenericResultWithNES(results, "GO:0000346", 0.01,     0.02,  1.0);
        assertGenericResultWithNES(results, "GO:0030904", 0.05,     0.10,  1.0);
        assertGenericResultWithNES(results, "GO:0008623", 0.05,     0.12, -1.0);
        assertGenericResultWithNES(results, "GO:0046540", 5.60E-42, 0.03, -1.0);
    }

    /** Parses the pair of GSEA result files (GSEA writes two enrichment files per analysis). */
    @Test
    public void testGSEAEnrichmentsReader(Provider<EnrichmentMapParameters> empFactory) throws Exception {
        EnrichmentMapParameters params = empFactory.get();
        dataSet1Files(params).setEnrichmentFileName1(RESOURCE_DIR + "GSEA_enrichments1.xls");
        dataSet1Files(params).setEnrichmentFileName2(RESOURCE_DIR + "GSEA_enrichments2.xls");
        EnrichmentMap map = createMap(params);
        EMDataSet dataset = createDataSet1(map, params);

        new ParseGSEAEnrichmentResults(dataset).run(taskMonitor);

        Map<String, EnrichmentResult> enrichments = enrichmentsOf(map);
        assertEquals(40, enrichments.size());

        // example from file 1
        assertGSEAResult(enrichments, "ANTIGEN PROCESSING AND PRESENTATION%KEGG%HSA04612",
                0.0, 0.086938426, 0.6854155, 2.1194055, 836, 27);
        // example from file 2
        assertGSEAResult(enrichments, "EMBRYONIC HEART TUBE MORPHOGENESIS%GO%GO:0003143",
                0.040152963, 1.0, -0.49066687, -1.477554, 1597, 17);
    }

    /** Parses a GSEA {@code results.edb} file. */
    @Test
    public void testGSEAEDBEnrichmentsReader(Provider<EnrichmentMapParameters> empFactory) throws Exception {
        EnrichmentMapParameters params = empFactory.get();
        dataSet1Files(params).setEnrichmentFileName1(
                RESOURCE_DIR + "task/LoadDataset/GSEA_example_results/edb/results.edb");
        EnrichmentMap map = createMap(params);
        EMDataSet dataset = createDataSet1(map, params);

        new ParseEDBEnrichmentResults(dataset).run(taskMonitor);

        Map<String, EnrichmentResult> enrichments = enrichmentsOf(map);
        assertEquals(14, enrichments.size());

        // example with a positive enrichment score
        assertGSEAResult(enrichments, "PROTEASOME ACTIVATOR COMPLEX%GO%GO:0008537",
                0.2271, 0.2447, 0.7852, 1.1793, 6, 2);

        // example with a negative enrichment score.
        // The rank at max in the edb file is different from the excel files: the excel files
        // convert the rank as if counting from the bottom of the list, whereas the edb file
        // counts from the top of the ranked list (going from positive to negative ES scores).
        assertGSEAResult(enrichments, "PROTEASOME COMPLEX%GO%GO:0000502",
                0.4545, 0.8650, -0.4707, -0.9696, 15, 39);
    }

    /** Parses a BiNGO result file. */
    @Test
    public void testBingoEnrichmentsReader(Provider<EnrichmentMapParameters> empFactory) throws Exception {
        EnrichmentMapParameters params = empFactory.get();
        dataSet1Files(params).setEnrichmentFileName1(RESOURCE_DIR + "BingoResults.bgo");
        EnrichmentMap map = createMap(params);
        EMDataSet dataset = createDataSet1(map, params);

        new ParseBingoEnrichmentResults(dataset).run(taskMonitor);

        Map<String, EnrichmentResult> enrichments = enrichmentsOf(map);
        assertEquals(74, enrichments.size());

        //                               name              p-value             FDR              gene-set size
        assertGenericResult(enrichments, "NUCLEOLAR PART", 0.0010354,          0.047796,        5);
        assertGenericResult(enrichments, "NUCLEOLUS",      0.0000000000016209, 0.0000000042203, 43);
    }

    /** Parses a DAVID result file. */
    @Test
    public void testDavidEnrichmentsReader(Provider<EnrichmentMapParameters> empFactory) throws Exception {
        EnrichmentMapParameters params = empFactory.get();
        dataSet1Files(params).setEnrichmentFileName1(RESOURCE_DIR + "DavidResults.txt");
        EnrichmentMap map = createMap(params);
        EMDataSet dataset = createDataSet1(map, params);

        new ParseDavidEnrichmentResults(dataset).run(taskMonitor);

        Map<String, EnrichmentResult> enrichments = enrichmentsOf(map);
        assertEquals(215, enrichments.size());

        assertGenericResult(enrichments, "GO:0031974~MEMBRANE-ENCLOSED LUMEN",
                0.00000005210169741980237, 0.000016724505445320226, 95);

        // The result is looked up under the upper-cased term name.
        // NOTE(review): presumably the parser upper-cases names with the default locale as well;
        // the locale-default toUpperCase() is kept on purpose so the key is derived the same way.
        String leucineZipper = "domain:Leucine-zipper".toUpperCase();
        assertGenericResult(enrichments, leucineZipper,
                0.0009179741851709047, 0.46717397126592464, 11);
    }
}