package org.baderlab.csplugins.enrichmentmap.parsers;
import java.io.File;
import java.util.HashMap;
import java.util.Map;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.baderlab.csplugins.enrichmentmap.model.EMDataSet;
import org.baderlab.csplugins.enrichmentmap.model.EnrichmentResult;
import org.baderlab.csplugins.enrichmentmap.model.GSEAResult;
import org.cytoscape.work.AbstractTask;
import org.cytoscape.work.TaskMonitor;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import com.google.common.base.Strings;
/**
 * Task that parses EDB enrichment result files (XML) and stores the parsed
 * {@link GSEAResult} entries on the data set's enrichments.
 *
 * <p>Every {@code DTG} element in the file becomes one {@link GSEAResult},
 * keyed by the result's name.
 */
public class ParseEDBEnrichmentResults extends AbstractTask {

    private final EMDataSet dataset;

    /**
     * @param dataset data set whose enrichment file names are read and whose
     *                enrichments receive the parsed results
     */
    public ParseEDBEnrichmentResults(EMDataSet dataset) {
        this.dataset = dataset;
    }

    /**
     * Parses the first and then the second enrichment file of the data set,
     * skipping any file name that is null or empty.
     *
     * @param taskMonitor monitor on which the task title is set
     * @throws Exception if a file cannot be read or parsed
     */
    @Override
    public void run(TaskMonitor taskMonitor) throws Exception {
        taskMonitor.setTitle("Parsing Enrichment Result file");

        String enrichmentFileName1 = dataset.getEnrichments().getFilename1();
        String enrichmentFileName2 = dataset.getEnrichments().getFilename2();

        if(!Strings.isNullOrEmpty(enrichmentFileName1)) {
            parse(enrichmentFileName1);
        }
        if(!Strings.isNullOrEmpty(enrichmentFileName2)) {
            parse(enrichmentFileName2);
        }
    }

    /**
     * Parses a single EDB file with a SAX parser and hands the collected results
     * to the data set's enrichments via {@code setEnrichments}.
     *
     * <p>NOTE(review): if {@code setEnrichments} replaces rather than merges, the
     * results of a second file overwrite those of the first - confirm against
     * the enrichments model class.
     *
     * <p>NOTE(review): the parser is created with the factory's default settings;
     * if EDB files can come from untrusted sources, consider disabling external
     * entity resolution on the factory.
     *
     * @param filePath path of the EDB file to parse
     * @throws Exception if the parser cannot be created, the file cannot be read,
     *                   or its content is malformed
     */
    public void parse(String filePath) throws Exception {
        SAXParserFactory spf = SAXParserFactory.newInstance();
        SAXParser saxParser = spf.newSAXParser();
        EDBHandler handler = new EDBHandler();
        saxParser.parse(new File(filePath), handler);
        dataset.getEnrichments().setEnrichments(handler.enrichmentResults);
    }

    /**
     * SAX handler that turns each {@code DTG} element into a {@link GSEAResult}.
     * Static because it needs no state from the enclosing task.
     */
    private static class EDBHandler extends DefaultHandler {

        /** Prefix stripped from the GENESET attribute to obtain the gene set name. */
        private static final String GENESET_PREFIX = "gene_sets.gmt#";

        /** Parsed results keyed by gene set name. */
        final Map<String, EnrichmentResult> enrichmentResults = new HashMap<>();

        /**
         * Builds a result from the attributes of a {@code DTG} element; all other
         * elements are ignored.
         *
         * @throws SAXException if a {@code DTG} element has no GENESET attribute
         */
        @Override
        public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
            if(!"DTG".equals(qName)) {
                return;
            }

            // name - tag is GENESET but need to remove gene_sets.gmt# from the front
            String geneset = attributes.getValue("GENESET");
            if(geneset == null) {
                // Without a name the result cannot be keyed; fail with a clear message instead of an NPE.
                throw new SAXException("DTG element is missing the required GENESET attribute");
            }
            String name = geneset.replace(GENESET_PREFIX, "");

            // gsSize - geneset size, taken from the number of hits in the hit indices (HIT_INDICES)
            int gsSize = countHits(attributes.getValue("HIT_INDICES"));

            double ES   = doubleAttribute(attributes, "ES", 0.0);    // enrichment score
            double NES  = doubleAttribute(attributes, "NES", 0.0);   // normalized enrichment score
            double pvalue = doubleAttribute(attributes, "NP", 1.0);  // p-value - tag is NP
            double FDR  = doubleAttribute(attributes, "FDR", 1.0);   // false discovery rate
            double FWER = doubleAttribute(attributes, "FWER", 1.0);  // family wise error rate

            // rank_at_max - RANK_AT_ES; parsed as a double and truncated to int
            int rankAtMax = (int) doubleAttribute(attributes, "RANK_AT_ES", 0.0);

            // score_at_max - not in the edb file but it is just the NES
            double scoreAtMax = NES;

            GSEAResult result = new GSEAResult(name, gsSize, ES, NES, pvalue, FDR, FWER, rankAtMax, scoreAtMax);
            enrichmentResults.put(result.getName(), result);
        }

        /**
         * Counts the whitespace-separated entries of a HIT_INDICES value.
         *
         * @param hitIndices attribute value, may be null
         * @return number of entries; 0 when the value is null, empty or only whitespace
         */
        private static int countHits(String hitIndices) {
            if(hitIndices == null) {
                return 0;
            }
            String trimmed = hitIndices.trim();
            // "".split(...) yields one empty token, so an empty list must be handled explicitly.
            if(trimmed.isEmpty()) {
                return 0;
            }
            return trimmed.split("\\s+").length;
        }

        /**
         * Reads a numeric attribute.
         *
         * @param attributes   attributes of the current element
         * @param name         qualified attribute name
         * @param defaultValue value returned when the attribute is absent
         * @return the parsed value, or {@code defaultValue} when the attribute is missing
         * @throws NumberFormatException if the attribute is present but not a valid double
         */
        private static double doubleAttribute(Attributes attributes, String name, double defaultValue) {
            String raw = attributes.getValue(name);
            return (raw != null) ? Double.parseDouble(raw) : defaultValue;
        }
    }
}