package edu.mssm.pharm.maayanlab.Enrichr; import java.io.File; import java.text.DecimalFormat; import java.text.ParseException; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.LinkedList; import java.util.logging.Level; import java.util.logging.Logger; import javax.swing.SwingWorker; import edu.mssm.pharm.maayanlab.Enrichr.ResourceLoader.EnrichmentCategory; import edu.mssm.pharm.maayanlab.Enrichr.ResourceLoader.EnrichmentLibrary; import edu.mssm.pharm.maayanlab.common.bio.EnrichedTerm; import edu.mssm.pharm.maayanlab.common.bio.FuzzyGeneSetLibrary; import edu.mssm.pharm.maayanlab.common.bio.GeneSetLibrary; import edu.mssm.pharm.maayanlab.common.core.FileUtils; import edu.mssm.pharm.maayanlab.common.core.Settings; import edu.mssm.pharm.maayanlab.common.core.SettingsChanger; import edu.mssm.pharm.maayanlab.common.core.SimpleXMLWriter; public class EnrichrBatcher implements SettingsChanger { private static Logger log = Logger.getLogger("maayanlab"); // progress tracking private SwingWorker<Void, Void> task = null; private int progress = 0; private String note = ""; private boolean isCancelled = false; // Default settings private final Settings settings = new Settings() { { for (EnrichmentCategory category : ResourceLoader.getInstance().getCategories()) { for (EnrichmentLibrary library : category.getLibraries()) { set(library.getName(), true); } } } }; // Formatter private final DecimalFormat scientificNotation = new DecimalFormat("0.##E0"); // Output header public static final String HEADER = "Term\tOverlap\tP-value\tGenes"; private HashMap<String, ArrayList<EnrichedTerm>> resultsMap = new HashMap<String, ArrayList<EnrichedTerm>>(18); public static void main(String[] args) { // Set logger display level if (!Boolean.getBoolean("verbose")) log.setLevel(Level.WARNING); if (args.length == 2) { EnrichrBatcher eb = new EnrichrBatcher(); File inputDir = new File(args[0]); File outputDir = new File(args[1]); if (inputDir.isDirectory()) { for (File child : inputDir.listFiles()) { eb.run(child.getAbsolutePath()); if (outputDir.isDirectory()) { eb.writeFile(new File(outputDir, FileUtils.stripFileExtension(child.getName()) + ".enrichment.xml").getAbsolutePath()); } else { eb.writeFile(new File(outputDir.getParent(), FileUtils.stripFileExtension(child.getName()) + "." + outputDir.getName()).getAbsolutePath()); } } } else { eb.run(args[0]); eb.writeFile(args[1]); } } else if (args.length == 3) { EnrichrBatcher eb = new EnrichrBatcher(); eb.run(args[0], args[1], false, args[2]); } else if (args.length == 4) { EnrichrBatcher eb = new EnrichrBatcher(); eb.run(args[0], args[1], Boolean.parseBoolean(args[2]), args[3]); } else log.warning("Usage: java -jar Enrichr.jar gene_list [background_file is_fuzzy] output"); } // By default, load settings from file public EnrichrBatcher() { settings.loadSettings(); } // Load external settings, primarily for use with X2K public EnrichrBatcher(Settings externalSettings) { settings.loadSettings(externalSettings); } // Task methods public void setTask(SwingWorker<Void, Void> task) { this.task = task; } private void setProgress(int progress, String note) throws InterruptedException { if (task != null) { if (isCancelled) throw new InterruptedException("Task cancelled at " + progress + "%!"); task.firePropertyChange("progress", this.progress, progress); task.firePropertyChange("note", this.note, note); this.progress = progress; this.note = note; } } public void cancel() { isCancelled = true; } @Override // Used for other methods to set settings public void setSetting(String key, String value) { settings.set(key, value); } public HashMap<String, ArrayList<EnrichedTerm>> getEnrichmentResults() { return resultsMap; } public void writeFile(String filename) { // Prefix for individual files String outputPrefix = filename.replaceFirst("\\.\\w+$", ""); SimpleXMLWriter sxw = new SimpleXMLWriter(filename); sxw.startPlainElement("Enrichment"); sxw.startPlainElement("Summary"); LinkedList<EnrichedTerm> combinedTerms = new LinkedList<EnrichedTerm>(); for (ArrayList<EnrichedTerm> termList : resultsMap.values()) combinedTerms.addAll(termList); Collections.sort(combinedTerms); // Filter down to top 10 while (combinedTerms.size() > 10) combinedTerms.removeLast(); for (EnrichedTerm term : combinedTerms) sxw.listElement("Term", term.getName(), "p-value", scientificNotation.format(term.getPValue())); sxw.endElement(); for (String bgType : resultsMap.keySet()) { // Write XML summary output sxw.startElementWithAttributes("Background", "name", bgType); int i = 1; for (EnrichedTerm term : resultsMap.get(bgType)) { sxw.listElement("Term", term.getName(), "p-value", scientificNotation.format(term.getPValue())); // Stop after 10 entries if (i++ >= 10) break; } sxw.endElement(); // Write individual enrichment tsv outputs FileUtils.writeFile(outputPrefix + "_" + bgType + ".txt", Enrichment.HEADER, resultsMap.get(bgType)); } sxw.close(); } // Run from cli with custom database public void run(String geneList, String backgroundFile, boolean isFuzzy, String outputFile) { GeneSetLibrary geneSetLibrary; log.info("Running with custom database"); try { Enrichment app = new Enrichment(FileUtils.readFile(geneList), true); if (isFuzzy) geneSetLibrary = new FuzzyGeneSetLibrary(FileUtils.readFile(backgroundFile)); else geneSetLibrary = new GeneSetLibrary(FileUtils.readFile(backgroundFile)); FileUtils.writeFile(outputFile, Enrichment.HEADER, app.enrich(geneSetLibrary)); } catch (ParseException e) { log.warning(e.getMessage()); System.exit(-1); } } // Run for file names public void run(String geneList) { ArrayList<String> inputList = FileUtils.readFile(geneList); try { run(inputList); } catch (ParseException e) { log.warning(e.getMessage()); } } // Run for calling from other methods and pass in collection public void run(Collection<String> geneList) throws ParseException { LinkedList<String> bgList = new LinkedList<String>(); for (EnrichmentCategory category : ResourceLoader.getInstance().getCategories()) { for (EnrichmentLibrary library : category.getLibraries()) { if (settings.getBoolean(library.getName())) { bgList.add(library.getName()); } } } try { setProgress(0, "Enriching terms..."); computeEnrichment(bgList, geneList); setProgress(95, "Writing results..."); } catch (InterruptedException e) { log.info(e.getMessage()); return; } } public void computeEnrichment(LinkedList<String> backgroundList, Collection<String> geneList) throws InterruptedException, ParseException { int iteration = 0; int increment = 80 / backgroundList.size(); Enrichment app = new Enrichment(geneList, true); for (String bgType : backgroundList) { try { setProgress(5+increment*iteration, bgType.replace("_", " ") + " enrichment..."); iteration++; } catch (InterruptedException e) { throw new InterruptedException(e.getMessage()); } ArrayList<EnrichedTerm> resultTerms = app.enrich(bgType); // Only add to results if there are actual results if (!resultTerms.isEmpty()) resultsMap.put(bgType, resultTerms); } } }