package org.baderlab.csplugins.enrichmentmap.resolver; import java.io.IOException; import java.nio.file.Files; import java.nio.file.InvalidPathException; import java.nio.file.Path; import java.nio.file.Paths; import java.util.HashMap; import java.util.Map; import java.util.Optional; import java.util.stream.Stream; import org.baderlab.csplugins.enrichmentmap.model.DataSetFiles; import org.baderlab.csplugins.enrichmentmap.model.EMDataSet.Method; public class GSEAResolver { /** * Attempts to create a DataSetParameters object from a GSEA results folder. */ public static Optional<DataSetParameters> resolveGSEAResultsFolder(Path gseaFolder) { try { Optional<DataSetParameters> rpt = resolveRPTFromGSEAFolder(gseaFolder); if(rpt.isPresent()) return rpt; Optional<DataSetParameters> edb = resolveEDBFromGSEAFolder(gseaFolder); if(edb.isPresent()) return edb; } catch(IOException e) { e.printStackTrace(); } return Optional.empty(); } public static Optional<DataSetParameters> resolveRPTFromGSEAFolder(Path gseaFolder) throws IOException { Optional<Path> rptFileOpt = getFileEndingWith(gseaFolder, ".rpt"); if(!rptFileOpt.isPresent()) return Optional.empty(); return resolveRPT(gseaFolder, rptFileOpt.get()); } public static Optional<DataSetParameters> resolveRPTFile(Path rptFile) throws IOException { Path gseaFolder = rptFile.getParent(); return resolveRPT(gseaFolder, rptFile); } private static Optional<DataSetParameters> resolveRPT(Path gseaFolder, Path rptFile) { Optional<Map<String,String>> optParams = parseRPTParameters(rptFile); if(!optParams.isPresent()) return Optional.empty(); Map<String,String> params = optParams.get(); // Attempt to resolve the files from the RPT Optional<Path> gmtPath = getRptGmt(gseaFolder, params); String[] phenotypes = getRptPhenotypes(params); Optional<Path> classes = getRptClassFile(params); String timestamp = params.get("producer_timestamp"); String results1FileName = "gsea_report_for_" + phenotypes[0] + "_" + timestamp + ".xls"; String results2FileName = "gsea_report_for_" + phenotypes[1] + "_" + timestamp + ".xls"; String rnkFileName = "ranked_gene_list_" + phenotypes[0] + "_versus_" + phenotypes[1] +"_" + timestamp + ".xls"; Optional<Path> results1 = getRptResultsFile(gseaFolder, results1FileName, params); Optional<Path> results2 = getRptResultsFile(gseaFolder, results2FileName, params); Optional<Path> rnk = getRptResultsFile(gseaFolder, rnkFileName, params); Optional<Path> expr = getRptExpressionFile(params); if(!gmtPath.isPresent() && !results1.isPresent() && !results2.isPresent() && !rnk.isPresent() && !expr.isPresent()) return Optional.empty(); DataSetFiles files = new DataSetFiles(); files.setPhenotype1(phenotypes[0]); files.setPhenotype2(phenotypes[1]); gmtPath.ifPresent(path -> files.setGMTFileName(path.toString())); results1.ifPresent(path -> files.setEnrichmentFileName1(path.toString())); results2.ifPresent(path -> files.setEnrichmentFileName2(path.toString())); rnk.ifPresent(path -> files.setRankedFile(path.toString())); expr.ifPresent(path -> files.setExpressionFileName(path.toString())); classes.ifPresent(path -> files.setClassFile(path.toString())); return Optional.of(new DataSetParameters(getDatasetNameGSEA(gseaFolder), Method.GSEA, files)); } private static Optional<Path> getRptExpressionFile(Map<String,String> params) { String method = params.get("producer_class").split("\\p{Punct}")[2]; // Gsea or GseaPreranked String data; if(method.equalsIgnoreCase("Gsea")) { data = params.get("param res"); } else if(method.equalsIgnoreCase("GseaPreranked")) { data = params.get("param rnk"); if(params.containsKey("param expressionMatrix")) { data = params.get("param expressionMatrix"); } } else { return Optional.empty(); } try { Path exprfile = Paths.get(data); if(Files.exists(exprfile)) return Optional.of(exprfile); } catch(InvalidPathException e) { e.printStackTrace(); } return Optional.empty(); } private static Optional<Path> getRptResultsFile(Path root, String fileName, Map<String,String> params) { // RPT files contain absolute paths from the machine where the GSEA analysis was run. // If the user moves the GSEA folder somewhere else then the paths won't resolve. // We can still attempt to find the files in the same folder where the RPT file is located. String label = params.get("param rpt_label"); String method = params.get("producer_class").split("\\p{Punct}")[2]; // Gsea or GseaPreranked String timestamp = params.get("producer_timestamp"); String out_dir = params.get("param out"); String job_dir_name = label + "." + method + "." + timestamp; // attempt to find the file using the path in the RPT file try { Path abs = Paths.get(out_dir, job_dir_name, fileName); if(Files.exists(abs)) { return Optional.of(abs); } } catch(InvalidPathException e) { e.printStackTrace(); } try { // attempt to find the file under the folder containing the RPT file Path rel = root.resolve(fileName); if(Files.exists(rel)) { return Optional.of(rel); } } catch(InvalidPathException e) { e.printStackTrace(); } return Optional.empty(); } private static Optional<Map<String,String>> parseRPTParameters(Path rptFile) { try(Stream<String> stream = Files.lines(rptFile)) { Map<String,String> params = new HashMap<>(); for(String line : (Iterable<String>)stream::iterator) { String[] tokens = line.split("\t"); if(tokens.length == 2) params.put(tokens[0] ,tokens[1]); else if(tokens.length == 3) params.put(tokens[0] + " " + tokens[1], tokens[2]); } return Optional.of(params); } catch (IOException e) { e.printStackTrace(); return Optional.empty(); } } private static Optional<Path> getRptGmt(Path root, Map<String,String> params) { // Use the original gmt if we can find it. // If we can't find it resort to using the the one from edb directory. String gmtParam = params.get("param gmx"); try { Path rptGmtPath = Paths.get(gmtParam); if(Files.exists(rptGmtPath)) return Optional.of(rptGmtPath); } catch(InvalidPathException e) { e.printStackTrace(); } try { Path edbGmtPath = root.resolve("edb/gene_sets.gmt"); if(Files.exists(edbGmtPath)) return Optional.of(edbGmtPath); } catch(InvalidPathException e) { e.printStackTrace(); } return Optional.empty(); } private static String[] getRptPhenotypes(Map<String, String> params) { String classes = params.get("param cls"); String method = params.get("producer_class").split("\\p{Punct}")[2]; // Gsea or GseaPreranked String phenotype1 = "na"; String phenotype2 = "na"; if (classes != null && method.equalsIgnoreCase("Gsea")) { String[] classes_split = classes.split("#"); // only and try parse classes out of label if they are there if (classes_split.length >= 2) { String phenotypes = classes_split[1]; String[] phenotypes_split = phenotypes.split("_versus_"); if (phenotypes_split.length >= 2) { phenotype1 = phenotypes_split[0]; phenotype2 = phenotypes_split[1]; } } } else if (method.equalsIgnoreCase("GseaPreranked")) { phenotype1 = "na_pos"; phenotype2 = "na_neg"; if (params.containsKey("param phenotypes")) { String phenotypes = params.get("param phenotypes"); String[] phenotypes_split = phenotypes.split("_versus_"); if (phenotypes_split.length >= 2) { phenotype1 = phenotypes_split[0]; phenotype2 = phenotypes_split[1]; } } } return new String[] {phenotype1, phenotype2}; } private static Optional<Path> getRptClassFile(Map<String, String> params) { String classes = params.get("param cls"); String method = params.get("producer_class").split("\\p{Punct}")[2]; // Gsea or GseaPreranked if (classes != null && method.equalsIgnoreCase("Gsea")) { String[] classes_split = classes.split("#"); try { Path path = Paths.get(classes_split[0]); if(Files.exists(path)) { return Optional.of(path); } } catch(InvalidPathException e) { e.printStackTrace(); } } return Optional.empty(); } private static Optional<DataSetParameters> resolveEDBFromGSEAFolder(Path root) { if(hasEdbData(root)) return Optional.of(toDataSetParametersEDB(root)); return Optional.empty(); } private static boolean hasEdbData(Path p) { Path edbPath = p.resolve("edb"); try { return Files.exists(edbPath) && containsFileEndingWith(edbPath, ".rnk") && containsFileEndingWith(edbPath, ".gmt") && containsFileEndingWith(edbPath, ".edb"); } catch(IOException e) { e.printStackTrace(); return false; } } public static boolean isGSEAResultsFolder(Path p) { return hasEdbData(p); } public static DataSetFiles toDataSetFilesEDB(Path path) { DataSetFiles files = new DataSetFiles(); files.setEnrichmentFileName1(path.resolve(Paths.get("edb/results.edb")).toString()); files.setGMTFileName(path.resolve(Paths.get("edb/gene_sets.gmt")).toString()); return files; } public static DataSetParameters toDataSetParametersEDB(Path root) { return new DataSetParameters(getDatasetNameGSEA(root), Method.GSEA, toDataSetFilesEDB(root)); } public static String getDatasetNameGSEA(Path folder) { String folderName = folder.getFileName().toString(); int dotIndex = folderName.indexOf('.'); if(dotIndex == -1) return folderName; else return folderName.substring(0, dotIndex); } private static boolean containsFileEndingWith(Path p, String suffix) throws IOException { return getFileEndingWith(p, suffix).isPresent(); } private static Optional<Path> getFileEndingWith(Path p, String suffix) throws IOException { return Files.find(p, 1, (path, attrs) -> endsWithIgnoreCase(path.getFileName().toString(), suffix) ).findFirst(); } private static boolean endsWithIgnoreCase(String s, String suffix) { return s.regionMatches(true, s.length()-suffix.length(), suffix, 0, suffix.length()); } }