package org.baderlab.csplugins.enrichmentmap.parsers; import static com.google.common.base.Strings.isNullOrEmpty; import java.io.File; import java.io.IOException; import java.util.List; import org.baderlab.csplugins.enrichmentmap.model.EMDataSet; import org.cytoscape.work.AbstractTask; import org.cytoscape.work.TaskMonitor; public class ClassFileReaderTask extends AbstractTask { private final EMDataSet dataset; public ClassFileReaderTask(EMDataSet dataset) { this.dataset = dataset; } @Override public void run(TaskMonitor taskMonitor) throws Exception { taskMonitor.setTitle("Parsing class file"); String classFile = dataset.getDataSetFiles().getClassFile(); String[] classes = parseClasses(classFile); dataset.getExpressionSets().setPhenotypes(classes); } public static String[] parseClasses(String classFile) { System.out.println("ClassFileReaderTask.parseClasses(): " + classFile); if (isNullOrEmpty(classFile)) return new String[] {"NA_pos", "NA_neg"}; File f = new File(classFile); if(!f.exists()) return null; try { List<String> lines = DatasetLineParser.readLines(classFile, 4); /* * GSEA class files will have 3 lines in the following format: 6 2 1 * # R9C_8W WT_8W R9C_8W R9C_8W R9C_8W WT_8W WT_8W WT_8W * * If the file has 3 lines assume it is a GSEA and get the * phenotypes from the third line. If the file only has 1 line * assume that it is a generic class file and get the phenotypes * from the single line * the class file can be split by a space or a tab */ if(lines.size() >= 3) return lines.get(2).split("\\s"); else if(lines.size() == 1) return lines.get(0).split("\\s"); else return null; } catch (IOException ie) { System.err.println("unable to open class file: " + classFile); return null; } } }