/** ** EnrichmentMap Cytoscape Plugin ** ** Copyright (c) 2008-2009 Bader Lab, Donnelly Centre for Cellular and Biomolecular ** Research, University of Toronto ** ** Contact: http://www.baderlab.org ** ** Code written by: Ruth Isserlin ** Authors: Daniele Merico, Ruth Isserlin, Oliver Stueker, Gary D. Bader ** ** This library is free software; you can redistribute it and/or modify it ** under the terms of the GNU Lesser General Public License as published ** by the Free Software Foundation; either version 2.1 of the License, or ** (at your option) any later version. ** ** This library is distributed in the hope that it will be useful, but ** WITHOUT ANY WARRANTY, WITHOUT EVEN THE IMPLIED WARRANTY OF ** MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. The software and ** documentation provided hereunder is on an "as is" basis, and ** University of Toronto ** has no obligations to provide maintenance, support, updates, ** enhancements or modifications. In no event shall the ** University of Toronto ** be liable to any party for direct, indirect, special, ** incidental or consequential damages, including lost profits, arising ** out of the use of this software and its documentation, even if ** University of Toronto ** has been advised of the possibility of such damage. ** See the GNU Lesser General Public License for more details. ** ** You should have received a copy of the GNU Lesser General Public License ** along with this library; if not, write to the Free Software Foundation, ** Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ** **/ // $Id$ // $LastChangedDate$ // $LastChangedRevision$ // $LastChangedBy$ // $HeadURL$ package org.baderlab.csplugins.enrichmentmap.parsers; import java.io.BufferedReader; import java.io.FileReader; import java.io.IOException; import java.text.Normalizer; import java.util.Map; import java.util.regex.Pattern; import org.baderlab.csplugins.enrichmentmap.model.EMDataSet; import org.baderlab.csplugins.enrichmentmap.model.EnrichmentMap; import org.baderlab.csplugins.enrichmentmap.model.GeneSet; import org.baderlab.csplugins.enrichmentmap.model.SetOfGeneSets; import org.cytoscape.work.AbstractTask; import org.cytoscape.work.TaskMonitor; import com.google.common.collect.ImmutableSet; /** * This class parses a GMT (gene set) file and creates a set of genesets */ public class GMTFileReaderTask extends AbstractTask { private final EnrichmentMap map; private final String gmtFileName; private final SetOfGeneSets setOfgenesets; public GMTFileReaderTask(EMDataSet dataset) { this.map = dataset.getMap(); this.gmtFileName = dataset.getSetOfGeneSets().getFilename(); this.setOfgenesets = dataset.getSetOfGeneSets(); } /** * for BuildDiseaseSignatureTask */ public GMTFileReaderTask(EnrichmentMap map, String gmtFileName, SetOfGeneSets setOfgensets) { this.map = map; this.gmtFileName = gmtFileName; this.setOfgenesets = setOfgensets; } @Override public void run(TaskMonitor taskMonitor) throws Exception { taskMonitor.setTitle("Parsing GMT file"); parse(); } public void parse() throws IOException, InterruptedException { try(BufferedReader reader = new BufferedReader(new FileReader(gmtFileName))) { for(String line; (line = reader.readLine()) != null;) { if(cancelled) { throw new InterruptedException(); } GeneSet gs = readGeneSet(map, line); if(gs != null && setOfgenesets != null) { Map<String, GeneSet> genesets = setOfgenesets.getGeneSets(); genesets.put(gs.getName(), gs); } } } } private static GeneSet readGeneSet(EnrichmentMap map, String line) { String[] tokens = line.split("\t"); //only go through the lines that have at least a gene set name and description. if(tokens.length >= 2) { // set of genes keys ImmutableSet.Builder<Integer> builder = ImmutableSet.builder(); String name = tokens[0].toUpperCase().trim(); String description = tokens[1].trim(); for(int i = 2; i < tokens.length; i++) { String gene = tokens[i].toUpperCase(); if(map.containsGene(gene)) { builder.add(map.getHashFromGene(gene)); } else if(!gene.isEmpty()) { Integer hash = map.addGene(gene).get(); builder.add(hash); } } return new GeneSet(name, description, builder.build()); } return null; } private String deAccent(String str) { String nfdNormalizedString = Normalizer.normalize(str, Normalizer.Form.NFD); Pattern pattern = Pattern.compile("\\p{InCombiningDiacriticalMarks}+"); return pattern.matcher(nfdNormalizedString).replaceAll(""); } }