/**
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership.
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this software; if not, write to the Free
* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA, or see the FSF site: http://www.fsf.org.
*/
package org.ut.biolab.medsavant.server.ontology;
import java.io.*;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.ut.biolab.medsavant.shared.model.OntologyTerm;
import org.ut.biolab.medsavant.shared.model.OntologyType;
import org.ut.biolab.medsavant.shared.util.RemoteFileCache;
/**
* Parses an OBO (Open Biomedical Ontology) file and loads it into a database table.
*
* @author tarkvara
*/
public class OBOParser {
private static final Log LOG = LogFactory.getLog(OBOParser.class);
private final OntologyType ontology;
private int lineNum;
public OBOParser(OntologyType ont) {
this.ontology = ont;
}
Map<String, OntologyTerm> load(URL source) throws FileNotFoundException, IOException {
BufferedReader reader = new BufferedReader(new FileReader(RemoteFileCache.getCacheFile(source)));
Map<String, OntologyTerm> terms = new HashMap<String, OntologyTerm>();
String line;
while ((line = reader.readLine()) != null) {
lineNum++;
if (line.equals("[Term]")) {
OntologyTerm t = parseNextTerm(reader);
if (t != null) {
if (t.getID() == null || t.getName() == null) {
LOG.info(String.format("Defective ontology term at line %d: %s/%s", lineNum, t.getID(), t.getName()));
break;
} else {
if (t.getID().startsWith("results")) {
LOG.info(String.format("Loading \"%s\" at line %d.", line, lineNum));
}
terms.put(t.getID(), t);
}
}
}
}
return terms;
}
private OntologyTerm parseNextTerm(BufferedReader reader) throws IOException {
String line;
String id = null, name = null, description = null;
List<String> altIDs = new ArrayList<String>();
List<String> parentIDs = new ArrayList<String>();
while ((line = reader.readLine()) != null) {
if (line.startsWith("[")) {
// We've hit the next term (or typedef). Rewind to the beginning of the line.
reader.reset();
break;
} else {
lineNum++;
if (line.length() > 0 && line.charAt(0) != '!') {
int colonPos = line.indexOf(":");
if (colonPos > 0) {
String key = line.substring(0, colonPos);
String value = line.substring(colonPos + 1);
int commentPos = indexOf(value, '!');
if (commentPos > 0) {
value = value.substring(0, commentPos);
}
value = value.trim();
if (key.equals("id")) {
id = value;
} else if (key.equals("alt_id")) {
altIDs.add(value);
} else if (key.equals("name")) {
name = value;
} else if (key.equals("def")) {
// Def consists of a quote-delimited string followed by some (ignored) reference material.
int quotePos = indexOf(value, '\"');
if (quotePos >= 0) {
value = value.substring(quotePos + 1);
quotePos = indexOf(value, '\"');
if (quotePos >= 0) {
value = value.substring(0, quotePos);
description = value.replace("\\", "");
}
}
} else if (key.equals("is_a")) {
parentIDs.add(value);
} else if (key.equals("is_obsolete") && value.equals("true")) {
return null;
}
}
}
reader.mark(10); // So we can rewind if the line is the next [Term].
}
}
return new OntologyTerm(ontology, id, name, description, altIDs.toArray(new String[0]), parentIDs.toArray(new String[0]));
}
/**
* Like the normal String.indexOf method, but pays attention to escape characters.
* @param s the string to be searched
* @param c the character we're looking for
*/
private static int indexOf(String s, char c) {
int pos = s.indexOf(c);
while (pos > 0 && s.charAt(pos - 1) == '\\') {
pos = s.indexOf(c, pos + 1);
}
return pos;
}
}