/*
* Created on Mar 9, 2006
*/
package org.seqcode.gsebricks.verbs.location;
import java.util.regex.*;
import org.seqcode.genome.Genome;
import org.seqcode.genome.location.NamedRegion;
import org.seqcode.genome.location.Region;
import org.seqcode.gsebricks.verbs.Mapper;
/**
 * Parses a textual description of a genomic interval into a {@link Region}.
 *
 * <p>The input line is trimmed and split on whitespace, and a leading "chr" is
 * removed from the first token. Two layouts are then recognised:
 * <ul>
 * <li>a coordinate string held entirely in the first token, either
 *     <code>chrom:start-end</code> or <code>chrom:start-end:strand</code>
 *     (the strand suffix is accepted but not stored in the result);</li>
 * <li>whitespace-separated columns <code>chrom start end [name]</code>; when a
 *     fourth column is present a {@link NamedRegion} carrying it is returned.</li>
 * </ul>
 *
 * @author tdanford
 */
public class RegionParser implements Mapper<String,Region> {

    /** Prefix stripped from the start of the chromosome token. */
    private static final String CHROM_PREFIX = "chr";

    /** Region pattern: <code>chrom:start-end</code>. */
    private static final Pattern regPatt = Pattern.compile("(\\w+):(\\d+)-(\\d+)");

    /**
     * StrandedRegion pattern: <code>chrom:start-end:strand</code>. The strand
     * group accepts '+', '-' and '.' in addition to word characters, since a
     * plain <code>\w+</code> cannot match a strand written as "+" or "-".
     */
    private static final Pattern sregPatt = Pattern.compile("(\\w+):(\\d+)-(\\d+):([\\w+\\-.]+)");

    private final Genome genome;

    /** Column positions used when the line is in whitespace-separated form. */
    private final int chromIndex, startIndex, endIndex, nameIndex;

    /** Smallest number of columns that still contains chrom, start and end. */
    private final int minLength;

    /**
     * Creates a parser whose regions are built against the given genome.
     * Columns are read as chrom=0, start=1, end=2 and (optional) name=3.
     *
     * @param g genome handed to every {@link Region} this parser constructs
     */
    public RegionParser(Genome g) {
        genome = g;
        chromIndex = 0;
        startIndex = 1;
        endIndex = 2;
        nameIndex = 3;
        minLength = (Math.max(chromIndex, Math.max(startIndex, endIndex))) + 1;
    }

    /**
     * Converts one line of text into a region.
     *
     * @param input the line to parse; must not be null
     * @return the parsed {@link Region} (a {@link NamedRegion} when a name
     *         column is present), or <code>null</code> when the line is neither
     *         a coordinate string nor has at least the chrom/start/end columns,
     *         in which case a message is written to <code>System.err</code>
     * @throws NumberFormatException if a start or end value is not a valid int
     */
    public Region execute(String input) {
        // Trim first: a leading blank would otherwise produce an empty first
        // token and shift every column by one.
        String[] array = input.trim().split("\\s+");

        String chrom = array[chromIndex];
        // Remove "chr" only when it is a true prefix, never from the middle of a name.
        if(chrom.startsWith(CHROM_PREFIX)) {
            chrom = chrom.substring(CHROM_PREFIX.length());
        }

        Region fromCoordinates = parseCoordinateString(chrom);
        if(fromCoordinates != null) {
            return fromCoordinates;
        }

        if(array.length < minLength) {
            System.err.println("Line \"" + input + "\" doesn't have the correct length (" + minLength + ")");
            return null;
        }

        int start = Integer.parseInt(array[startIndex]);
        int end = Integer.parseInt(array[endIndex]);
        if(nameIndex < array.length) {
            return new NamedRegion(genome, chrom, start, end, array[nameIndex]);
        }
        return new Region(genome, chrom, start, end);
    }

    /**
     * Builds a region from a <code>chrom:start-end[:strand]</code> token, or
     * returns <code>null</code> when the token is in neither coordinate form.
     */
    private Region parseCoordinateString(String token) {
        Matcher m = regPatt.matcher(token);
        if(!m.matches()) {
            m = sregPatt.matcher(token);
            if(!m.matches()) {
                return null;
            }
        }
        // Groups 1-3 have the same meaning in both patterns.
        int start = Integer.parseInt(m.group(2));
        int end = Integer.parseInt(m.group(3));
        return new Region(genome, m.group(1), start, end);
    }
}