package org.seqcode.data.io; import java.util.Iterator; import java.util.regex.*; import java.io.*; import org.seqcode.genome.Genome; import org.seqcode.genome.location.Region; import org.seqcode.genome.location.StrandedRegion; import org.seqcode.genome.sequence.SequenceGenerator; import org.seqcode.gsebricks.iterators.SingleIterator; import org.seqcode.gsebricks.verbs.ExpanderIterator; import org.seqcode.gsebricks.verbs.FileLineExpander; import org.seqcode.gsebricks.verbs.Sink; import org.seqcode.gseutils.Closeable; import org.seqcode.gseutils.NotFoundException; /** * Writes the sequence contained in the input Regions * to a FASTA file */ public class FASTAWriter<X extends Region> implements Sink<X>, Closeable { public static void main(String[] args) { FASTAWriter writer = new FASTAWriter(System.out); Pattern p = Pattern.compile("(.*):([\\d]+)-([\\d]+)"); try { Genome genome = Genome.findGenome(args[0]); SingleIterator<File> fitr = new SingleIterator<File>(new File(args[1])); Iterator<String> lines = new ExpanderIterator<File,String>(new FileLineExpander(), fitr); while(lines.hasNext()) { String line = lines.next().trim(); Matcher m = p.matcher(line); if(m.matches()) { String chrom = m.group(1); if(chrom.startsWith("chr")) { chrom = chrom.substring(3, chrom.length()); } int start = Integer.parseInt(m.group(2)); int end = Integer.parseInt(m.group(3)); Region r = new Region(genome, chrom, start, end); writer.consume(r); } else { System.err.println("Can't match \"" + line + "\""); } } } catch (NotFoundException e) { e.printStackTrace(); } } private PrintStream ps; private FileOutputStream os; private SequenceGenerator seqgen; private int lineLength; public FASTAWriter(File f) throws IOException { os = new FileOutputStream(f); ps = new PrintStream(os); seqgen = new SequenceGenerator(); lineLength = 100; } public FASTAWriter (String fname) throws IOException, FileNotFoundException { os = new FileOutputStream(fname); ps = new PrintStream(os); seqgen = new SequenceGenerator(); lineLength = 100; } public FASTAWriter(PrintStream ps) { seqgen = new SequenceGenerator(); this.ps = ps; os = null; lineLength = 100; } public void setLineLength(int ll) { lineLength = ll; } public void init() {} public void useCache(boolean b) { seqgen.useCache(b); } public void consume(Iterator<X> iter) { while (iter.hasNext()) { consume(iter.next()); } } public void finish() { close(); } public void consume(X r) { Genome g = r.getGenome(); String s = seqgen.execute(r); if (r instanceof StrandedRegion) { if (((StrandedRegion)r).getStrand() == '-') { s = org.seqcode.genome.sequence.SequenceUtils.reverseComplement(s); } } int start = 0; int len = s.length(); ps.println(">" + r.toString()); while (start < len) { int end = start + lineLength; if (end > len) { end = len; } ps.println(s.substring(start,end)); start+= lineLength; } } public void close() { try { ps.close(); if (os != null) { os.close(); } ps.close(); seqgen = null; os = null; ps = null; } catch (IOException ex) { throw new RuntimeException(ex.toString(),ex); } } public boolean isClosed() {return ps == null;} }