/*
* Created on Oct 6, 2005
*/
package org.seqcode.data.io.parsing;
import java.util.*;
import org.seqcode.gseutils.Pair;
import java.io.*;
/**
* @author tdanford
*
* <code>FASTAStream</code> is an iterator over the sequences in a FASTA file.
* Each sequence is returned as a Pair of Strings: the first String is the
* name of the sequence and the second String is the sequence itself.
*/
public class FASTAStream implements Iterator<Pair<String,String>>, org.seqcode.gseutils.Closeable {

    /** Human-readable description of the input source, used by {@link #getDescriptor()}. */
    private final String label;

    /** Underlying reader; set to <code>null</code> once the stream has been closed. */
    private BufferedReader br;

    /**
     * Name (header text without the leading '&gt;') of the next record to be
     * returned, or <code>null</code> when no further record is available.
     */
    private String pendingName;

    /**
     * Opens a FASTA file and positions the stream on its first record.
     *
     * @param f the FASTA file to read
     * @throws IOException if the file cannot be opened or read
     */
    public FASTAStream(File f) throws IOException {
        br = new BufferedReader(new FileReader(f));
        label = f.getAbsolutePath();
        init();
    }

    /**
     * Wraps an already-open reader and positions the stream on its first record.
     * The reader is closed by this stream once the last record has been returned
     * (or immediately, if it contains no record at all).
     *
     * @param r reader supplying FASTA-formatted text
     * @throws IOException if the reader fails while searching for the first header
     */
    public FASTAStream(BufferedReader r) throws IOException {
        br = r;
        label = "inputstream";
        init();
    }

    /**
     * Skips forward to the first header line (a line whose first non-blank
     * character is '&gt;') and remembers its name. Anything before that header
     * is discarded.
     *
     * @throws IOException if reading fails
     */
    private void init() throws IOException {
        pendingName = null;
        try {
            String line;
            while(pendingName == null && (line = br.readLine()) != null) {
                line = line.trim();
                if(line.length() > 0 && line.charAt(0) == '>') {
                    pendingName = parsePendingName(line);
                }
            }
        } finally {
            // No header was found (empty input, no '>' line, or a read error):
            // nothing will ever be iterated, so release the reader now rather
            // than leaking it.
            if(pendingName == null) { close(); }
        }
    }

    /**
     * Extracts the record name from a header line.
     *
     * @param n a header line
     * @return the trimmed line with a single leading '&gt;' removed, if present
     */
    private String parsePendingName(String n) {
        String base = n.trim();
        if(base.length() > 0 && base.charAt(0) == '>') {
            base = base.substring(1);
        }
        return base;
    }

    /**
     * @return <code>true</code> if another record can be obtained from {@link #next()}
     */
    public boolean hasNext() {
        return pendingName != null;
    }

    /**
     * Returns the next record. Each sequence line is trimmed and the lines are
     * concatenated without separators, so blank lines contribute nothing.
     * When the last record has been read the stream closes itself.
     * <p>
     * If an <code>IOException</code> occurs while reading, its stack trace is
     * printed to <code>System.err</code>, the sequence read so far is returned,
     * and the stream is closed.
     *
     * @return a Pair whose first element is the record name and whose second
     *         element is the sequence
     * @throws NoSuchElementException if there is no further record
     */
    public Pair<String,String> next() {
        if(!hasNext()) {
            // Without this guard an exhausted (already closed) stream would fail
            // with a NullPointerException on the nulled-out reader.
            throw new NoSuchElementException("No more sequences in " + getDescriptor());
        }

        String name = pendingName;
        pendingName = null;
        StringBuilder sb = new StringBuilder();

        try {
            String line;
            // Stop as soon as the header of the following record has been seen.
            while(pendingName == null && (line = br.readLine()) != null) {
                line = line.trim();
                if(line.length() > 0 && line.charAt(0) == '>') {
                    pendingName = parsePendingName(line);
                } else {
                    sb.append(line);
                }
            }
        } catch(IOException ie) {
            ie.printStackTrace(System.err);
        }

        // End of input (or a read error): there is no following record.
        if(pendingName == null) { close(); }

        return new Pair<String,String>(name, sb.toString());
    }

    /**
     * @return a short description of this stream, of the form <code>FASTA(label)</code>
     */
    public String getDescriptor() {
        return "FASTA(" + label + ")";
    }

    /**
     * Closes the underlying reader and discards any pending record. Calling
     * this method on an already-closed stream has no effect. A failure while
     * closing the reader is reported on <code>System.err</code> and otherwise ignored.
     */
    public void close() {
        if (isClosed()) { return; }
        try {
            br.close();
        } catch(IOException ie) {
            ie.printStackTrace(System.err);
        }
        pendingName = null;
        br = null;
    }

    /**
     * @return <code>true</code> once the underlying reader has been released
     */
    public boolean isClosed() {
        return br == null;
    }

    /**
     * Not supported.
     *
     * @throws UnsupportedOperationException always
     */
    public void remove() {
        throw new UnsupportedOperationException("Can't remove from a FASTAStream");
    }
}