/*
* Created on Sep 25, 2007
*/
package org.seqcode.data.io.parsing;
import java.io.*;
import java.util.*;
import java.util.regex.*;
/**
 * Minimal parser for delimiter-separated text (CSV by default).
 *
 * <p>A line is split on every separator character that is not enclosed in a
 * pair of quote characters. Fields are returned verbatim, i.e. surrounding
 * quote characters are <em>not</em> removed by {@link #parseLine(String)};
 * use {@link #unquote(String)} on individual fields to strip them.
 */
public class CSVFileParser {
    private final char quote;
    private final char separator;
    /** Matches a value that both starts and ends with the quote character. */
    private final Pattern unquoter;

    /** Creates a parser that uses {@code ','} as separator and {@code '"'} as quote. */
    public CSVFileParser() {
        this(',', '"');
    }

    /**
     * Creates a parser with a custom separator and quote character.
     *
     * @param sep character that separates fields
     * @param quo character that encloses fields which may contain the separator
     */
    public CSVFileParser(char sep, char quo) {
        separator = sep;
        quote = quo;
        // The quote character is escaped so that regex metacharacters
        // (e.g. '|', '*', '$') are treated literally.
        String q = Pattern.quote(String.valueOf(quote));
        unquoter = Pattern.compile("^" + q + "(.*)" + q + "$");
    }

    /**
     * Strips one pair of enclosing quote characters from a field.
     *
     * @param f the raw field text
     * @return the text between the leading and trailing quote character, or
     *         {@code f} itself when it is not enclosed in quotes
     */
    public String unquote(String f) {
        Matcher m = unquoter.matcher(f);
        return m.matches() ? m.group(1) : f;
    }

    /**
     * Opens a file and returns an iterator over its parsed, non-blank lines.
     * Each line is trimmed before being split with {@link #parseLine(String)}.
     * The underlying reader is closed once the end of the file is reached.
     *
     * @param f the file to read
     * @return an iterator yielding one field array per non-blank line
     * @throws IOException if the file cannot be opened or its first line
     *         cannot be read
     */
    public Iterator<String[]> parseFile(File f) throws IOException {
        return new CSVIterator(f);
    }

    /**
     * Splits a single line into fields.
     *
     * <p>Every occurrence of the quote character toggles the "inside quotes"
     * state; separators seen while inside quotes do not split the line.
     * Quote characters are kept in the returned fields.
     *
     * @param line the line to split
     * @return the fields of the line; always (number of unquoted separators
     *         + 1) entries, with empty strings for empty fields
     */
    public String[] parseLine(String line) {
        List<Integer> separatorOffsets = new ArrayList<Integer>();
        boolean inQuote = false;
        for (int i = 0; i < line.length(); i++) {
            char c = line.charAt(i);
            if (c == quote) {
                inQuote = !inQuote;
            } else if (c == separator && !inQuote) {
                separatorOffsets.add(i);
            }
        }
        return parseByOffsets(line, separatorOffsets);
    }

    /**
     * Cuts {@code line} at the given separator positions.
     *
     * @param line the line being split
     * @param offs ascending indices of the separator characters in {@code line}
     * @return the substrings between consecutive separators
     */
    private String[] parseByOffsets(String line, List<Integer> offs) {
        String[] fields = new String[offs.size() + 1];
        int start = 0;
        int idx = 0;
        for (int end : offs) {
            fields[idx++] = line.substring(start, end);
            start = end + 1;
        }
        // Always emit the last field, even when the line ends with a
        // separator (start == line.length() yields the empty string).
        fields[idx] = line.substring(start);
        return fields;
    }

    /**
     * Iterator over the non-blank lines of a file, each split into fields.
     * It reads one line ahead so that {@link #hasNext()} needs no I/O.
     */
    private class CSVIterator implements Iterator<String[]> {
        private final BufferedReader br;
        /** The next trimmed, non-blank line, or {@code null} at end of input. */
        private String nextLine;

        public CSVIterator(File f) throws IOException {
            br = new BufferedReader(new FileReader(f));
            nextLine = null;
            try {
                findNextLine();
            } catch (IOException e) {
                // Do not leak the open reader when priming the iterator fails.
                try {
                    br.close();
                } catch (IOException ignored) {
                    // The original read failure is the one worth reporting.
                }
                throw e;
            }
        }

        @Override
        public boolean hasNext() {
            return nextLine != null;
        }

        @Override
        public String[] next() {
            if (nextLine == null) {
                throw new NoSuchElementException();
            }
            String current = nextLine;
            try {
                findNextLine();
            } catch (IOException e) {
                // Best effort: report the failure, end the iteration, and
                // still hand back the line that was already read.
                e.printStackTrace();
                nextLine = null;
                try {
                    br.close();
                } catch (IOException e1) {
                    e1.printStackTrace();
                }
            }
            return parseLine(current);
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }

        /**
         * Advances {@code nextLine} to the next non-blank line (trimmed), or
         * sets it to {@code null} and closes the reader at end of input.
         */
        private void findNextLine() throws IOException {
            do {
                nextLine = br.readLine();
            } while (nextLine != null && nextLine.trim().length() == 0);
            if (nextLine != null) {
                nextLine = nextLine.trim();
            } else {
                br.close();
            }
        }
    }
}