package no.priv.garshol.duke.utils;
import java.io.Reader;
import java.io.IOException;
import no.priv.garshol.duke.DukeException;
/**
 * A small buffered CSV parser which hands out one row at a time as an
 * array of strings.
 *
 * <p>Values may be wrapped in double quotes. Inside a quoted value the
 * separator and line breaks are kept as part of the value, and a doubled
 * quote ({@code ""}) stands for a single literal quote. A row ends at
 * {@code \r}, {@code \n} or {@code \r\n}.
 *
 * <p>Parsing is done directly on an internal character buffer, so a
 * single row must fit inside that buffer.
 */
public class CSVReader {
  // NOTE(review): possibly meant to be 65536; value kept unchanged.
  private static final int DEFAULT_BUFFER_SIZE = 65386;
  // initial capacity of the per-row scratch array; it grows when needed
  private static final int INITIAL_COLUMNS = 1000;

  private final Reader in;
  private final char[] buf;
  private int pos; // where we are in the buffer
  private int len; // number of valid chars in buf, or -1 at end of stream
  private String[] tmp; // scratch space for the values of the current row
  private char separator;
  private final String file; // for error messages, can be null

  /**
   * Creates a reader with the default buffer size and no file name for
   * error messages.
   *
   * @param in the character stream to parse
   * @throws IOException if the initial read from {@code in} fails
   */
  public CSVReader(Reader in) throws IOException {
    this(in, DEFAULT_BUFFER_SIZE, null);
  }

  /**
   * Creates a reader and immediately fills the buffer from {@code in}.
   *
   * @param in the character stream to parse
   * @param buflen size of the internal buffer in chars; no row may be
   *        longer than this
   * @param file name used in error messages, can be null
   * @throws IOException if the initial read from {@code in} fails
   */
  public CSVReader(Reader in, int buflen, String file) throws IOException {
    this.buf = new char[buflen];
    this.pos = 0;
    this.len = in.read(buf, 0, buf.length);
    this.tmp = new String[INITIAL_COLUMNS];
    this.in = in;
    this.separator = ','; // default
    this.file = file;
  }

  /**
   * Sets the character separating values within a row. The default is a
   * comma. Takes effect from the next call to {@link #next()}.
   *
   * @param separator the new separator character
   */
  public void setSeparator(char separator) {
    this.separator = separator;
  }

  /**
   * Parses and returns the next row.
   *
   * <p>If the buffer runs out before the end of the row is seen, the
   * unparsed row is moved to the front of the buffer, the buffer is
   * topped up from the stream, and the row is parsed again from its
   * start.
   *
   * <p>NOTE: a {@code \n} directly following a {@code \n} row terminator
   * is skipped as well, and a separator which is the very last character
   * of the stream does not produce a trailing empty value.
   *
   * @return the values of the row, or null when there are no more rows
   * @throws IOException if reading from the underlying stream fails
   * @throws DukeException if a row does not fit in the buffer, or if the
   *         stream ends inside a quoted value
   */
  public String[] next() throws IOException {
    // each pass of this loop is one attempt at parsing a row; a new pass
    // is made only after more data has been read into the buffer
    while (true) {
      if (len == -1 || pos >= len)
        return null;

      int colno = 0;
      int rowstart = pos; // used for rebuffering at end
      int prev = pos - 1; // index just before the start of the next value
      boolean startquote = false; // did the latest value open with a quote?
      boolean endquote = false;   // ... and did we see its closing quote?

      while (pos < len) {
        boolean escapedQuote = false; // tracked per value, not per row
        startquote = buf[pos] == '"';
        if (startquote) {
          prev++;
          pos++;
        }

        // scan forward, looking for end of value
        while (true) {
          if (startquote) {
            // quoted: only a quote can end the value
            while (pos < len && buf[pos] != '"')
              pos++;
          } else {
            // unquoted: separator or line break ends the value
            while (pos < len && buf[pos] != separator &&
                   buf[pos] != '\n' && buf[pos] != '\r')
              pos++;
          }

          if (pos + 1 >= len || buf[pos] != '"' || buf[pos + 1] != '"')
            break; // we found the end of this value, so stop

          // found a "". carry on
          escapedQuote = true;
          pos += 2; // step to character after next
        }

        // a quoted scan stops either on the closing quote or at the end
        // of the buffer; only the former means the value was terminated
        endquote = startquote && pos < len && buf[pos] == '"';

        String value = new String(buf, prev + 1, pos - prev - 1);
        store(colno++, escapedQuote ? unescape(value) : value);

        if (endquote)
          pos++; // step over the '"'
        prev = pos;

        if (pos >= len)
          break; // jump out of the loop to rebuffer and try again

        if (buf[pos] == '\r' || buf[pos] == '\n') {
          pos++; // step over the \r or \n
          if (pos >= len)
            break; // jump out of the loop to rebuffer and try again
          if (buf[pos] == '\n')
            pos++; // step over this, too
          break; // we're done
        }
        pos++; // step over the separator
      }

      if (pos < len)
        return copyRow(colno); // the whole row was inside the buffer

      // we've exhausted the buffer. that means either we've read the
      // entire stream, or we need to fill up the buffer.
      if (rowstart == 0 && len == buf.length)
        throw new DukeException("Row length bigger than buffer size (" +
                                buf.length + "); unbalanced quotes? in " +
                                file);

      // move the current row to the front and read more after it
      System.arraycopy(buf, rowstart, buf, 0, len - rowstart);
      len = len - rowstart;
      int read = in.read(buf, len, buf.length - len);
      if (read == -1) {
        // end of stream: what we parsed above is the final row
        len = -1;
        if (startquote && !endquote)
          throw new DukeException("Unbalanced quote in CSV file: " + file);
        return copyRow(colno);
      }

      len += read;
      pos = 0; // the row now starts at the beginning of the buffer
    }
  }

  /**
   * Closes the underlying stream.
   *
   * @throws IOException if closing the stream fails
   */
  public void close() throws IOException {
    in.close();
  }

  // Puts value into the scratch array, doubling the array when it is full.
  private void store(int index, String value) {
    if (index == tmp.length) {
      String[] bigger = new String[tmp.length * 2];
      System.arraycopy(tmp, 0, bigger, 0, tmp.length);
      tmp = bigger;
    }
    tmp[index] = value;
  }

  // Returns a fresh array holding the first count values of the scratch array.
  private String[] copyRow(int count) {
    String[] row = new String[count];
    System.arraycopy(tmp, 0, row, 0, count);
    return row;
  }

  // Turns every doubled quote into a single quote.
  private String unescape(String val) {
    return val.replace("\"\"", "\"");
  }
}