package biz.c24.io.spring.batch.reader.source; import java.io.IOException; import java.io.Reader; /** * Utility class to rapidly split up data into lines. * Created instead of using a BufferedReader or Scanner as we need the ability to 'push back' at most 1 line of data while also * splitting lines on an arbitrary character * * General performance of readLine is on a par with BufferedReader; however the optimise checks add around 1% in the worst case. * In the best case (where only a '\r' or '\n' is used as the line terminator or readUntil is used) performance is 15-20% faster * than BufferedReader. * * @author Andrew Elmore * */ public class SplittingReader extends Reader { /** * Where we actually get our source data from */ private Reader sourceReader; /** * Cache for data read from the sourceReader */ private char[] buffer = new char[10000]; /** * Current index into the buffer */ private int index = 0; /** * Index in the buffer up to which data is populated */ private int endIndex = 0; /** * Tracks whether we've been closed or not */ boolean isOpen = true; /** * Allow up to one 'line' of data to be pushed back. Will be returned by any calls to readLine/Until prior to consuming more * data from the buffer. */ private String cached = null; /** * If we detect a single-character line terminator, can we assume that all lines use that terminator? */ private final boolean consistentLineTerminators; /** * Single character line terminator if detected */ private Character terminator = null; public SplittingReader(Reader reader) { this.sourceReader = reader; this.consistentLineTerminators = false; } /** * * @param reader The underlying Reader to extract data from * @param consistentLineTerminators Set to true if all lines use the same line terminator for an approx 15% speed boost */ public SplittingReader(Reader reader, boolean consistentLineTerminators) { this.sourceReader = reader; this.consistentLineTerminators = consistentLineTerminators; } public Reader getReader() { return sourceReader; } /** * Overwrites the contents of the current buffer with more data if available * * @return True iff we read more data from the underlying sourceReader * @throws IOException */ private boolean fillBuffer() throws IOException { if(endIndex >= 0) { endIndex = sourceReader.read(buffer, 0, buffer.length); } return endIndex > 0; } /** * Extracts characters from the data stream until either: * a) we run out of characters to read or * b) the next character to be read matches c * * In other words c is not included at the end of the stream but will be the first character * of the next String read via this method. * * * @param c The character to stop extracting on. * @return The extracted string * @throws IOException */ public String readUntil(char c) throws IOException { String result = null; if(cached != null) { result = cached; cached = null; } else { boolean parsing = true; while(parsing) { // Skip the first character - if it matches c, we want the next one anyway int i = result == null? index + 1 : index; // As odd as this construction looks, we get approx 6% speed increase over a straight while loop and updating the member var in place for(; i < endIndex; i++) { if(buffer[i] == c) { parsing = false; break; } } if(i > index && i <= endIndex) { // Cache what we have so far String fragment = new String(buffer, index, i - index); result = result == null? fragment : result + fragment; } if(parsing) { // We're here because we ran out of data. See if there's any more if(fillBuffer()) { index = 0; } else { parsing = false; } } else { index = i; } } } return result; } /** * Extracts characters from the data stream until either: * a) we run out of characters to read or * b) the last character matches c * * Unlike readUntil, c will be included as the last character of the returned string. * Subsequent calls with start with the next character. * * @param c The character to stop extracting on. * @return The extracted String * @throws IOException */ public String readUntilInclusive(char c) throws IOException { String result = null; if(cached != null) { result = cached; cached = null; } else { boolean parsing = true; while(parsing) { int i = index; // As odd as this construction looks, we get approx 6% speed increase over a straight while loop and updating the member var in place for(; i < endIndex; i++) { if(buffer[i] == c) { parsing = false; i++; break; } } if(i > index && i <= endIndex) { // Cache what we have so far String fragment = new String(buffer, index, i - index); result = result == null? fragment : result + fragment; } if(parsing) { // We're here because we ran out of data. See if there's any more if(fillBuffer()) { index = 0; } else { parsing = false; } } else { index = i; } } } return result; } /** * Reads a line from the underlying data stream. A line is terminated with one of: * \n * \r * \r\n * * If optimise is true and we notice that a single-character line terminator is being used, * subsequent calls will delegate to readInclusiveUntil which is slightly faster. * * @return The extracted String * @throws IOException */ public String readLine() throws IOException { String result = null; if(consistentLineTerminators && terminator != null) { return readUntilInclusive(terminator); } else if(cached != null) { result = cached; cached = null; } else { boolean parsing = true; char last = 'a'; while(parsing) { int i = index; // As odd as this construction looks, we get approx 6% speed increase over a straight while loop and updating the member var in place for(; i < endIndex; i++) { char c = buffer[i]; // We detect the following line terminators: // \r // \n // \r\n if(c == '\n') { i++; parsing = false; if(consistentLineTerminators && last != '\r') { terminator = '\n'; } break; } else if(last == '\r') { parsing = false; if(consistentLineTerminators && c != '\n') { terminator = '\r'; } break; } last = c; } if(i > index && i <= endIndex) { // Cache what we have so far String fragment = new String(buffer, index, i - index); result = result == null? fragment : result + fragment; } if(parsing) { // We're here because we ran out of data. See if there's any more if(fillBuffer()) { index = 0; } else { parsing = false; } } else { index = i; } } } return result; } /** * Allow a caller to hand back a line of input to us. Subsequent attempts to read data will consume * from this data first. * * @param line */ public void pushback(String line) { cached = line; } /** * Whether or not this Reader has more data available */ @Override public boolean ready() throws IOException { return cached != null || index < endIndex || sourceReader.ready(); } /** * Closes this reader. * Implemented purely for those Reader clients which expect to get an IOException from read() once the stream is closed, * rather than inferring it from the return value. * */ @Override public void close() throws IOException { isOpen = false; sourceReader.close(); } /* * (non-Javadoc) * @see java.io.Reader#read(char[], int, int) */ @Override public int read(char[] cbuf, int off, int len) throws IOException { if(!isOpen) { throw new IOException("Stream closed"); } int startOffset = off; while(len > 0) { if(cached != null) { // Use this up first char[] str = cached.toCharArray(); int charsToCopy = str.length; if(len < charsToCopy) { charsToCopy = len; } System.arraycopy(str, 0, cbuf, off, charsToCopy); off += charsToCopy; len -= charsToCopy; if(charsToCopy < str.length) { cached = new String(str, charsToCopy, str.length - charsToCopy); } else { cached = null; } } int charsToCopy = endIndex - index; if(charsToCopy > 0) { if(len < charsToCopy) { charsToCopy = len; } System.arraycopy(buffer, index, cbuf, off, charsToCopy); index += charsToCopy; off += charsToCopy; len -= charsToCopy; } if(len > 0) { // We've exhausted our buffered data - get more if(fillBuffer()) { index = 0; } else { break; } } } return startOffset == off? -1 : off - startOffset; } }