SplittingReader.java example

Explorer

c24-spring-master
- c24-spring-batch
  - src
    - main
      - java
        biz
        c24
        io
        spring
        batch
        C24CompoundValidationException.java
        config
        BatchItemReaderParser.java
        C24NamespaceHandler.java
        FileSourceParser.java
        FileWriterSourceParser.java
        ItemReaderParser.java
        ItemWriterParser.java
        TransformItemProcessorParser.java
        XmlItemReaderParser.java
        ZipFileSourceParser.java
        ZipFileWriterSourceParser.java
        processor
        C24TransformItemProcessor.java
        C24ValidatingItemProcessor.java
        reader
        C24BatchItemReader.java
        C24ItemReader.java
        C24ValidationException.java
        C24XmlItemReader.java
        ParseListener.java
        Parser.java
        SyncParser.java
        source
        FileSource.java
        SplittingReader.java
        SplittingReaderSource.java
        ZipFileSource.java
        writer
        C24ItemWriter.java
        source
        FileWriterSource.java
        WriterSource.java
        ZipFileWriterSource.java
    - test
      - java
        biz
        c24
        io
        spring
        batch
        config
        C24ItemReaderParserTests.java
        C24ItemWriterParserTests.java
        C24TransformItemProcessorParserTests.java
        processor
        C24TransformItemProcessorTests.java
        reader
        C24BatchItemReaderTests.java
        C24ItemReaderParallelTests.java
        C24ItemReaderTests.java
        C24XmlItemReaderTests.java
        performance
        StartSplittingParsePerformance.java
        StartStopSplittingParsePerformance.java
        XmlSplittingParsePerformance.java
        util
        ItemReaderJobRunner.java
        MockReader.java
        MockSplittingReaderSource.java
        source
        SplittingReaderTests.java
        writer
        C24ItemWriterTests.java
        source
        FileWriterSourceTests.java
- c24-spring-core
  - src
    - main
      - java
        biz
        c24
        io
        spring
        config
        BeanDefinitionUtils.java
        C24HttpMessageConverterBeanDefinitionParser.java
        C24MarshallerBeanDefinitionParser.java
        C24ModelBeanDefinitionParser.java
        C24NamespaceHandler.java
        FormatBeanDefinitionParser.java
        core
        C24Model.java
        DataFormat.java
        DataFormats.java
        http
        C24HttpMessageConverter.java
        oxm
        C24Marshaller.java
        sink
        AbstractSinkFactory.java
        BinarySinkFactory.java
        JsonSinkFactory.java
        OutputType.java
        SinkFactory.java
        TagValuePairSinkFactory.java
        TextualSinkFactory.java
        XmlSinkFactory.java
        source
        BinarySourceFactory.java
        FixSourceFactory.java
        JsonSourceFactory.java
        SourceFactory.java
        SwiftSourceFactory.java
        TextualSourceFactory.java
        XmlSourceFactory.java
        util
        C24Utils.java
    - test
      - java
        biz
        c24
        io
        spring
        config
        C24HttpMessageConverterBeanDefinitionParserIntegrationTests.java
        C24MarshallerBeanDefinitionParserIntegrationTests.java
        core
        DateFormatUnitTests.java
        http
        C24HttpMessageConverterUnitTests.java
        model
        TestConstants.java
        oxm
        C24MarshallerUnitTests.java
        util
        C24UtilsTests.java
- c24-spring-integration
  - src
    - main
      - java
        biz
        c24
        io
        spring
        integration
        C24Headers.java
        config
        C24NamespaceHandler.java
        FileSplittingTransformerParser.java
        MarshallingTransformerParser.java
        TransformerParser.java
        UnmarshallingTransformerParser.java
        ValidatingHeaderEnricherParser.java
        ValidatingMessageSelectorParser.java
        XPathHeaderEnricherParser.java
        XPathRouterParser.java
        XPathSelectorParser.java
        XPathTransformerParser.java
        jdbc
        CdoSqlParameterSource.java
        CdoSqlParameterSourceFactory.java
        router
        C24XPathRouter.java
        selector
        AbstractXPathMessageSelector.java
        C24BooleanTestXPathMessageSelector.java
        C24StringValueTestXPathMessageSelector.java
        C24ValidatingMessageSelector.java
        transformer
        C24FileSplittingTransformer.java
        C24MarshallingTransformer.java
        C24Transformer.java
        C24UnmarshallingTransformer.java
        C24ValidatingHeaderEnricher.java
        C24XPathHeaderEnricher.java
        C24XPathTransformer.java
        validation
        C24AggregatedMessageValidationException.java
        C24ValidatingMessageProcessor.java
        xpath
        XPathEvaluationType.java
    - test
      - java
        biz
        c24
        io
        spring
        integration
        config
        BaseIntegrationTest.java
        FileSplitterTests.java
        HeaderEnricherTests.java
        MarshalITests.java
        SelectorTests.java
        UnmarshalITests.java
        ValidatingHeaderEnricherTests.java
        ValidatingSelectorTests.java
        XPathRouterDefaultChannelTests.java
        XPathRouterDefaultMappingTests.java
        XPathRouterMappingTests.java
        XPathRouterNoDefaultChannelTests.java
        jdbc
        CdoSqlParameterSourceTests.java
        router
        XPathRouterTestIUTests.java
        selectors
        ValidatingMessageSelectorTests.java
        test
        TestUtils.java
        transformer
        IoMarshallingTransformerIUTests.java
        IoTransformerIUTests.java
        IoUnmarshallingTransformerIUTests.java
        ValidatingHeaderEnricherTests.java
        XPathTransformerIUTests.java

package biz.c24.io.spring.batch.reader.source;

import java.io.IOException;
import java.io.Reader;

/**
 * Utility class to rapidly split up data into lines.
 * Created instead of using a BufferedReader or Scanner as we need the ability to 'push back' at most 1 line of data while also
 * splitting lines on an arbitrary character.
 *
 * Performance notes from the original author: readLine is on a par with BufferedReader, with the line-terminator
 * detection checks adding around 1% in the worst case. In the best case (where only a '\r' or '\n' is used as the
 * line terminator or readUntil is used) performance was measured as 15-20% faster than BufferedReader.
 *
 * The readLine/readUntil/readUntilInclusive methods return null once no more data is available.
 *
 * @author Andrew Elmore
 *
 */
public class SplittingReader extends Reader {

    /**
     * Buffer capacity (in chars) used when the caller does not specify one
     */
    private static final int DEFAULT_BUFFER_SIZE = 10000;

    /**
     * Where we actually get our source data from
     */
    private final Reader sourceReader;

    /**
     * Cache for data read from the sourceReader
     */
    private final char[] buffer;
    /**
     * Current index into the buffer
     */
    private int index = 0;
    /**
     * Index in the buffer up to which data is populated. Holds the last return value of sourceReader.read, so it
     * becomes -1 once the underlying reader has signalled end of stream.
     */
    private int endIndex = 0;

    /**
     * Tracks whether we've been closed or not
     */
    boolean isOpen = true;

    /**
     * Allow up to one 'line' of data to be pushed back. Will be returned by any calls to readLine/Until prior to consuming more
     * data from the buffer.
     */
    private String cached = null;

    /**
     * If we detect a single-character line terminator, can we assume that all lines use that terminator?
     */
    private final boolean consistentLineTerminators;

    /**
     * Single character line terminator if detected
     */
    private Character terminator = null;

    /**
     * Creates a reader which makes no assumption about line terminators being consistent.
     *
     * @param reader The underlying Reader to extract data from
     */
    public SplittingReader(Reader reader) {
        this(reader, false, DEFAULT_BUFFER_SIZE);
    }

    /**
     *
     * @param reader The underlying Reader to extract data from
     * @param consistentLineTerminators Set to true if all lines use the same line terminator for an approx 15% speed boost
     */
    public SplittingReader(Reader reader, boolean consistentLineTerminators) {
        this(reader, consistentLineTerminators, DEFAULT_BUFFER_SIZE);
    }

    /**
     *
     * @param reader The underlying Reader to extract data from
     * @param consistentLineTerminators Set to true if all lines use the same line terminator
     * @param bufferSize Capacity, in chars, of the internal buffer. Must be greater than zero
     * @throws IllegalArgumentException if bufferSize is not greater than zero
     */
    public SplittingReader(Reader reader, boolean consistentLineTerminators, int bufferSize) {
        if(bufferSize <= 0) {
            throw new IllegalArgumentException("Buffer size <= 0");
        }
        this.sourceReader = reader;
        this.consistentLineTerminators = consistentLineTerminators;
        this.buffer = new char[bufferSize];
    }

    public Reader getReader() {
        return sourceReader;
    }

    /**
     * Overwrites the contents of the current buffer with more data if available.
     * Once the underlying reader has reported end of stream (-1) it is not read from again.
     *
     * @return True iff we read more data from the underlying sourceReader
     * @throws IOException
     */
    private boolean fillBuffer() throws IOException {
        if(endIndex >= 0) {
            endIndex = sourceReader.read(buffer, 0, buffer.length);
        }
        return endIndex > 0;
    }

    /**
     * Refills the buffer and, if data was obtained, resets the read position to its start.
     *
     * @return True iff more data is now available in the buffer
     * @throws IOException
     */
    private boolean refill() throws IOException {
        if(fillBuffer()) {
            index = 0;
            return true;
        }
        return false;
    }

    /**
     * Appends the buffered characters in [index, end) to the text accumulated so far.
     *
     * @param soFar Text accumulated by the current read call, or null if none yet
     * @param end Exclusive end position in the buffer of the fragment to append
     * @return soFar with the fragment appended; soFar unchanged if the range is empty or not within the populated buffer
     */
    private String appendFragment(String soFar, int end) {
        if(end > index && end <= endIndex) {
            String fragment = new String(buffer, index, end - index);
            return soFar == null? fragment : soFar + fragment;
        }
        return soFar;
    }

    /**
     * Extracts characters from the data stream until either:
     * a) we run out of characters to read or
     * b) the next character to be read matches c
     *
     * In other words c is not included at the end of the stream but will be the first character
     * of the next String read via this method.
     *
     * If a line has been pushed back it is returned as-is, without being searched for c.
     *
     * @param c The character to stop extracting on.
     * @return The extracted string, or null if there is no more data
     * @throws IOException
     */
    public String readUntil(char c) throws IOException {
        String result = null;

        if(cached != null) {
            result = cached;
            cached = null;
        } else {
            boolean parsing = true;

            while(parsing) {
                // Skip the first character - if it matches c, we want the next one anyway
                int i = result == null? index + 1 : index;
                // Scanning with a local index (rather than updating the member in place) is deliberate;
                // the original author measured it as faster
                for(; i < endIndex; i++) {
                    if(buffer[i] == c) {
                        parsing = false;
                        break;
                    }
                }

                // Keep what we have so far
                result = appendFragment(result, i);

                if(parsing) {
                    // We're here because we ran out of data. See if there's any more
                    parsing = refill();
                } else {
                    // Leave c unconsumed so it starts the next result
                    index = i;
                }
            }
        }
        return result;
    }


    /**
     * Extracts characters from the data stream until either:
     * a) we run out of characters to read or
     * b) the last character matches c
     *
     * Unlike readUntil, c will be included as the last character of the returned string.
     * Subsequent calls will start with the next character.
     *
     * If a line has been pushed back it is returned as-is, without being searched for c.
     *
     * @param c The character to stop extracting on.
     * @return The extracted String, or null if there is no more data
     * @throws IOException
     */
    public String readUntilInclusive(char c) throws IOException {
        String result = null;

        if(cached != null) {
            result = cached;
            cached = null;
        } else {
            boolean parsing = true;

            while(parsing) {
                int i = index;
                // Scanning with a local index (rather than updating the member in place) is deliberate;
                // the original author measured it as faster
                for(; i < endIndex; i++) {
                    if(buffer[i] == c) {
                        parsing = false;
                        // Step past c so that it is included in the fragment
                        i++;
                        break;
                    }
                }

                // Keep what we have so far
                result = appendFragment(result, i);

                if(parsing) {
                    // We're here because we ran out of data. See if there's any more
                    parsing = refill();
                } else {
                    index = i;
                }
            }
        }
        return result;
    }

    /**
     * Reads a line from the underlying data stream. A line is terminated with one of:
     * \n
     * \r
     * \r\n
     *
     * The terminator is included at the end of the returned String.
     *
     * If consistentLineTerminators is true and we notice that a single-character line terminator is being used,
     * subsequent calls will delegate to readUntilInclusive which is slightly faster.
     *
     * @return The extracted String, or null if there is no more data
     * @throws IOException
     */
    public String readLine() throws IOException {
        String result = null;

        if(consistentLineTerminators && terminator != null) {
            return readUntilInclusive(terminator);
        } else if(cached != null) {
            result = cached;
            cached = null;
        } else {
            boolean parsing = true;
            // Previous character seen by this call; survives buffer refills so that a "\r\n" pair
            // split across two buffers is still recognised. Initial value is any non-'\r' character.
            char last = 'a';

            while(parsing) {
                int i = index;
                // Scanning with a local index (rather than updating the member in place) is deliberate;
                // the original author measured it as faster
                for(; i < endIndex; i++) {
                    char c = buffer[i];
                    // We detect the following line terminators:
                    // \r
                    // \n
                    // \r\n
                    if(c == '\n') {
                        // Step past the '\n' so that it is included in the line
                        i++;
                        parsing = false;
                        if(consistentLineTerminators && last != '\r') {
                            terminator = '\n';
                        }
                        break;
                    } else if(last == '\r') {
                        // A lone '\r' ended the line; the current character belongs to the next line
                        parsing = false;
                        if(consistentLineTerminators) {
                            terminator = '\r';
                        }
                        break;
                    }
                    last = c;
                }

                // Keep what we have so far
                result = appendFragment(result, i);

                if(parsing) {
                    // We're here because we ran out of data. See if there's any more
                    parsing = refill();
                } else {
                    index = i;
                }
            }
        }
        return result;
    }

    /**
     * Allow a caller to hand back a line of input to us. Subsequent attempts to read data will consume
     * from this data first. Only one line is held; a second call replaces any line not yet consumed.
     *
     * @param line The data to return from the next read
     */
    public void pushback(String line) {
        cached = line;
    }

    /**
     * Whether or not this Reader has more data available.
     *
     * @return True if there is a pushed-back line, unread data in our buffer or the underlying reader reports ready
     */
    @Override
    public boolean ready() throws IOException {
        return cached != null || index < endIndex || sourceReader.ready();
    }

    /**
     * Closes this reader and the underlying reader.
     * Implemented purely for those Reader clients which expect to get an IOException from read() once the stream is closed,
     * rather than inferring it from the return value.
     *
     */
    @Override
    public void close() throws IOException {
        isOpen = false;
        sourceReader.close();
    }

    /**
     * Reads characters into cbuf, consuming any pushed-back line first, then buffered data, then
     * refilling from the underlying reader until len characters have been copied or no more data is obtained.
     *
     * @param cbuf Destination buffer
     * @param off Offset in cbuf at which to start storing characters
     * @param len Maximum number of characters to read
     * @return The number of characters read, 0 if len is zero, or -1 if no characters could be read
     * @throws IOException if this reader has been closed or the underlying reader fails
     * @see java.io.Reader#read(char[], int, int)
     */
    @Override
    public int read(char[] cbuf, int off, int len) throws IOException {

        if(!isOpen) {
            throw new IOException("Stream closed");
        }

        if(len == 0) {
            // A zero-length request reads nothing; it must not be reported as end of stream
            return 0;
        }

        int startOffset = off;

        while(len > 0) {
            if(cached != null) {
                // Use this up first
                char[] str = cached.toCharArray();

                int charsToCopy = str.length;
                if(len < charsToCopy) {
                    charsToCopy = len;
                }
                System.arraycopy(str, 0, cbuf, off, charsToCopy);

                off += charsToCopy;
                len -= charsToCopy;

                if(charsToCopy < str.length) {
                    // Keep the part of the pushed-back line that didn't fit for the next read
                    cached = new String(str, charsToCopy, str.length - charsToCopy);
                } else {
                    cached = null;
                }
            }

            int charsToCopy = endIndex - index;
            if(charsToCopy > 0) {
                if(len < charsToCopy) {
                    charsToCopy = len;
                }
                System.arraycopy(buffer, index, cbuf, off, charsToCopy);

                index += charsToCopy;
                off += charsToCopy;
                len -= charsToCopy;
            }

            if(len > 0 && !refill()) {
                // We've exhausted our buffered data and there is no more to get
                break;
            }

        }

        return startOffset == off? -1 : off - startOffset;
    }
}