/*
* Copyright 2012 C24 Technologies.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package biz.c24.io.spring.batch.reader;
import biz.c24.io.api.data.ComplexDataObject;
import biz.c24.io.api.data.Element;
import biz.c24.io.api.data.ValidationEvent;
import biz.c24.io.api.data.ValidationException;
import biz.c24.io.api.data.ValidationListener;
import biz.c24.io.api.data.ValidationManager;
import biz.c24.io.api.presentation.Source;
import biz.c24.io.api.presentation.TextualSource;
import biz.c24.io.spring.batch.C24CompoundValidationException;
import biz.c24.io.spring.batch.reader.source.SplittingReaderSource;
import biz.c24.io.spring.batch.reader.source.SplittingReader;
import biz.c24.io.spring.core.C24Model;
import biz.c24.io.spring.source.SourceFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.batch.core.StepExecution;
import org.springframework.batch.core.annotation.AfterStep;
import org.springframework.batch.core.annotation.BeforeStep;
import org.springframework.batch.item.ItemReader;
import org.springframework.batch.item.NonTransientResourceException;
import org.springframework.batch.item.ParseException;
import org.springframework.batch.item.UnexpectedInputException;
import org.springframework.util.Assert;
import javax.annotation.PostConstruct;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.Collection;
import java.util.LinkedList;
import java.util.regex.Pattern;
/**
* ItemReader that reads ComplexDataObjects from a SplittingReaderSource.
* Optionally supports the ability to split the incoming data stream into entities by use of a
* regular expression to detect the start of a new entity; this allows the more expensive parsing
* to be performed in parallel.
*
* The optional splitting process currently assumes that each line:
* a) Is terminated with a platform specific CRLF (or equivalent)
* b) Belongs to at most one entity
*
* In all cases the optional validation takes place in parallel if multiple threads are used.
*
* @author Andrew Elmore
*/
public class C24ItemReader<Result> implements ItemReader<Result> {
/**
 * Logger shared by all instances. Declared final so it cannot be reassigned.
 */
private static final Logger LOG = LoggerFactory.getLogger(C24ItemReader.class);
/**
 * SourceFactory to use to generate our IO Sources.
 * When null, the default source of the element type's model is used instead.
 */
private SourceFactory ioSourceFactory = null;
/**
 * Shared parser used when we do not have an elementStartPattern and the source
 * allows multiple threads per reader
 */
private volatile Parser parser = null;
/**
 * Per-thread cache of parsers for the modes where each thread has its own parser
 */
private ThreadLocal<Parser> threadedParser = new ThreadLocal<Parser>();
/**
 * The type of CDO that we will parse from the source
 */
private Element elementType;
/**
 * An optional pattern to use to quickly split the readerSource so we can perform more heavyweight
 * parsing in parallel
 */
private Pattern elementStartPattern = null;
/**
 * An optional pattern to use to identify the end of a message. If specified, the message must end with an
 * EOF or this pattern. Additional matches of the startPattern before presence of the stop pattern will
 * not trigger the start of a new message
 */
private Pattern elementStopPattern = null;
/**
 * The source from which we'll read the data
 */
private SplittingReaderSource source;
/**
 * Per-thread validation managers used to validate the parsed CDOs.
 * Null when validation is disabled.
 */
private ThreadLocal<ValidationManager> validator = null;
/**
 * If we're validating, do we failfast or collect all failures?
 */
private boolean failfast = true;
/**
 * Allow clients to register a callback to intercept elements as we read them.
 */
private ParseListener<Object, Result> parseListener = null;
/**
 * Creates an unconfigured reader; the element type and source must be supplied
 * through the setters before use.
 */
public C24ItemReader() {
    super();
}
/**
 * Checks that the mandatory properties have been supplied: an element type and a source
 * are always required, and a stop pattern may only be used together with a start pattern.
 */
@PostConstruct
public void validateConfiguration() {
    Assert.notNull(elementType, "Element type must be set, either explicitly or by setting the model");
    Assert.notNull(source, "Source must be set");
    if (elementStopPattern == null) {
        // No stop pattern, so there is nothing further to cross-check
        return;
    }
    Assert.notNull(elementStartPattern, "elementStopPattern can only be used if an elementStartPattern is also set");
}
/**
 * Returns the ParseListener currently registered with this reader.
 *
 * @return The registered ParseListener, or null if none has been registered.
 */
public ParseListener<Object, Result> getParseListener() {
    return this.parseListener;
}
/**
 * Registers (or removes) the ParseListener for this reader.
 *
 * @param parseListener The object which should receive the callbacks; pass null to remove an existing ParseListener
 */
public void setParseListener(ParseListener<Object, Result> parseListener) {
    final ParseListener<Object, Result> listener = parseListener;
    this.parseListener = listener;
}
/**
 * Returns the element type that this reader attempts to parse from the source.
 *
 * @return The configured element type; null if it has not been set yet
 */
public Element getElementType() {
    return this.elementType;
}
/**
 * Sets the type of element that this reader attempts to parse from the source.
 *
 * @param elementType The type of element that we want to parse from the source
 */
public void setElementType(Element elementType) {
    final Element type = elementType;
    this.elementType = type;
}
/**
 * Sets the element type indirectly by taking the root element of the supplied model.
 *
 * @param model The model of the type we wish to parse
 */
public void setModel(C24Model model) {
    final Element rootElement = model.getRootElement();
    this.elementType = rootElement;
}
/**
 * Returns the regular expression used to split up the incoming data.
 *
 * @return The start-of-element regular expression; null if not set.
 */
public String getElementStartPattern() {
    if (elementStartPattern == null) {
        return null;
    }
    return elementStartPattern.pattern();
}
/**
 * Sets the regular expression used to quickly split up the source into individual entities for parsing.
 * The expression is compiled with {@link Pattern#DOTALL}.
 *
 * @param elementStartRegEx The regular expression to identify the start of a new entity in the source;
 *                          null clears any previously configured pattern (previously a null argument
 *                          failed with a NullPointerException)
 */
public void setElementStartPattern(String elementStartRegEx) {
    // Mirror the getter, which reports "not set" as null
    this.elementStartPattern = elementStartRegEx == null
            ? null
            : Pattern.compile(elementStartRegEx, Pattern.DOTALL);
}
/**
 * Returns the pattern we're using to determine the end of a message.
 *
 * @return The end-of-element regular expression; null if not set.
 */
public String getElementStopPattern() {
    if (elementStopPattern == null) {
        return null;
    }
    return elementStopPattern.pattern();
}
/**
 * In conjunction with the element start regex, used to detect the end of a message. Note that it is possible for a single
 * line to match both the start and stop patterns and hence be a complete element on its own.
 * The expression is compiled with {@link Pattern#DOTALL}.
 *
 * @param elementStopRegEx The regular expression to identify the end of an entity in the source;
 *                         null clears any previously configured pattern (previously a null argument
 *                         failed with a NullPointerException)
 */
public void setElementStopPattern(String elementStopRegEx) {
    // Mirror the getter, which reports "not set" as null
    this.elementStopPattern = elementStopRegEx == null
            ? null
            : Pattern.compile(elementStopRegEx, Pattern.DOTALL);
}
/**
 * Enables or disables validation of the parsed CDOs.
 * When enabled, an exception will be thrown for any entity which fails validation.
 *
 * @param validate Whether or not to validate parsed CDOs
 */
public void setValidate(boolean validate) {
    if (validate) {
        // A fresh holder for the per-thread validation managers
        validator = new ThreadLocal<ValidationManager>();
    } else {
        validator = null;
    }
}
/**
 * Reports whether this ItemReader validates the CDOs it parses.
 *
 * @return True iff this ItemReader will automatically validate read CDOs
 */
public boolean isValidating() {
    return !(validator == null);
}
/**
 * Reports whether validation aborts on the first failure.
 *
 * @return True iff the validator will abort on first failure
 */
public boolean isFailfast() {
    return this.failfast;
}
/**
 * Sets whether or not validation should fail fast.
 * If false, the exception thrown when validating will be a compound exception with all validation failures.
 *
 * @param failfast Whether or not to fail fast
 */
public void setFailfast(boolean failfast) {
    final boolean abortOnFirstFailure = failfast;
    this.failfast = abortOnFirstFailure;
}
/**
 * Returns the SplittingReaderSource from which CDOs are being parsed.
 *
 * @return This reader's SplittingReaderSource
 */
public SplittingReaderSource getSource() {
    return this.source;
}
/**
 * Sets the source that this reader will read from.
 *
 * @param source The SplittingReaderSource to read data from
 */
public void setSource(SplittingReaderSource source) {
    final SplittingReaderSource readerSource = source;
    this.source = readerSource;
}
/**
 * Sets the iO source factory used to create the iO Sources for parsing.
 *
 * @param ioSourceFactory The factory to use; when null the model's default source is used
 */
public void setSourceFactory(SourceFactory ioSourceFactory) {
    final SourceFactory factory = ioSourceFactory;
    this.ioSourceFactory = factory;
}
/**
 * Returns the iO source factory in use.
 *
 * @return The configured SourceFactory; null if none has been set
 */
public SourceFactory getSourceFactory() {
    return ioSourceFactory;
}
/**
 * Prepares the reader for a step by initialising the underlying source with the step's execution.
 *
 * @param stepExecution The step execution context
 */
@BeforeStep
public void setup(StepExecution stepExecution) {
    this.source.initialise(stepExecution);
}
/**
 * Clean up any resources we're consuming.
 *
 * Discards the per-thread validation managers and all cached parsers, then closes the source.
 * The cached parsers wrap readers handed out by the source; once the source has been closed
 * they must not be reused, otherwise a subsequent step execution on the same reader instance
 * would pick up a parser bound to a closed reader.
 */
@AfterStep
public void cleanup() {
    if(validator != null) {
        // Replace rather than clear: we cannot reach the entries held by other threads
        validator = new ThreadLocal<ValidationManager>();
    }
    // Drop the shared parser and every thread's cached parser for the same reason
    parser = null;
    threadedParser = new ThreadLocal<Parser>();
    source.close();
}
/**
 * Pairs the raw text of a to-be-parsed element with externally supplied context.
 * The ParseListener callback enables an external object to associate context with an element; keeping
 * the two together in one structure during processing is necessary to avoid race conditions.
 *
 * @author Andrew Elmore
 */
protected static class ElementContext {
    /** Context supplied for the element; may be null */
    public Object context;
    /** The textual form of the element */
    public String element;

    /**
     * @param element The textual form of the element
     * @param context The context to associate with the element; may be null
     */
    public ElementContext(String element, Object context) {
        this.context = context;
        this.element = element;
    }
}
/**
 * Reads a line of text from the SplittingReader. The definition of line is implementation dependent;
 * this implementation delegates to {@code SplittingReader.readLine()}.
 *
 * @param reader The SplittingReader to consume characters from
 * @return A line of text
 * @throws IOException if the underlying reader fails
 */
protected String readLine(SplittingReader reader) throws IOException {
    final String nextLine = reader.readLine();
    return nextLine;
}
/**
 * Extracts the textual data for an element from the SplittingReader using the elementStartPattern to split
 * up the data.
 *
 * If a ParseListener is registered, it will receive a callback when a line is read from the reader and when
 * an element has been extracted. Both callbacks are made while holding the lock on the reader so that no other
 * thread can feed further lines to the listener between the last line of an element and the request for its
 * context.
 *
 * @param reader The SplittingReader to extract the element from
 * @return The extracted text together with any listener-supplied context. The text is empty if the reader
 *         had no more data to offer.
 * @throws NonTransientResourceException if reading from the reader fails
 */
protected ElementContext readElement(SplittingReader reader) {
    // Only ever touched by the calling thread, so the unsynchronised builder is sufficient
    StringBuilder elementCache = new StringBuilder();
    boolean inElement = false;
    // Capture the listener once so that the whole element is handled by the same listener
    final ParseListener<Object, Result> listener = parseListener;

    synchronized(reader) {
        try {
            while(reader.ready()) {
                String line = readLine(reader);
                if(line == null) {
                    continue;
                }
                if(listener != null) {
                    // Invoke callback
                    line = listener.processLine(line);
                }
                // We look for the start of a new element if either:
                // a) We're not in an element or
                // b) We don't have an elementStopPattern set (if we do and we're in a element, the presence of a line
                //    that matches the element start pattern is deemed to still be part of the same element)
                if((!inElement || elementStopPattern == null) && elementStartPattern.matcher(line).matches()) {
                    if(elementCache.toString().trim().length() > 0) {
                        // We were already collecting an element, so this line belongs to the next one.
                        // Hand it back to the reader and finish with what we have.
                        // NOTE(review): the pushed-back line has already been through processLine and will
                        // presumably be passed to it again when it is re-read - confirm listeners tolerate this.
                        reader.pushback(line);
                        break;
                    }
                    // This is the start of our element
                    inElement = true;
                }
                if(inElement) {
                    // More data for our current element
                    elementCache.append(line);
                    if(elementStopPattern != null && elementStopPattern.matcher(line).matches()) {
                        // We've encountered the end of the element
                        break;
                    }
                }
            }
        } catch(IOException ioEx) {
            throw new NonTransientResourceException("Failed to extract entity", ioEx);
        }
        // Ask for the context before releasing the lock, whichever way the loop ended
        String message = elementCache.toString();
        return new ElementContext(message, listener == null? null : listener.getContext(message));
    }
}
/**
 * Called once a thread determines it has exhausted the current parser (more accurately, the underlying Reader).
 * Triggers creation of an appropriate new Parser next time getParser is called.
 *
 * A failure to discard the underlying reader is logged and otherwise ignored.
 *
 * @param parser The parser that has been exhausted.
 */
private void discardParser(Parser parser) {
    // If there's no splitting pattern, we have to ensure that we discard the underlying reader too
    if(elementStartPattern == null) {
        try {
            source.discard(parser.getSplitter());
        } catch(IOException ioEx) {
            // We'll carry on; worst case scenario a failure will be logged multiple times.
            // The exception is passed as the final argument so that the cause is not lost from the log.
            LOG.warn("Failed to close reader on source {}", source.getName(), ioEx);
        }
    }
    if(this.elementStartPattern == null && source.useMultipleThreadsPerReader()) {
        // Shared parser: only clear it if nobody has replaced it in the meantime
        synchronized(this) {
            if(this.parser == parser) {
                this.parser = null;
            }
        }
    } else {
        // Per-thread parser: remove this thread's entry rather than leaving a null value behind
        threadedParser.remove();
    }
}
/**
 * Gets the appropriate iO Source to use to read the message.
 * If ioSourceFactory is not set, the default source of the element type's model is used.
 * Textual sources are configured not to require end-of-data.
 *
 * @param reader An optional Reader to hand to the source; may be null
 *
 * @return A configured iO source
 */
private Source getIoSource(Reader reader) {
    final Source ioSource;
    if(ioSourceFactory != null) {
        // The factory needs a reader; give it a dummy one if we have none
        Reader factoryReader = reader;
        if(factoryReader == null) {
            factoryReader = new StringReader("");
        }
        ioSource = ioSourceFactory.getSource(factoryReader);
    } else {
        // Fall back to the model's default source
        ioSource = elementType.getModel().source();
        if(reader != null) {
            ioSource.setReader(reader);
        }
    }
    if(ioSource instanceof TextualSource) {
        TextualSource textual = (TextualSource) ioSource;
        textual.setEndOfDataRequired(false);
    }
    return ioSource;
}
/**
 * Gets a configured parser for this thread to use to parse messages.
 * Depending on configuration, threads may or may not share the source.
 *
 * @return The parser this thread should use to parse messages; null if the source has no reader to offer.
 */
private Parser getParser() {
    Parser returnParser = null;
    // We operate in one of 3 modes
    // 1. We have no splitter pattern and the ReaderSource advises us to share the Reader between threads
    // In this case all threads must share the same parser; make sure that we return a synchronized parser
    if(this.elementStartPattern == null && source.useMultipleThreadsPerReader()) {
        returnParser = parser;
        if(returnParser == null) {
            synchronized(this) {
                if(parser == null) {
                    SplittingReader splitter = source.getReader();
                    if(splitter != null) {
                        parser = new SyncParser(splitter, getIoSource(splitter), elementType);
                    }
                }
                // Always re-read the shared parser under the lock. Another thread may have created it
                // between our unsynchronised check and acquiring the lock; without this we would return
                // null and the caller would wrongly conclude that the data had been exhausted.
                returnParser = parser;
            }
        }
    }
    // 2. The ReaderSource advises us not to share the reader between threads
    // In this case, each thread will have its own parser and we need to ask for a new Reader each time we create one
    else if(!source.useMultipleThreadsPerReader()) {
        returnParser = threadedParser.get();
        boolean needNewReader = returnParser == null;
        if(!needNewReader) {
            try {
                needNewReader = !returnParser.getReader().ready();
            } catch (IOException ex) {
                // Unhelpfully if the stream has been closed beneath our feet this is how we find out about it
                // Even more unhelpfully, it appears as though the SAXParser does exactly that when it's finished parsing
                needNewReader = true;
            }
        }
        if(needNewReader) {
            // NOTE(review): if the source has no further reader, the previous (not ready) parser is still
            // returned and the caller relies on a failed read to discard it - confirm this is intended.
            SplittingReader splitter = source.getNextReader();
            if(splitter != null) {
                // If we don't have a splitting pattern, pass the splitter directly to the iO source
                // If we do, pass null as we'll create a new Reader for it below
                returnParser = new Parser(splitter, getIoSource(this.elementStartPattern == null? splitter : null), elementType);
                threadedParser.set(returnParser);
            }
        }
    }
    // 3. We have a splitter pattern and the Reader source advises us to share the Reader between threads
    // In this case each thread will have its own parser but we'll share a reader and keep using it until it runs out
    else {
        returnParser = threadedParser.get();
        if(returnParser == null) {
            SplittingReader splitter = source.getReader();
            if(splitter != null) {
                returnParser = new Parser(splitter, getIoSource(null), elementType);
                threadedParser.set(returnParser);
            }
        }
    }
    return returnParser;
}
/**
 * Reads the next CDO from the source, optionally validating it and passing it through the
 * registered ParseListener.
 *
 * @return The next item, or null once no parser (and hence no more data) is available
 * @see org.springframework.batch.item.ItemReader#read()
 */
@SuppressWarnings("unchecked")
@Override
public Result read() throws UnexpectedInputException,
        ParseException, NonTransientResourceException {
    ComplexDataObject cdo = null;
    Object listenerContext = null;

    // Keep going until we either have a CDO or there is no parser left to read from.
    // Sources such as the ZipFileSource can hand out several readers in turn; when the current one is
    // exhausted its parser is discarded and the next call to getParser picks up the following reader.
    while(cdo == null) {
        Parser current = getParser();
        if(current == null) {
            break;
        }

        if(elementStartPattern == null) {
            // No splitting pattern: parse CDOs from the parser in serial
            try {
                cdo = current.read();
            } catch(IOException ioEx) {
                throw new ParseException("Failed to parse CDO from " + source.getName(), ioEx);
            } finally {
                boolean nothingRead = cdo != null && cdo.getTotalAttrCount() == 0 && cdo.getTotalElementCount() == 0;
                if(nothingRead) {
                    cdo = null;
                }
                if(cdo == null) {
                    // Either the reader is exhausted or the parse failed. In the latter case there may be
                    // data left, but with no way to skip to the next message we have to abandon the reader.
                    discardParser(current);
                }
            }
            continue;
        }

        // We may be sharing the reader with other threads; pull our element's text out as quickly
        // as possible so that the expensive parsing happens without blocking them
        SplittingReader splitter = current.getSplitter();
        if(splitter == null) {
            // There's nothing left to read
            break;
        }

        ElementContext extracted = readElement(splitter);
        String text = extracted.element;
        listenerContext = extracted.context;

        if(text == null || text.trim().length() == 0) {
            // This parser has been exhausted
            discardParser(current);
            continue;
        }

        current.setReader(new StringReader(text));
        try {
            cdo = current.read();
        } catch(IOException ioEx) {
            throw new ParseException("Failed to parse CDO from " + source.getName() + ". Message: " + text, ioEx);
        }
    }

    if(validator != null && cdo != null) {
        validateParsedObject(cdo);
    }

    // If we have a ParseListener registered, allow it to intercept the return value
    if(parseListener == null || cdo == null) {
        return (Result) cdo;
    }
    return parseListener.process(cdo, listenerContext);
}
/**
 * Validates a parsed CDO using this thread's ValidationManager, creating the manager on first use.
 * In failfast mode validation is by exception; otherwise all failures are collected first and a single
 * failure is re-fired as though validating by exception, while multiple failures are reported together.
 *
 * @param cdo The parsed object to validate
 */
private void validateParsedObject(ComplexDataObject cdo) {
    try {
        ValidationManager manager = validator.get();
        if(manager == null) {
            manager = new ValidationManager();
            validator.set(manager);
        }

        if(failfast) {
            manager.validateByException(cdo);
            return;
        }

        // Capture all failures
        final Collection<ValidationEvent> failures = new LinkedList<ValidationEvent>();
        ValidationListener collector = new ValidationListener() {
            public void validationPassed(ValidationEvent ve) {
            }
            public void validationFailed(ValidationEvent ve) {
                failures.add(ve);
            }
        };
        manager.addValidationListener(collector);
        try {
            boolean valid = manager.validateByEvents(cdo);
            if(!valid) {
                if(failures.size() != 1) {
                    throw new C24CompoundValidationException(cdo, failures);
                }
                // Treat it as though we were validating by exception
                manager.setEventBased(false);
                manager.fireValidationEvent(failures.iterator().next());
            }
        } finally {
            manager.removeValidationListener(collector);
        }
    } catch(ValidationException vEx) {
        throw new C24ValidationException("Failed to validate message: " + vEx.getLocalizedMessage() + " [" + source.getName() + "]", cdo, vEx);
    }
}
}