/*
* Copyright 2012 C24 Technologies.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package biz.c24.io.spring.batch.reader.source;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Enumeration;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import org.springframework.batch.core.StepExecution;
import org.springframework.core.io.Resource;
import biz.c24.io.spring.util.C24Utils;
/**
* An implementation of SplittingReaderSource which extracts its data from Zip files.
* Expects to be told the path of the file to read from by the supplied Resource or,
* if not specified, from a property called input.file in the job parameters
* (as populated by Spring Batch's org.springframework.batch.admin.integration.FileToJobLaunchRequestAdapter)
*
* @author Andrew Elmore
*/
public class ZipFileSource implements SplittingReaderSource {

    /**
     * Name of the job parameter consulted for the zip file path when no Resource has been supplied
     */
    private static final String INPUT_FILE_PARAMETER = "input.file";

    /**
     * Prefix stripped from the job parameter value before it is treated as a file system path
     */
    private static final String FILE_URL_PREFIX = "file://";

    /**
     * The archive must hold more than this many entries before we advise a thread per reader
     */
    private static final int MANY_ENTRIES_THRESHOLD = 20;

    /**
     * The first entry must be smaller than this (in uncompressed bytes) before we advise a thread per reader
     */
    private static final long SMALL_ENTRY_THRESHOLD = 100000L;

    /**
     * The name of the zip file we're reading from
     */
    private String name;

    /**
     * The current SplittingReader to be returned in calls to getReader if not exhausted.
     * Volatile because getReader() inspects it before taking the lock.
     */
    private volatile SplittingReader reader = null;

    /**
     * The underlying zipFile; null before initialise and after close
     */
    private ZipFile zipFile;

    /**
     * An iterator over the entries in the zip file; null before initialise and after close
     */
    private Enumeration<? extends ZipEntry> zipEntries;

    /**
     * A hint to our users; should they use multiple threads on a single reader or ask us
     * for a different reader for each thread?
     */
    private boolean useMultipleThreadsPerReader = true;

    /**
     * How many lines at the start of each zip entry should we skip?
     */
    private int skipLines = 0;

    /**
     * The Resource that locates the zip file; if null the input.file job parameter is used instead
     */
    private Resource resource = null;

    /**
     * The character encoding used to decode every zip entry
     */
    private String encoding = C24Utils.DEFAULT_FILE_ENCODING;

    /**
     * Hint passed through to each SplittingReader we create
     */
    private boolean consistentLineTerminators = true;

    /*
     * (non-Javadoc)
     * @see biz.c24.io.spring.batch.reader.source.SplittingReaderSource#getName()
     */
    public String getName() {
        return name;
    }

    /**
     * Opens the zip file and primes a reader on its first non-directory entry.
     *
     * The file is located via the configured Resource or, if none was supplied, via the
     * input.file job parameter (with any leading file:// removed). Any state left over from a
     * previous initialisation is released first so that this source can be reused.
     *
     * @param stepExecution Supplies the job parameters; only consulted when no Resource is set
     * @throws IllegalStateException if no Resource is set and the input.file job parameter is missing
     * @throws RuntimeException wrapping the IOException if the zip file or its first entry cannot be read
     */
    public synchronized void initialise(StepExecution stepExecution) {
        boolean initialised = false;
        try {
            // Drop any reader/zip file from an earlier run so stale state can never be handed out
            release();
            useMultipleThreadsPerReader = true;

            // Get a File and a name for where we're reading from
            File source;
            if (resource != null) {
                // Use the Resource if supplied
                name = resource.getDescription();
                source = resource.getFile();
            } else {
                // If no resource supplied, fallback to a Job parameter called input.file
                String path = stepExecution.getJobParameters().getString(INPUT_FILE_PARAMETER);
                if (path == null) {
                    throw new IllegalStateException("No resource has been set and the job parameter '"
                            + INPUT_FILE_PARAMETER + "' is missing; cannot locate the zip file to read");
                }
                // Remove any leading file:// if it exists
                if (path.startsWith(FILE_URL_PREFIX)) {
                    path = path.substring(FILE_URL_PREFIX.length());
                }
                name = path;
                source = new File(path);
            }

            zipFile = new ZipFile(source);
            zipEntries = zipFile.entries();

            ZipEntry entry = getNextZipEntry();
            if (entry != null) {
                // Prime the reader
                reader = getReader(entry);

                // If we have a large number of ZipEntries and the first one looks relatively small, advise
                // callers to use a thread per reader. An unknown size is reported as -1 and hence also
                // counts as small.
                if (zipFile.size() > MANY_ENTRIES_THRESHOLD && entry.getSize() < SMALL_ENTRY_THRESHOLD) {
                    useMultipleThreadsPerReader = false;
                }
            }
            initialised = true;
        } catch (IOException e) {
            throw new RuntimeException(e);
        } finally {
            if (!initialised) {
                // Don't leave a half-opened zip file behind
                try {
                    release();
                } catch (IOException ignored) {
                    // The original failure is already propagating; a secondary close failure adds nothing
                }
            }
        }
    }

    /**
     * Closes the underlying zip file (which, per the ZipFile contract, also closes every entry
     * stream obtained from it) and forgets the current reader. Safe to call repeatedly and
     * before initialise.
     *
     * @throws RuntimeException wrapping the IOException if the zip file cannot be closed
     */
    public synchronized void close() {
        try {
            release();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Resets all per-archive state and closes the zip file if one is open.
     * State is cleared before closing so that it is reset even when the close fails.
     */
    private synchronized void release() throws IOException {
        ZipFile open = zipFile;
        zipFile = null;
        zipEntries = null;
        reader = null;
        if (open != null) {
            open.close();
        }
    }

    /**
     * Creates a reader over the supplied entry, decoded with the configured encoding and
     * positioned after the configured number of skipped lines.
     *
     * @param entry The zip entry to read
     * @return A new reader positioned on the first line to be processed
     */
    private SplittingReader getReader(ZipEntry entry) throws IOException {
        SplittingReader newReader = new SplittingReader(
                new InputStreamReader(zipFile.getInputStream(entry), getEncoding()), consistentLineTerminators);
        // Skip the header lines, stopping early if the entry is shorter than the header
        for (int i = 0; i < skipLines && newReader.ready(); i++) {
            newReader.readLine();
        }
        return newReader;
    }

    /**
     * Gets the next ZipEntry that isn't a directory
     * @return The next file-type ZipEntry, null if there isn't one (or no zip file is open)
     */
    private synchronized ZipEntry getNextZipEntry() {
        Enumeration<? extends ZipEntry> entries = zipEntries;
        if (entries == null) {
            return null;
        }
        while (entries.hasMoreElements()) {
            ZipEntry candidate = entries.nextElement();
            if (!candidate.isDirectory()) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Returns the current reader, first moving on to the next zip entry if the current reader
     * reports that it is no longer ready.
     *
     * @return The reader to use, or null if there is nothing (more) to read
     * @throws RuntimeException wrapping the IOException if the reader's state cannot be determined
     */
    public SplittingReader getReader() {
        try {
            // Work on a snapshot: the field can be nulled by another thread between a null check
            // and a subsequent dereference
            SplittingReader current = reader;
            if (current != null && !current.ready()) {
                synchronized (this) {
                    // Multiple threads could be calling this in parallel; check the work hasn't already been performed for us
                    current = reader;
                    if (current != null && !current.ready()) {
                        // Our current reader is exhausted...
                        getNextReader();
                    }
                }
            }
            return reader;
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Hands out the current reader and advances this source to the next non-directory zip entry.
     *
     * @return The reader that was current when the call was made, or null if there was none
     * @throws RuntimeException wrapping the IOException if the next entry cannot be opened
     */
    @Override
    public synchronized SplittingReader getNextReader() {
        SplittingReader retVal = reader;
        if (retVal != null) {
            // Set up the next reader to return
            ZipEntry next = getNextZipEntry();
            if (next == null) {
                reader = null;
            } else {
                try {
                    reader = getReader(next);
                } catch (IOException e) {
                    throw new RuntimeException(e);
                }
            }
        }
        return retVal;
    }

    /**
     * @return false if initialise judged the archive to hold many small entries, in which case
     * callers are advised to use a separate reader for each thread; true otherwise
     */
    @Override
    public boolean useMultipleThreadsPerReader() {
        return useMultipleThreadsPerReader;
    }

    /**
     * Closes the supplied reader; if it is the current reader this source first moves on to the
     * next zip entry so that it is not handed out again.
     *
     * @param reader The reader the caller has finished with; null is ignored
     * @throws IOException if the reader cannot be closed
     */
    @Override
    public synchronized void discard(SplittingReader reader) throws IOException {
        if (reader == null) {
            // getReader() returns null once we're exhausted; tolerate callers passing that straight back
            return;
        }
        if (this.reader == reader) {
            getNextReader();
        }
        reader.close();
    }

    /**
     * How many lines will be skipped at the start of the file before the Reader is handed to callers?
     * @return the number of lines to skip at the start of each ZipEntry
     */
    public int getSkipLines() {
        return skipLines;
    }

    /**
     * How many lines should be skipped at the start of the file before the Reader is handed to callers?
     * @param skipLines the number of lines to skip at the start of each ZipEntry
     */
    public void setSkipLines(int skipLines) {
        this.skipLines = skipLines;
    }

    /**
     * The resource which locates the zip file
     * @return the resource which references the zip file this ZipFileSource will read from
     */
    public Resource getResource() {
        return resource;
    }

    /**
     * Set the resource which locates the zip file
     * @param resource the resource which references the zip file to read from
     */
    public void setResource(Resource resource) {
        this.resource = resource;
    }

    /**
     * Returns the encoding we are using when reading the file.
     * @return the encoding being used to read the file
     */
    public String getEncoding() {
        return encoding;
    }

    /**
     * Sets the encoding to use to read the file
     * @param encoding the encoding to use
     */
    public void setEncoding(String encoding) {
        this.encoding = encoding;
    }

    /**
     * Do we expect all lines in our input to use the same line terminator?
     * @return true if all lines are expected to use the same line terminator
     */
    public boolean isConsistentLineTerminators() {
        return consistentLineTerminators;
    }

    /**
     * If we know that all lines within the file use the same line terminator, we can provide a hint to the
     * SplittingReader to optimise its data extraction
     *
     * @param consistentLineTerminators Set to true if all lines use the same line terminator for a speed boost during splitting
     */
    public void setConsistentLineTerminators(boolean consistentLineTerminators) {
        this.consistentLineTerminators = consistentLineTerminators;
    }
}