/* * Copyright 2012 C24 Technologies. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package biz.c24.io.spring.batch.reader.source; import java.io.File; import java.io.IOException; import java.io.InputStreamReader; import java.util.Enumeration; import java.util.zip.ZipEntry; import java.util.zip.ZipFile; import org.springframework.batch.core.StepExecution; import org.springframework.core.io.Resource; import biz.c24.io.spring.util.C24Utils; /** * An implementation of SplittingReaderSource which extracts its data from Zip files. * Expects to be told the path of the file to write to by the supplied Resource or, * if not specified, from a property called input.file in the job parameters * (as populated by Spring Batch's org.springframework.batch.admin.integration.FileToJobLaunchRequestAdapter) * * @author Andrew Elmore */ public class ZipFileSource implements SplittingReaderSource { /** * The name of the zip file we're reading from */ private String name; /** * The current BufferedReader to be returned in calls to getReader if not exhausted */ private volatile SplittingReader reader = null; /** * The underlying zipFile */ private ZipFile zipFile; /** * An iterator over the entries in the zip file */ private Enumeration<? extends ZipEntry> zipEntries; /** * A hint to our users; should they use multiple threads on a single reader or ask us * for a different reader for each thread? */ private boolean useMultipleThreadsPerReader = true; /** * How many lines at the start of the file should we skip? */ private int skipLines = 0; /** * The Resource we acquire InputStreams from */ private Resource resource = null; private String encoding = C24Utils.DEFAULT_FILE_ENCODING; private boolean consistentLineTerminators = true; /* * (non-Javadoc) * @see biz.c24.io.spring.batch.reader.source.SplittingReaderSource#getName() */ public String getName() { return name; } /* (non-Javadoc) * @see biz.c24.spring.batch.BufferedReaderSource#initialise(org.springframework.batch.core.StepExecution) */ public void initialise(StepExecution stepExecution) { try { // Get an File and a name for where we're reading from // Use the Resource if supplied File source = null; if(resource != null) { name = resource.getDescription(); source = resource.getFile(); } else { // If no resource supplied, fallback to a Job parameter called input.file name = stepExecution.getJobParameters().getString("input.file"); // Remove any leading file:// if it exists if(name.startsWith("file://")) { name = name.substring("file://".length()); } source = new File(name); } zipFile = new ZipFile(source); zipEntries = zipFile.entries(); ZipEntry entry = getNextZipEntry(); if(entry != null) { // Prime the reader reader = getReader(entry); } // If we have a large number of ZipEntries and the first one looks relatively small, advise // callers to use a thread per reader if(entry != null && zipFile.size() > 20 && (entry.getSize() == -1 || entry.getSize() < 100000)) { useMultipleThreadsPerReader = false; } } catch (IOException e) { throw new RuntimeException(e); } } /* (non-Javadoc) * @see biz.c24.spring.batch.BufferedReaderSource#close() */ public void close() { if(zipFile != null) { try { zipFile.close(); } catch (IOException e) { throw new RuntimeException(e); } } } private SplittingReader getReader(ZipEntry entry) throws IOException { SplittingReader newReader = new SplittingReader(new InputStreamReader(zipFile.getInputStream(entry), getEncoding()), consistentLineTerminators); if(skipLines > 0) { for(int i = 0; i < skipLines && newReader.ready(); i++) { // Skip the line newReader.readLine(); } } return newReader; } /** * Gets the next ZipEntry that isn't a directory * @return The next file-type ZipEntry, null if there isn't one */ private synchronized ZipEntry getNextZipEntry() { ZipEntry next = null; while(next == null && zipEntries.hasMoreElements()) { next = zipEntries.nextElement(); if(next.isDirectory()) { next = null; } } return next; } /* (non-Javadoc) * @see biz.c24.spring.batch.BufferedReaderSource#getReader() */ public SplittingReader getReader() { try { if(reader != null && !reader.ready()) { synchronized(this) { // Multiple threads could be calling this in parallel; check the work hasn't already been performed for us if(reader != null && !reader.ready()) { // Our current reader is exhausted... getNextReader(); } } } return reader; } catch (IOException e) { throw new RuntimeException(e); } } @Override public synchronized SplittingReader getNextReader() { SplittingReader retVal = reader; if(retVal != null) { // Set up the next reader to return ZipEntry next = getNextZipEntry(); if(next != null) { try { reader = getReader(next); } catch (IOException e) { throw new RuntimeException(e); } } else { reader = null; } } return retVal; } @Override public boolean useMultipleThreadsPerReader() { return useMultipleThreadsPerReader; } @Override public synchronized void discard(SplittingReader reader) throws IOException { if(this.reader == reader) { getNextReader(); } reader.close(); } /** * How many lines will be skipped at the start of the file before the Reader is handed to callers? * @return the number of lines to skip at the start of each ZipEntry */ public int getSkipLines() { return skipLines; } /** * How many lines should be skipped at the start of the file before the Reader is handed to callers? * @param skipLines */ public void setSkipLines(int skipLines) { this.skipLines = skipLines; } /** * The resource we acquire InputStreams from * @return the resource which references the zip file this ZipFileSource will read from */ public Resource getResource() { return resource; } /** * Set the resource we acquire InputStreams from */ public void setResource(Resource resource) { this.resource = resource; } /** * Returns the encoding we are using when reading the file. * @return the encoding being used to read the file */ public String getEncoding() { return encoding; } /** * Sets the encoding to use to read the file * @param encoding the encoding the use */ public void setEncoding(String encoding) { this.encoding = encoding; } /** * Do we expect all lines in our input to use the same line terminator? * @return */ public boolean isConsistentLineTerminators() { return consistentLineTerminators; } /** * If we know that all lines within the file use the same line terminator, we can provide a hint to the * SplittingReader to optimise its data extraction * * @param consistentLineTerminators Set to true if all lines use the same line terminator for a speed boost during splitting */ public void setConsistentLineTerminators(boolean consistentLineTerminators) { this.consistentLineTerminators = consistentLineTerminators; } }