package com.anjlab.csv2db;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.sql.BatchUpdateException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import javax.script.ScriptContext;
import javax.script.ScriptEngine;
import javax.script.ScriptException;
import org.apache.commons.compress.archivers.ArchiveEntry;
import org.apache.commons.compress.archivers.ArchiveException;
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.archivers.ArchiveStreamFactory;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.input.AutoCloseInputStream;
import org.apache.commons.lang3.StringUtils;
import com.codahale.metrics.Timer;
import com.codahale.metrics.Timer.Context;
import au.com.bytecode.opencsv.CSVReader;
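/**
 * Imports CSV data into a database.
 * <p>
 * Input may be a single file, a directory of files, or a zip archive. CSV lines are read
 * on the calling thread and handed through a {@link Mediator} to a pool of consumer
 * threads, each of which owns its own JDBC connection and {@link RecordHandler}.
 * <p>
 * Minimal usage sketch (how the {@link Configuration} is built is outside this class):
 * <pre>
 * Importer importer = new Importer(config, 4, null);
 * importer.performImport("/path/to/data.csv");
 * </pre>
 */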
public class Importer
{
private final Configuration config;
private final int numberOfThreads;
private final PerformanceCounter perfCounter;
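/**
 * @param perfCounter optional performance counter; may be {@code null}, in which case
 * per-line metrics are simply not recorded
 */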
public Importer(Configuration config, int numberOfThreads, PerformanceCounter perfCounter)
{
this.numberOfThreads = numberOfThreads;
this.config = config;
this.perfCounter = perfCounter;
}
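/**
 * Imports from the given path, accepting every file name; equivalent to calling
 * {@link #performImport(String, FilenameFilter)} with a filter that matches everything.
 */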
public void performImport(String filename)
throws ClassNotFoundException, SQLException, IOException,
ScriptException, ConfigurationException, InterruptedException,
ArchiveException
{
performImport(filename, new FilenameFilter()
{
@Override
public boolean accept(File dir, String name)
{
return true;
}
});
}
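/**
 * Imports from the given path: a directory is scanned for matching files, a {@code .zip}
 * file is read entry by entry, and any other path is treated as a single CSV file.
 */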
public void performImport(String filename, FilenameFilter filenameFilter)
throws ClassNotFoundException, SQLException, IOException, ScriptException,
ConfigurationException, InterruptedException, ArchiveException
{
final File inputFile = new File(filename);
if (inputFile.isDirectory())
{
importFromDir(inputFile, filenameFilter);
}
else if (StringUtils.endsWithIgnoreCase(filename, ".zip"))
{
importFromZip(inputFile, filenameFilter);
}
else
{
logImportingFrom(inputFile.getName());
performImport(new AutoCloseInputStream(new FileInputStream(inputFile)));
}
}
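/**
 * Imports every regular file in {@code input} accepted by the filter.
 * Subdirectories are not descended into.
 */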
private void importFromDir(final File input, FilenameFilter filenameFilter)
throws ClassNotFoundException, SQLException, IOException,
ScriptException, ConfigurationException, FileNotFoundException, InterruptedException
{
for (File file : input.listFiles())
{
if (file.isFile() && filenameFilter.accept(null, file.getName()))
{
logImportingFrom(file.getName());
performImport(new AutoCloseInputStream(new FileInputStream(file)));
}
}
}
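/**
 * Imports matching entries from an archive. The archiver name passed to commons-compress
 * is taken from the file extension, which for the {@code .zip} files routed here resolves
 * to the zip format.
 */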
private void importFromZip(final File inputFile, FilenameFilter filenameFilter)
throws IOException, ClassNotFoundException,
SQLException, ScriptException, ConfigurationException, InterruptedException, ArchiveException
{
try (ArchiveInputStream archiveInput = new ArchiveStreamFactory()
.createArchiveInputStream(
FilenameUtils.getExtension(inputFile.getName()),
new AutoCloseInputStream(new FileInputStream(inputFile))))
{
ArchiveEntry entry;
while ((entry = archiveInput.getNextEntry()) != null)
{
if (!entry.isDirectory() && filenameFilter.accept(null, entry.getName()))
{
logImportingFrom(entry.getName());
performImport(archiveInput);
}
}
}
}
private void logImportingFrom(String name)
{
System.out.println("\nImporting from '" + name + "'...");
}
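/**
 * Core import pipeline: starts {@code numberOfThreads} consumers that take records from a
 * shared queue, reads the CSV input on the calling thread enqueuing each line, and then
 * waits (up to one day) for all consumers to finish.
 */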
public void performImport(InputStream input)
throws ClassNotFoundException, SQLException, IOException, ScriptException, ConfigurationException, InterruptedException
{
ExecutorService executorService = Executors.newFixedThreadPool(numberOfThreads);
Mediator mediator = new SharedBlockingQueueMediator(config, numberOfThreads);
for (int i = 0; i < numberOfThreads; i++)
{
executorService.submit(createConsumer(mediator, i));
}
executorService.shutdown();
readInput(input, mediator);
executorService.awaitTermination(1, TimeUnit.DAYS);
}
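/**
 * Reads CSV lines using the configured CSV options and dispatches each one to the mediator,
 * then signals that the producer is done. I/O errors are rethrown as {@link RuntimeException}.
 */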
private void readInput(InputStream input, Mediator mediator) throws InterruptedException
{
CSVReader reader = null;
try
{
Configuration.CSVOptions csvOptions = config.getCsvOptions();
reader = new CSVReader(new InputStreamReader(input),
csvOptions.getSeparatorChar(),
csvOptions.getQuoteChar(),
csvOptions.getEscapeChar(),
csvOptions.getSkipLines(),
csvOptions.isStrictQuotes(),
csvOptions.isIgnoreLeadingWhiteSpace());
String[] nextLine;
while ((nextLine = reader.readNext()) != null)
{
// XXX This may block if all handlers have terminated with an error
mediator.dispatch(nextLine);
if (perfCounter != null)
{
perfCounter.lineEnqueued();
}
}
mediator.producerDone();
}
catch (IOException e)
{
throw new RuntimeException(e);
}
finally
{
closeQuietly(reader);
}
}
private void closeQuietly(Closeable closeable)
{
if (closeable != null)
{
try
{
closeable.close();
}
catch (IOException e)
{
e.printStackTrace(System.err);
}
}
}
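/**
 * Builds the consumer for one worker thread. Each consumer opens its own JDBC connection,
 * wraps it in the {@link RecordHandler} chosen by the configured operation mode, and
 * declares a per-thread JavaScript {@code emit} function that bridges back into
 * {@code RecordHandler#handleRecord} for user-supplied map functions.
 */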
private Runnable createConsumer(final Mediator mediator, final int threadId)
throws SQLException, ScriptException, ClassNotFoundException, ConfigurationException
{
return new Runnable()
{
final RecordHandler strategy = getRecordHandlerStrategy(
createConnection(), config.getScriptEngine(), mediator, threadId);
final Timer recordsMeter;
// TODO No need to build & bind emitFunction if `config.getMap() == null`
// The map function accepts nameValues and the JavaScript emit callback function.
// The emit function has to call back into Java, but since we can't construct a pure Java
// object representing a JavaScript function, we create this bridge, which in turn
// makes the actual call into Java via the #handleRecord(...) interface method.
final Object emitFunction;
{
String threadLocalEmit = "emit" + threadId;
String threadLocalStrategy = "strategy" + threadId;
StringBuilder emitFunctionDeclaration = new StringBuilder()
.append("function ").append(threadLocalEmit).append("(nameValues) {")
.append(threadLocalStrategy).append(".handleRecord(nameValues);")
.append("}");
try
{
config.getScriptEngine().getContext().setAttribute(
threadLocalStrategy, strategy, ScriptContext.ENGINE_SCOPE);
emitFunction = config.getScriptEngine().eval(emitFunctionDeclaration.toString());
}
catch (ScriptException e)
{
throw new RuntimeException("Internal error", e);
}
recordsMeter = Import.isMetricsEnabled()
? Import.METRIC_REGISTRY.timer("thread-" + threadId + ".records")
: null;
}
@Override
public void run()
{
try
{
readLines(mediator, threadId);
}
catch (Throwable t)
{
printStackTrace(t);
}
finally
{
try
{
strategy.close();
}
catch (Exception e)
{
printStackTrace(e);
}
}
}
private void readLines(final Mediator mediator, final int threadId)
throws InterruptedException, SQLException, ConfigurationException, ScriptException
{
Object next = mediator.take(threadId);
while (true)
{
Context time = null;
if (recordsMeter != null)
{
time = recordsMeter.time();
}
try
{
if (!handleRecord(next))
{
break;
}
next = mediator.take(threadId);
}
finally
{
if (time != null)
{
time.stop();
}
}
}
}
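/**
 * Handles one record taken from the mediator: a {@code String[]} is a raw CSV line that is
 * mapped to column name/value pairs (and optionally passed through the user's map function),
 * while a {@code Map} is a record that was already mapped, e.g. one re-routed from another
 * thread. Returns {@code false} when the record signals that this consumer should stop.
 */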
@SuppressWarnings("unchecked")
private boolean handleRecord(Object record)
throws SQLException, ConfigurationException, ScriptException, InterruptedException
{
if (record instanceof String[])
{
// The record is an array of values from a single CSV line
String[] columns = (String[]) record;
if (columns.length == 0)
{
return false;
}
Map<String, Object> nameValues = new HashMap<String, Object>();
for (Map.Entry<Integer, String> mapping : config.getColumnMappings().entrySet())
{
String value = columns[mapping.getKey()];
String targetColumnName = mapping.getValue();
nameValues.put(targetColumnName, value);
}
if (config.getMap() == null)
{
strategy.handleRecord(nameValues);
}
else
{
// Note that all emitted values (if any)
// will be handled by this same thread
config.getMap().eval(
config.getScriptEngine(),
nameValues,
emitFunction);
}
}
else if (record instanceof Map)
{
// A re-routed record that has already been mapped to name/value pairs
strategy.handleRecord((Map<String, Object>) record);
}
else
{
// Neither a CSV line nor a mapped record (possibly null): stop this consumer
return false;
}
return true;
}
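// BatchUpdateException buries the real cause in its chained SQLExceptions, so unwrap it
// (whether thrown directly or wrapped as a cause) and print the whole chain.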
private void printStackTrace(Throwable t)
{
if (t instanceof BatchUpdateException)
{
printBatchUpdateException((BatchUpdateException) t);
}
else if (t.getCause() instanceof BatchUpdateException)
{
printBatchUpdateException((BatchUpdateException) t.getCause());
}
else
{
t.printStackTrace(System.err);
}
}
private void printBatchUpdateException(BatchUpdateException bue)
{
bue.printStackTrace(System.err);
SQLException se = bue.getNextException();
while (se != null)
{
System.err.println("Next SQLException in chain:");
se.printStackTrace(System.err);
se = se.getNextException();
}
}
};
}
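/**
 * Loads the configured JDBC driver class and opens a new connection using the configured
 * URL and optional connection properties. Each consumer thread gets its own connection.
 */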
public Connection createConnection() throws ClassNotFoundException, SQLException, ConfigurationException
{
Class.forName(config.getDriverClass());
Properties properties = new Properties();
if (config.getConnectionProperties() != null)
{
properties.putAll(config.getConnectionProperties());
}
return DriverManager.getConnection(config.getConnectionUrl(), properties);
}
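/**
 * Picks the record handler for the configured operation mode: INSERT, INSERTONLY, or,
 * for any other mode, MERGE.
 */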
private RecordHandler getRecordHandlerStrategy(
Connection connection, ScriptEngine scriptEngine,
Router router, int threadId)
throws SQLException, ScriptException
{
switch (config.getOperationMode())
{
case INSERT:
return new InsertRecordHandler(config, connection, scriptEngine, router, threadId, numberOfThreads);
case INSERTONLY:
return new InsertOnlyRecordHandler(config, connection, scriptEngine, router, threadId, numberOfThreads);
default:
return new MergeRecordHandler(config, connection, scriptEngine, router, threadId, numberOfThreads);
}
}
}