package com.anjlab.csv2db;
import java.sql.Connection;
import java.sql.Date;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import org.junit.Assert;
import org.junit.Test;
import com.anjlab.csv2db.Configuration.OperationMode;
public class ImporterTest
{
@Test
public void testImport() throws Exception
{
Configuration config = Configuration.fromJson(
"src/test/resources/test-config.json");
config.getCsvOptions().setEscapeChar((char) 0);
Importer importer = new Importer(config, 1, null);
Connection connection = importer.createConnection();
dropTableIfExists(connection);
connection.createStatement()
.executeUpdate(
"create table companies_house_records (" +
"id timestamp not null," +
"company_name varchar(160)," +
"company_number varchar(8)," +
"address_line_1 varchar(300)," +
"address_line_2 varchar(300)," +
"updated_at date" +
")");
importer.performImport("src/test/resources/test-data.csv");
// For the first time the data will be inserted, we don't insert any values to updated_at in this test case
assertRecordCount(connection, getExpectedDataset(false), true);
importer.performImport("src/test/resources/test-data.csv");
// Data will be updated and we should have updated_at set
assertRecordCount(connection, getExpectedDataset(true), true);
connection.close();
config.setOperationMode(OperationMode.INSERT);
config.setBatchSize(3);
importer = new Importer(config, 1, null);
connection = importer.createConnection();
importer.performImport("src/test/resources/test-data.csv");
List<Object[]> expectedDataset = new ArrayList<Object[]>();
expectedDataset.addAll(getExpectedDataset(true));
expectedDataset.addAll(getExpectedDataset(false));
sortDatasetByCompanyNameAndUpdateDate(expectedDataset);
assertRecordCount(connection, expectedDataset, true);
// Test import from ZIP
importer.performImport("src/test/resources/test-data.zip");
expectedDataset.addAll(getExpectedDataset(false));
sortDatasetByCompanyNameAndUpdateDate(expectedDataset);
assertRecordCount(connection, expectedDataset, true);
// Test InsertOnly
connection.close();
config.setOperationMode(OperationMode.INSERTONLY);
config.setBatchSize(3);
importer = new Importer(config, 1, null);
connection = importer.createConnection();
importer.performImport("src/test/resources/test-data.csv");
assertRecordCount(connection, expectedDataset, true);
connection.createStatement()
.executeUpdate("delete from companies_house_records");
config.setIgnoreNullPK(true);
importer.performImport("src/test/resources/test-data.csv");
// For the first time the data will be inserted, we don't insert any values to updated_at in
// this test case
assertRecordCount(connection, getExpectedDataset(false), true);
}
@Test
public void testMultithreadImport() throws Exception
{
Configuration config = Configuration.fromJson(
"src/test/resources/test-config.json");
config.getCsvOptions().setEscapeChar((char) 0);
Importer importer = new Importer(config, 4, null);
Connection connection = importer.createConnection();
dropTableIfExists(connection);
connection.createStatement()
.executeUpdate(
"create table companies_house_records (" +
"id timestamp not null," +
"company_name varchar(160)," +
"company_number varchar(8)," +
"address_line_1 varchar(300)," +
"address_line_2 varchar(300)," +
"updated_at date" +
")");
importer.performImport("src/test/resources/test-data.csv");
// For the first time the data will be inserted, we don't insert any values to updated_at in this test case
assertRecordCount(connection, getExpectedDataset(false), true);
importer.performImport("src/test/resources/test-data.csv");
// Data will be updated and we should have updated_at set
assertRecordCount(connection, getExpectedDataset(true), true);
connection.close();
config.setOperationMode(OperationMode.INSERT);
config.setBatchSize(3);
importer = new Importer(config, 4, null);
connection = importer.createConnection();
importer.performImport("src/test/resources/test-data.csv");
List<Object[]> expectedDataset = new ArrayList<Object[]>();
expectedDataset.addAll(getExpectedDataset(true));
expectedDataset.addAll(getExpectedDataset(false));
sortDatasetByCompanyNameAndUpdateDate(expectedDataset);
assertRecordCount(connection, expectedDataset, true);
// Test import from ZIP
importer.performImport("src/test/resources/test-data.zip");
expectedDataset.addAll(getExpectedDataset(false));
sortDatasetByCompanyNameAndUpdateDate(expectedDataset);
assertRecordCount(connection, expectedDataset, true);
}
private List<Object[]> getExpectedDataset(boolean withDate)
{
Calendar cal = Calendar.getInstance();
cal.clear(Calendar.HOUR);
cal.clear(Calendar.MINUTE);
cal.clear(Calendar.SECOND);
cal.clear(Calendar.MILLISECOND);
Date today = withDate ? new Date(cal.getTimeInMillis()) : null;
return Arrays.asList(
new Object[]{"! LTD", "08209948", "METROHOUSE 57 PEPPER ROAD", "HUNSLET", today},
new Object[]{"!BIG IMPACT GRAPHICS LIMITED", "07382019", "335 ROSDEN HOUSE", "372 OLD STREET", today},
new Object[]{"!NFERNO LTD.", "04753368", "FIRST FLOOR THAVIES INN HOUSE 3-4", "HOLBORN CIRCUS", today},
new Object[]{"!NSPIRED LTD", "SC421617", "12 BON ACCORD SQUARE", "", today},
new Object[]{"!OBAC INSTALLATIONS LIMITED", "07527820", "DEVONSHIRE HOUSE", "60 GOSWELL ROAD", today},
new Object[]{"!OBAC UK LIMITED", "07687209", "DEVONSHIRE HOUSE", "60 GOSWELL ROAD", today},
new Object[]{"!ST MEDIA SOUTHAMPTON LTD", "07904170", "10 NORTHBROOK HOUSE", "FREE STREET, BISHOPS WALTHAM", today},
new Object[]{"ALJOH B.V.", "SF000899", "ALEXANDER HINSHELWOOD BARR", "\"SHALIMAR\"", today},
new Object[]{"ALLEGIS SERVICES (INDIA) PRIVATE LIMITED", "FC027847", "\\54, 1ST MAIN ROAD", "3RD PHASE", today},
new Object[]{"APS DIRECT LIMITED", "05638208", "MANOR COURT CHAMBERS \\", "126 MANOR COURT ROAD", today});
}
protected void assertRecordCount(Connection connection, List<Object[]> expectedData, boolean queryWithUpdateDate) throws SQLException
{
String query;
if (queryWithUpdateDate)
{
query = "SELECT * FROM companies_house_records ORDER BY company_name, updated_at";
}
else
{
query = "SELECT * FROM companies_house_records ORDER BY company_name";
}
ResultSet resultSet;
resultSet = connection.createStatement()
.executeQuery(query);
int index = 0;
while (resultSet.next())
{
int columnCount = resultSet.getMetaData().getColumnCount();
for (int i = 2; i <= columnCount; i++)
{
Object columnValue = resultSet.getObject(i);
if (columnValue != null && columnValue instanceof Date)
{
Assert.assertEquals(expectedData.get(index)[i - 2].toString(), columnValue.toString());
}
else
{
Assert.assertEquals(expectedData.get(index)[i - 2], columnValue);
}
}
index++;
}
Assert.assertEquals(expectedData.size(), index);
resultSet.close();
}
@Test
public void testImportWithScripting() throws Exception
{
Configuration config = Configuration.fromJson(
"src/test/resources/test-config-with-scripting.json");
config.getCsvOptions().setEscapeChar((char) 0);
Importer importer = new Importer(config, 1, null);
Connection connection = importer.createConnection();
dropTableIfExists(connection);
connection.createStatement()
.executeUpdate(
"create table companies_house_records (" +
"id timestamp not null," +
"company_name varchar(160)," +
"company_number varchar(8)," +
"generated_value varchar(8)" +
")");
importer.performImport("src/test/resources/test-data.csv");
List<Object[]> dataset = getExpectedDataset(false);
List<Object[]> expectedData = new ArrayList<Object[]>();
for (Object[] row : dataset)
{
expectedData.add(new Object[]{
row[0].toString().toLowerCase(),
row[1].toString(),
StringUtils.reverse(row[1].toString())});
}
assertRecordCount(connection, expectedData, false);
connection.close();
}
@Test
public void testImportWithMap() throws Exception
{
Configuration config = Configuration.fromJson(
"src/test/resources/test-config-with-map.json");
config.getCsvOptions().setEscapeChar((char) 0);
Importer importer = new Importer(config, 2, null);
Connection connection = importer.createConnection();
dropTableIfExists(connection);
connection.createStatement()
.executeUpdate(
"create table companies_house_records (" +
"id timestamp not null," +
"company_name varchar(160)," +
"company_number varchar(8)," +
"generated_value varchar(8)" +
")");
importer.performImport("src/test/resources/test-data.csv");
List<Object[]> dataset = getExpectedDataset(false);
List<Object[]> expectedData = new ArrayList<Object[]>();
for (Object[] row : dataset)
{
expectedData.add(new Object[]{
row[0].toString().toLowerCase(),
row[1].toString(),
StringUtils.reverse(row[1].toString())});
// map function will call emit(nameValues) twice
expectedData.add(new Object[]{
row[0].toString().toLowerCase(),
row[1].toString(),
StringUtils.reverse(row[1].toString())});
}
assertRecordCount(connection, expectedData, false);
connection.close();
}
private void dropTableIfExists(Connection connection)
{
try
{
connection.createStatement()
.executeUpdate("drop table companies_house_records");
}
catch (SQLException e)
{
// OK, table doesn't exist
}
}
private List<Object[]> sortDatasetByCompanyNameAndUpdateDate(List<Object[]> expectedDataset)
{
Collections.sort(expectedDataset, new Comparator<Object>()
{
@Override
public int compare(Object o1, Object o2)
{
Object[] a1 = (Object[]) o1;
Object[] a2 = (Object[]) o2;
return ((String) a1[0] + a1[4]).compareTo((String) a2[0] + a1[4]);
}
});
return expectedDataset;
}
}