package no.priv.garshol.duke.datasources;
import java.io.IOException;
import java.io.StringReader;
import java.util.Collection;
import no.priv.garshol.duke.DukeConfigException;
import no.priv.garshol.duke.DukeException;
import no.priv.garshol.duke.Record;
import no.priv.garshol.duke.RecordIterator;
import no.priv.garshol.duke.cleaners.LowerCaseNormalizeCleaner;
import org.junit.Before;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
public class CSVDataSourceTest {
private CSVDataSource source;
@Before
public void setup() {
source = new CSVDataSource();
}
@Test
public void testEmpty() throws IOException {
RecordIterator it = read("");
assertTrue(!it.hasNext());
}
@Test
public void testSingleRecord() throws IOException {
source.addColumn(new Column("F1", null, null, null));
source.addColumn(new Column("F2", null, null, null));
source.addColumn(new Column("F3", null, null, null));
RecordIterator it = read("F1,F2,F3\na,b,c");
Record r = it.next();
assertEquals("a", r.getValue("F1"));
assertEquals("b", r.getValue("F2"));
assertEquals("c", r.getValue("F3"));
}
@Test
public void testSingleRecordWithComment() throws IOException {
source.addColumn(new Column("F1", null, null, null));
source.addColumn(new Column("F2", null, null, null));
source.addColumn(new Column("F3", null, null, null));
source.setSkipLines(1);
RecordIterator it = read("# this is a comment\nF1,F2,F3\na,b,c");
Record r = it.next();
assertEquals("a", r.getValue("F1"));
assertEquals("b", r.getValue("F2"));
assertEquals("c", r.getValue("F3"));
}
@Test
public void testSingleRecordWithoutHeader() throws IOException {
source.addColumn(new Column("1", "F1", null, null));
source.addColumn(new Column("2", "F2", null, null));
source.addColumn(new Column("3", "F3", null, null));
source.setHeaderLine(false);
RecordIterator it = read("a,b,c");
Record r = it.next();
assertEquals("a", r.getValue("F1"));
assertEquals("b", r.getValue("F2"));
assertEquals("c", r.getValue("F3"));
}
@Test
public void testSingleRecordWithoutHeaderExtraColumn() throws IOException {
source.addColumn(new Column("1", "F1", null, null));
source.addColumn(new Column("2", "F2", null, null));
source.addColumn(new Column("3", "F3", null, null));
source.setHeaderLine(false);
RecordIterator it = read("a,b,c,d");
Record r = it.next();
assertEquals("a", r.getValue("F1"));
assertEquals("b", r.getValue("F2"));
assertEquals("c", r.getValue("F3"));
}
@Test
public void testSingleRecordWithoutHeaderSkipColumn() throws IOException {
source.addColumn(new Column("1", "F1", null, null));
source.addColumn(new Column("3", "F3", null, null));
source.setHeaderLine(false);
RecordIterator it = read("a,b,c");
Record r = it.next();
assertEquals("a", r.getValue("F1"));
assertEquals(null, r.getValue("F2"));
assertEquals("c", r.getValue("F3"));
}
@Test
public void testColumnNotInHeader() throws IOException {
source.addColumn(new Column("F1", null, null, null));
source.addColumn(new Column("F2", null, null, null));
source.addColumn(new Column("F3", null, null, null));
source.addColumn(new Column("F4", null, null, null));
try {
RecordIterator it = read("F1,F2,F3\na,b,c");
Record r = it.next();
fail("Didn't catch missing column F4");
} catch (DukeConfigException e) {
// caught the configuration mistake
}
}
@Test
public void testHeaderNotInConfig() throws IOException {
source.addColumn(new Column("F1", null, null, null));
source.addColumn(new Column("F2", null, null, null));
source.addColumn(new Column("F3", null, null, null));
source.addColumn(new Column("F4", null, null, null));
try {
RecordIterator it = read("F5,F2,F3\na,b,c");
Record r = it.next();
fail("Didn't catch unknown column F5");
} catch (DukeConfigException e) {
// caught the configuration mistake
}
}
@Test
public void testSplitting() throws IOException {
source.addColumn(new Column("F1", null, null, null));
Column c = new Column("F2", null, null, null);
c.setSplitOn(";");
source.addColumn(c);
source.addColumn(new Column("F3", null, null, null));
RecordIterator it = read("F1,F2,F3\na,b;d;e,c");
Record r = it.next();
assertEquals("a", r.getValue("F1"));
assertEquals("c", r.getValue("F3"));
Collection<String> values = r.getValues("F2");
assertEquals(3, values.size());
assertTrue(values.contains("b"));
assertTrue(values.contains("d"));
assertTrue(values.contains("e"));
}
@Test
public void testSplittingCleaning() throws IOException {
source.addColumn(new Column("F1", null, null, null));
Column c = new Column("F2", null, null, new LowerCaseNormalizeCleaner());
c.setSplitOn(";");
source.addColumn(c);
source.addColumn(new Column("F3", null, null, null));
RecordIterator it = read("F1,F2,F3\na, b ; d ; e ,c");
Record r = it.next();
assertEquals("a", r.getValue("F1"));
assertEquals("c", r.getValue("F3"));
Collection<String> values = r.getValues("F2");
assertEquals(3, values.size());
assertTrue(values.contains("b"));
assertTrue(values.contains("d"));
assertTrue(values.contains("e"));
}
@Test
public void testNoValueForEmpty() throws IOException {
source.addColumn(new Column("F1", null, null, null));
source.addColumn(new Column("F2", null, null, null));
source.addColumn(new Column("F3", null, null, null));
RecordIterator it = read("F1,F2,F3\na,b,");
Record r = it.next();
assertEquals("a", r.getValue("F1"));
assertEquals("b", r.getValue("F2"));
assertEquals(r.getValue("F3"), null);
}
@Test
public void testNoValueForEmptySplit() throws IOException {
source.addColumn(new Column("F1", null, null, null));
Column c = new Column("F2", null, null, null);
c.setSplitOn(";");
source.addColumn(c);
source.addColumn(new Column("F3", null, null, null));
RecordIterator it = read("F1,F2,F3\na,b;;e,c");
Record r = it.next();
assertEquals("a", r.getValue("F1"));
assertEquals("c", r.getValue("F3"));
Collection<String> values = r.getValues("F2");
assertEquals(2, values.size());
assertTrue(values.contains("b"));
assertTrue(values.contains("e"));
}
@Test
public void testSeparator() throws IOException {
source.addColumn(new Column("F1", null, null, null));
source.addColumn(new Column("F2", null, null, null));
source.addColumn(new Column("F3", null, null, null));
RecordIterator it = read("F1;F2;F3\na;b;c", ';');
Record r = it.next();
assertEquals("a", r.getValue("F1"));
assertEquals("b", r.getValue("F2"));
assertEquals("c", r.getValue("F3"));
}
@Test
public void testMissingHeader() throws IOException {
source.addColumn(new Column("F1", null, null, null));
source.addColumn(new Column("F2", null, null, null));
source.addColumn(new Column("F3", null, null, null));
try {
RecordIterator it = read("", ';');
fail("accepted file with no header");
} catch (DukeException e) {
// as wanted
}
}
@Test
public void testUseColumnTwice() throws IOException {
source.addColumn(new Column("F1", null, null, null));
source.addColumn(new Column("F2", "F2a", null,
new LowerCaseNormalizeCleaner()));
source.addColumn(new Column("F2", "F2b", null, null));
source.addColumn(new Column("F3", null, null, null));
RecordIterator it = read("F1,F2,F3\na,B,c");
Record r = it.next();
assertEquals("a", r.getValue("F1"));
assertEquals("b", r.getValue("F2a"));
assertEquals("B", r.getValue("F2b"));
assertEquals("c", r.getValue("F3"));
}
// ===== UTILITIES
private RecordIterator read(String csvdata) {
return read(csvdata, ',');
}
private RecordIterator read(String csvdata, char separator) {
source.setReader(new StringReader(csvdata));
source.setSeparator(separator);
return source.getRecords();
}
}