package no.priv.garshol.duke.datasources; import java.io.StringReader; import no.priv.garshol.duke.Cleaner; import no.priv.garshol.duke.Record; import no.priv.garshol.duke.RecordIterator; import no.priv.garshol.duke.cleaners.FamilyCommaGivenCleaner; import no.priv.garshol.duke.cleaners.RegexpCleaner; import org.junit.Before; import org.junit.Test; import static junit.framework.Assert.assertEquals; import static junit.framework.Assert.assertFalse; import static junit.framework.Assert.assertTrue; public class NTriplesDataSourceTest { private NTriplesDataSource source; private static final String RDF_TYPE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"; @Before public void setup() { source = new NTriplesDataSource(); } @Test public void testEmpty() { RecordIterator it = read(""); assertFalse("empty data source contains records", it.hasNext()); } @Test public void testEmptyIncremental() { source.setIncrementalMode(true); testEmpty(); } @Test public void testEmptyBlank() { RecordIterator it = read("\n\n"); assertFalse("empty data source contains records", it.hasNext()); } @Test public void testEmptyBlankInc() { source.setIncrementalMode(true); testEmptyBlank(); } @Test public void testSingleRecord() { source.addColumn(new Column("?uri", "ID", null, null)); source.addColumn(new Column("http://b", "PROP", null, null)); RecordIterator it = read("<http://a> <http://b> \"foo\" .\n"); Record r = it.next(); assertEquals("http://a", r.getValue("ID")); assertEquals("foo", r.getValue("PROP")); assertFalse(it.hasNext()); } @Test public void testSingleRecordInc() { source.setIncrementalMode(true); testSingleRecord(); } @Test public void testSingleRecord2() { source.addColumn(new Column("?uri", "ID", null, null)); source.addColumn(new Column("http://b", "PROP", null, null)); RecordIterator it = read("<http://a> <http://b> \"foo\" .\n" + "<http://a> <http://c> \"foo\" .\n"); Record r = it.next(); assertEquals("http://a", r.getValue("ID")); assertEquals("foo", r.getValue("PROP")); assertFalse(it.hasNext()); } @Test public void testSingleRecord2Inc() { source.setIncrementalMode(true); testSingleRecord2(); } @Test public void testSingleRecord2Spaces() { source.addColumn(new Column("?uri", "ID", null, null)); source.addColumn(new Column("http://b", "PROP", null, null)); RecordIterator it = read("\n<http://a> <http://b> \"foo\" .\n\n" + "<http://a> <http://c> \"foo\" .\n\n"); Record r = it.next(); assertEquals("http://a", r.getValue("ID")); assertEquals("foo", r.getValue("PROP")); assertFalse(it.hasNext()); } @Test public void testSingleRecord2SpacesInc() { source.setIncrementalMode(true); testSingleRecord2Spaces(); } @Test public void testTwoRecords() { source.addColumn(new Column("?uri", "ID", null, null)); source.addColumn(new Column("http://b", "PROP", null, null)); RecordIterator it = read("<http://a> <http://b> \"foo\" .\n" + "<http://a> <http://c> \"foo\" .\n" + "<http://a> <" + RDF_TYPE + "> \"http://d\" .\n" + "<http://e> <http://b> \"bar\" .\n" + "<http://e> <http://c> \"foo\" .\n" + "<http://e> <" + RDF_TYPE + "> \"http://f\" .\n"); Record r = it.next(); checkAorE(r); // we don't know the order assertTrue("second record not found", it.hasNext()); r = it.next(); checkAorE(r); } private void checkAorE(Record r) { if (r.getValue("ID").equals("http://a")) assertEquals("foo", r.getValue("PROP")); else { assertEquals("http://e", r.getValue("ID")); assertEquals("bar", r.getValue("PROP")); } } @Test public void testTwoRecordsInc() { source.setIncrementalMode(true); testTwoRecords(); } @Test public void testTypeFiltering() { source.addColumn(new Column("?uri", "ID", null, null)); source.addColumn(new Column("http://b", "PROP", null, null)); source.setAcceptTypes("http://d"); RecordIterator it = read("<http://a> <http://b> \"foo\" .\n" + "<http://a> <http://c> \"foo\" .\n" + "<http://a> <" + RDF_TYPE + "> \"http://d\" .\n" + "<http://e> <http://b> \"bar\" .\n" + "<http://e> <http://c> \"foo\" .\n" + "<http://e> <" + RDF_TYPE + "> \"http://f\" .\n"); Record r = it.next(); assertEquals("http://a", r.getValue("ID")); assertEquals("foo", r.getValue("PROP")); assertFalse("e record not filtered out", it.hasNext()); } @Test public void testTypeFilteringInc() { source.setIncrementalMode(true); testTypeFiltering(); } @Test public void testSingleRecordDoubleProp() { source.addColumn(new Column("?uri", "ID", null, null)); source.addColumn(new Column("http://b", "PROP", null, null)); // yes, we map b two times. might be necessary to split a compound // value into two different properties. source.addColumn(new Column("http://b", "PROP2", null, null)); RecordIterator it = read("<http://a> <http://b> \"foo\" .\n"); Record r = it.next(); assertEquals("http://a", r.getValue("ID")); assertEquals("foo", r.getValue("PROP")); assertEquals("foo", r.getValue("PROP2")); assertFalse(it.hasNext()); } @Test public void testSingleRecordDoublePropInc() { source.setIncrementalMode(true); testSingleRecordDoubleProp(); } @Test public void testRealData() { String data = "<http://data.deichman.no/person/ahlgren_ernst_1850-1888> <http://data.deichman.no/catalogueName> \"Ahlgren, Ernst\" .\n" + "<http://data.deichman.no/person/ahlgren_ernst_1850-1888> <http://data.deichman.no/lifespan> \"1850-1888\" .\n" + "<http://data.deichman.no/person/ahlgren_ernst_1850-1888> <http://www.foafrealm.org/xfoaf/0.1/nationality> <http://data.deichman.no/nationality/sv> .\n" + "<http://data.deichman.no/person/ahlgren_ernst_1850-1888> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://xmlns.com/foaf/0.1/Person> .\n" + "<http://data.deichman.no/person/ahlgren_ernst_1850-1888> <http://xmlns.com/foaf/0.1/name> \"Ahlgren, Ernst\" .\n" + "<http://data.deichman.no/person/ahlgren_ernst_1850-1888> <http://xmlns.com/foaf/0.1/title> \"psevd. for Victoria Benedictsson\" .\n"; RegexpCleaner birthcleaner = new RegexpCleaner(); birthcleaner.setRegexp("^(\\d\\d\\d\\d)-"); RegexpCleaner deathcleaner = new RegexpCleaner(); deathcleaner.setRegexp("-(\\d\\d\\d\\d)$"); source.addColumn(new Column("?uri", "ID", null, null)); source.addColumn(new Column("http://xmlns.com/foaf/0.1/name", "NAME", null, new FamilyCommaGivenCleaner())); source.addColumn(new Column("http://data.deichman.no/lifespan", "YEAROFBIRTH", null, birthcleaner)); source.addColumn(new Column("http://data.deichman.no/lifespan", "YEAROFDEATH", null, deathcleaner)); RecordIterator it = read(data); Record r = it.next(); assertEquals("http://data.deichman.no/person/ahlgren_ernst_1850-1888", r.getValue("ID")); assertEquals("ernst ahlgren", r.getValue("NAME")); assertEquals("1850", r.getValue("YEAROFBIRTH")); assertEquals("1888", r.getValue("YEAROFDEATH")); assertFalse(it.hasNext()); } @Test public void testCleanedNullIsDiscarded() { source.addColumn(new Column("?uri", "ID", null, null)); source.addColumn(new Column("http://b", "PROP", null, new NullCleaner())); RecordIterator it = read("<http://a> <http://b> \"foo\" .\n"); assertFalse(it.hasNext()); } @Test public void testCleanedEmptyIsDiscarded() { source.addColumn(new Column("?uri", "ID", null, null)); source.addColumn(new Column("http://b", "PROP", null, new EmptyCleaner())); RecordIterator it = read("<http://a> <http://b> \"foo\" .\n"); assertFalse(it.hasNext()); } @Test public void testEmptyRecord() { source.addColumn(new Column("?uri", "ID", null, null)); source.addColumn(new Column("http://b", "PROP", null, null)); RecordIterator it = read("<http://a> <http://c> \"foo\" .\n"); assertFalse("failed to filter out empty records", it.hasNext()); } // --- helpers private RecordIterator read(String csvdata) { source.setReader(new StringReader(csvdata)); return source.getRecords(); } static class NullCleaner implements Cleaner { public String clean(String value) { return null; } } static class EmptyCleaner implements Cleaner { public String clean(String value) { return ""; } } }