package no.priv.garshol.duke.databases;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import no.priv.garshol.duke.ConfigurationImpl;
import no.priv.garshol.duke.Processor;
import no.priv.garshol.duke.Property;
import no.priv.garshol.duke.PropertyImpl;
import no.priv.garshol.duke.Record;
import no.priv.garshol.duke.comparators.Levenshtein;
import no.priv.garshol.duke.datasources.InMemoryDataSource;
import no.priv.garshol.duke.utils.TestUtils;

import org.apache.lucene.index.CorruptIndexException;

import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import static junit.framework.Assert.assertEquals;

/**
 * Tests {@link Processor#link()} against two {@link InMemoryDataSource}s.
 *
 * <p>Every test starts from the fixture built in {@link #setup()}: an ID
 * property plus NAME and EMAIL properties compared with
 * {@link Levenshtein}, a threshold of 0.85 and a maybe-threshold of 0.8.
 * Results are observed through a {@link TestUtils.TestListener} attached
 * to the processor.
 */
public class RecordLinkTest {
  private ConfigurationImpl config;
  private Processor processor;
  private InMemoryDataSource source1;
  private InMemoryDataSource source2;
  private TestUtils.TestListener listener;

  /**
   * Builds a fresh configuration, two empty data sources, the processor
   * and the listener before each test.
   */
  @Before
  public void setup() throws CorruptIndexException, IOException {
    listener = new TestUtils.TestListener();

    Levenshtein comp = new Levenshtein();
    // Parameterized explicitly: the raw ArrayList caused an unchecked warning.
    List<Property> props = new ArrayList<Property>();
    props.add(new PropertyImpl("ID"));
    // NOTE(review): 0.3 / 0.8 are presumably the low / high probabilities
    // of the property -- confirm against PropertyImpl.
    props.add(new PropertyImpl("NAME", comp, 0.3, 0.8));
    props.add(new PropertyImpl("EMAIL", comp, 0.3, 0.8));

    config = new ConfigurationImpl();
    config.setProperties(props);
    config.setThreshold(0.85);
    config.setMaybeThreshold(0.8);

    source1 = new InMemoryDataSource();
    source2 = new InMemoryDataSource();
    // NOTE(review): 1 and 2 presumably identify the two record-linkage
    // groups -- confirm against ConfigurationImpl.addDataSource.
    config.addDataSource(1, source1);
    config.addDataSource(2, source2);

    processor = new Processor(config, true);
    processor.addMatchListener(listener);
  }

  /** Closes the processor created in {@link #setup()}. */
  @After
  public void cleanup() throws CorruptIndexException, IOException {
    processor.close();
  }

  /** Linking two empty sources reports no matches and no records. */
  @Test
  public void testEmpty() throws IOException {
    processor.link();

    assertEquals(0, listener.getMatches().size());
    assertEquals(0, listener.getRecordCount());
  }

  /**
   * One record in each source with identical NAME and EMAIL: expects a
   * record count of 1 and exactly one match, pairing IDs 1 and 2.
   */
  @Test
  public void testSimplePair() throws IOException {
    source1.add(TestUtils.makeRecord("ID", "1", "NAME", "aaaaa", "EMAIL", "bbbbb"));
    source2.add(TestUtils.makeRecord("ID", "2", "NAME", "aaaaa", "EMAIL", "bbbbb"));

    processor.link();

    assertEquals("bad record count", 1, listener.getRecordCount());
    List<TestUtils.Pair> matches = listener.getMatches();
    assertEquals("bad number of matches", 1, matches.size());
    assertPairLinksIds1And2(matches.get(0));
  }

  /**
   * Record 2 is identical to record 1 while record 3 differs in both NAME
   * and EMAIL: expects a record count of 2, one match (IDs 1 and 2) and a
   * no-match count of 1.
   */
  @Test
  public void testOneMatchOneMiss() throws IOException {
    source1.add(TestUtils.makeRecord("ID", "1", "NAME", "aaaaa", "EMAIL", "bbbbb"));
    source2.add(TestUtils.makeRecord("ID", "2", "NAME", "aaaaa", "EMAIL", "bbbbb"));
    source2.add(TestUtils.makeRecord("ID", "3", "NAME", "xxxx", "EMAIL", "yyyyy"));

    processor.link();

    assertEquals("bad record count", 2, listener.getRecordCount());
    List<TestUtils.Pair> matches = listener.getMatches();
    assertEquals("bad number of matches", 1, matches.size());
    assertEquals("bad number of missed matches", 1, listener.getNoMatchCount());
    assertPairLinksIds1And2(matches.get(0));
  }

  /**
   * Same shape as {@link #testOneMatchOneMiss()}, but with the
   * maybe-threshold lowered to 0.0 and record 3 sharing its EMAIL with
   * record 1: still expects a record count of 2, one match (IDs 1 and 2)
   * and a no-match count of 1.
   */
  @Test
  public void testOneMatchOneMiss2() throws IOException {
    config.setMaybeThreshold(0.0);
    source1.add(TestUtils.makeRecord("ID", "1", "NAME", "aaaaa", "EMAIL", "bbbbb"));
    source2.add(TestUtils.makeRecord("ID", "2", "NAME", "aaaaa", "EMAIL", "bbbbb"));
    source2.add(TestUtils.makeRecord("ID", "3", "NAME", "xxxxx", "EMAIL", "bbbbb"));

    processor.link();

    assertEquals("bad record count", 2, listener.getRecordCount());
    List<TestUtils.Pair> matches = listener.getMatches();
    assertEquals("bad number of matches", 1, matches.size());
    assertEquals("bad number of missed matches", 1, listener.getNoMatchCount());
    assertPairLinksIds1And2(matches.get(0));
  }

  /**
   * Asserts that the pair joins the records with IDs "1" and "2",
   * accepting either order of the two records within the pair.
   *
   * @param pair the reported match to check; it is not modified
   */
  private static void assertPairLinksIds1And2(TestUtils.Pair pair) {
    Record first = pair.r1;
    Record second = pair.r2;
    // The tests do not depend on which side of the pair each record is
    // reported on, so normalise to 1/2 order before asserting.
    if ("2".equals(first.getValue("ID"))) {
      first = pair.r2;
      second = pair.r1;
    }
    assertEquals("1", first.getValue("ID"));
    assertEquals("2", second.getValue("ID"));
  }
}