package no.priv.garshol.duke.comparators;
import org.junit.Test;
import static junit.framework.Assert.assertEquals;
import static junit.framework.Assert.assertTrue;
public class JaroWinklerTest {
@Test
public void testEmpty() {
assertEquals(1.0, JaroWinkler.similarity("", ""));
}
@Test
public void testEqual() {
assertEquals(1.0, JaroWinkler.similarity("abc", "abc"));
}
@Test
public void testEqual2() {
assertEquals(1.0, JaroWinkler.similarity("ab", "ab"));
}
@Test
public void testEqual1() {
assertEquals(1.0, JaroWinkler.similarity("a", "a"));
}
@Test
public void testEqual4() {
assertEquals(1.0, JaroWinkler.similarity("abcd", "abcd"));
}
@Test
public void testTotallyDifferent() {
assertEquals(0.0, JaroWinkler.similarity("abc", "def"));
}
@Test
public void testWikipedia1() {
double score = (4/6.0 + 4/5.0 + (4-0)/4.0)/3.0;
score = score + ((1 * (1 - score)) / 10); // prefix
assertEquals(score, JaroWinkler.similarity("DwAyNE", "DuANE"));
}
@Test
public void testWikipedia2() {
double score = (6/6.0 + 6/6.0 + (6-1)/6.0)/3.0;
score = score + ((3 * (1 - score)) / 10); // prefix
assertEquals(score, JaroWinkler.similarity("MARTHA", "MARHTA"));
}
@Test
public void testWikipedia3() {
double score = (3/5.0 + 3/5.0 + (3-0)/3.0)/3.0;
assertEquals(score, JaroWinkler.similarity("CRATE", "TRACE"));
}
@Test
public void testWikipedia4() {
double score = (4/5.0 + 4/8.0 + (4-0)/4.0)/3.0;
score = score + ((2 * (1 - score)) / 10); // prefix
assertEquals(score, JaroWinkler.similarity("DIXON", "DICKSONX"));
}
@Test
public void testYancey1() {
double score = (5/6.0 + 5/8.0 + (5-1)/5.0)/3.0;
assertEquals(score, JaroWinkler.similarity("anderson", "barnes"));
}
// following tests from Winkler, William E. 2006. Overview of Record
// Linkage and Current Research Directions. Statistical Research
// Division, U.S. Census Bureau.
// http://www.census.gov/srd/papers/pdf/rrs2006-02.pdf
@Test
public void testWinkler1() {
roughlyEquals(0.982,
JaroWinkler.similarity("SHACKLEFORD", "SHACKELFORD"));
}
@Test
public void testWinkler2() {
roughlyEquals(0.896,
JaroWinkler.similarity("DUNNINGHAM", "CUNNIGHAM"));
}
@Test
public void testWinkler3() {
roughlyEquals(0.956,
JaroWinkler.similarity("NICHLESON", "NICHULSON"));
}
@Test
public void testWinkler4() {
roughlyEquals(0.832,
JaroWinkler.similarity("JONES", "JOHNSON"));
}
@Test
public void testWinkler5() {
roughlyEquals(0.933,
JaroWinkler.similarity("MASSEY", "MASSIE"));
}
@Test
public void testWinkler6() {
roughlyEquals(0.922,
JaroWinkler.similarity("ABROMS", "ABRAMS"));
}
@Test
public void testWinkler7() {
roughlyEquals(0.722, // winkler's table says 0.0, which makes no sense
JaroWinkler.similarity("HARDIN", "MARTINEZ"));
}
@Test
public void testWinkler8() {
roughlyEquals(0.467, // winkler's table says 0.0, which makes no sense
JaroWinkler.similarity("ITMAN", "SMITH"));
}
@Test
public void testWinkler9() {
roughlyEquals(0.926,
JaroWinkler.similarity("JERALDINE", "GERALDINE"));
}
@Test
public void testWinkler10() {
roughlyEquals(0.921,
JaroWinkler.similarity("MICHELLE", "MICHAEL"));
}
@Test
public void testWinkler11() {
roughlyEquals(0.933,
JaroWinkler.similarity("JULIES", "JULIUS"));
}
@Test
public void testWinkler12() {
roughlyEquals(0.88,
JaroWinkler.similarity("TANYA", "TONYA"));
}
@Test
public void testWinkler13() {
roughlyEquals(0.805,
JaroWinkler.similarity("SEAN", "SUSAN"));
}
@Test
public void testWinkler14() {
roughlyEquals(0.933,
JaroWinkler.similarity("JON", "JOHN"));
}
private void roughlyEquals(double d1, double d2) {
assertTrue("too different: " + d1 + " != " + d2,
Math.abs(d1 - d2) < 0.01);
}
}