package io.github.infolis.infolink.querying;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import io.github.infolis.model.entity.EntityLink;
/**
 * Unit tests for the static helpers {@code inRange}, {@code overlap} and
 * {@code numericInfoMatches} of {@link SearchResultScorer}.
 *
 * <p>{@code numericInfoMatches} is checked by pairing every entry of
 * {@link #REF_NUMBERS} with the entries of {@link #CANDIDATES} and asserting
 * either "no match" (a {@code null} result) or the exact set of
 * {@link EntityLink.EntityRelation}s returned.
 *
 * <p>Marker {@code TODO +SUPERSET_OF}: {@code superset_of_temporal} is a
 * candidate addition to the expected set but is currently disabled -
 * presumably pending the TODO about partially matched enumerations in
 * {@link #filterTest()}.
 */
public class SearchResultScorerTest {

    private static final Logger log = LoggerFactory.getLogger(SearchResultScorerTest.class);

    // Short aliases that keep the expectation tables below readable.
    private static final EntityLink.EntityRelation SAME_AS =
            EntityLink.EntityRelation.same_as_temporal;
    private static final EntityLink.EntityRelation PART_OF =
            EntityLink.EntityRelation.part_of_temporal;
    private static final EntityLink.EntityRelation SUPERSET_OF =
            EntityLink.EntityRelation.superset_of_temporal;

    /** Candidate titles; the index comments are the indices used in {@link #filterTest()}. */
    private static final String[] CANDIDATES = {
        /*  0 */ "Studierendensurvey 2000/01",
        /*  1 */ "Studierendensurvey 2001",
        /*  2 */ "German Social Survey (ALLBUS) Cumulative File, 1980, 1982, 1984, 1986",
        /*  3 */ "German Social Survey (ALLBUS) Cumulative File, 1980-1992",
        /*  4 */ "German Social Survey (ALLBUS) Cumulative File, 1980, 1992",
        /*  5 */ "German Social Survey (ALLBUS) Cumulative File, 1980, 1996",
        /*  6 */ "German Social Survey (ALLBUS) Cumulative File, 1980 - 1996",
        /*  7 */ "German Social Survey (ALLBUS) Cumulative File, 1980 1996",
        /*  8 */ "German Social Survey (ALLBUS) Cumulative File, 1980-1990",
        // Hard cases: "2. wave" is not the same as "no. 2". The filter needs more
        // information to decide which numbers to ignore.
        /*  9 */ "Ausländerumfrage 1982 (1. Welle: Haushaltsvorstände)",
        /* 10 */ "SFB580-B2 Betriebspanel",
        /* 11 */ "USICA-Jugend-Studie (Panel: 2. Welle 1979)",
        /* 12 */ "Ausländer in Deutschland 2000 - 2. Welle",
        /* 13 */ "CBS News Monthly Poll #2, May 1999",
        /* 14 */ "Eurobarometer 54.1 (2000)"
    };

    /** Numeric reference strings matched against {@link #CANDIDATES}. */
    private static final String[] REF_NUMBERS = {
        /* 0 */ "1996/08",
        /* 1 */ "1982",
        /* 2 */ "1982 - 1983",
        /* 3 */ "85/82",
        /* 4 */ "54.1",
        /* 5 */ "2000, 2002",
        /* 6 */ "2-4",
        /* 7 */ "2, 3"
    };

    /** Builds the expected relation set from the given relations. */
    private static Set<EntityLink.EntityRelation> relations(EntityLink.EntityRelation... expected) {
        return new HashSet<EntityLink.EntityRelation>(Arrays.asList(expected));
    }

    /** Failure message naming the reference/candidate pair under test. */
    private static String describe(String refNumber, String candidate) {
        return "reference \"" + refNumber + "\" vs. candidate \"" + candidate + "\"";
    }

    /**
     * Asserts that {@code numericInfoMatches} returns {@code null} for the
     * reference paired with each of the given candidates.
     *
     * @param refNumber        numeric reference string
     * @param candidateIndices indices into {@link #CANDIDATES}, checked in order
     */
    private static void assertNoMatch(String refNumber, int... candidateIndices) {
        for (int index : candidateIndices) {
            String candidate = CANDIDATES[index];
            assertNull(describe(refNumber, candidate),
                    SearchResultScorer.numericInfoMatches(refNumber, candidate));
        }
    }

    /**
     * Asserts that {@code numericInfoMatches} returns exactly the given
     * relations for the reference/candidate pair.
     *
     * @param refNumber      numeric reference string
     * @param candidateIndex index into {@link #CANDIDATES}
     * @param expected       the complete expected relation set
     */
    private static void assertRelations(String refNumber, int candidateIndex,
            EntityLink.EntityRelation... expected) {
        String candidate = CANDIDATES[candidateIndex];
        assertEquals(describe(refNumber, candidate),
                relations(expected),
                SearchResultScorer.numericInfoMatches(refNumber, candidate));
    }

    /** Both bounds of the range are accepted, values just outside are rejected. */
    @Test
    public void inRangeTest() {
        List<String> range = Arrays.asList("2000", "2010");

        assertTrue(SearchResultScorer.inRange(range, "2000"));
        assertTrue(SearchResultScorer.inRange(range, "2010"));
        assertFalse(SearchResultScorer.inRange(range, "1999"));
        assertFalse(SearchResultScorer.inRange(range, "2011"));
        assertTrue(SearchResultScorer.inRange(range, "2005"));
    }

    /**
     * Checks the relation sets reported by {@code overlap} for identical,
     * containing, contained, partially overlapping and disjoint ranges.
     * Disjoint ranges are expected to yield {@code null}.
     */
    @Test
    public void OverlapTest() {
        List<String> range1 = Arrays.asList("2000", "2010");
        List<String> range2 = Arrays.asList("2000", "2010");
        List<String> range3 = Arrays.asList("1900", "2010");
        List<String> range3b = Arrays.asList("1900", "2009");
        List<String> range4 = Arrays.asList("2000", "2020");
        List<String> range4b = Arrays.asList("2001", "2020");
        List<String> range5 = Arrays.asList("1900", "2020");
        List<String> range6 = Arrays.asList("2005", "2006");
        List<String> range7 = Arrays.asList("1900", "1990");
        List<String> range8 = Arrays.asList("2011", "2020");

        assertEquals(relations(SAME_AS), SearchResultScorer.overlap(range1, range2, false));
        assertEquals(relations(PART_OF), SearchResultScorer.overlap(range1, range3, false));
        assertEquals(relations(PART_OF, SUPERSET_OF),
                SearchResultScorer.overlap(range1, range3b, false));
        assertEquals(relations(PART_OF), SearchResultScorer.overlap(range1, range4, false));
        assertEquals(relations(PART_OF, SUPERSET_OF),
                SearchResultScorer.overlap(range1, range4b, false));
        assertEquals(relations(PART_OF), SearchResultScorer.overlap(range1, range5, false));
        assertEquals(relations(SUPERSET_OF), SearchResultScorer.overlap(range1, range6, false));
        // Same pair with swapped arguments and the flag set yields the same relation.
        assertEquals(relations(SUPERSET_OF), SearchResultScorer.overlap(range6, range1, true));
        assertNull(SearchResultScorer.overlap(range1, range7, false));
        assertNull(SearchResultScorer.overlap(range1, range8, false));
    }

    //TODO test specific relations
    /**
     * Pairs every reference number with the candidate titles and checks the
     * result of {@code numericInfoMatches}. Candidate indices refer to
     * {@link #CANDIDATES}.
     */
    @Test
    public void filterTest() {
        String ref = REF_NUMBERS[0]; // "1996/08"
        assertNoMatch(ref, 0, 1, 2, 3, 4);
        // TODO if not all enumerated values have a match, superset relation should be added...
        assertRelations(ref, 5, PART_OF); // TODO +SUPERSET_OF
        assertRelations(ref, 6, PART_OF); // TODO +SUPERSET_OF
        assertRelations(ref, 7, PART_OF); // TODO +SUPERSET_OF
        assertNoMatch(ref, 8, 9, 10, 11, 12, 13, 14);

        ref = REF_NUMBERS[1]; // "1982"
        assertNoMatch(ref, 0, 1);
        assertRelations(ref, 2, PART_OF);
        assertRelations(ref, 3, PART_OF);
        assertNoMatch(ref, 4, 5);
        assertRelations(ref, 6, PART_OF);
        assertNoMatch(ref, 7);
        assertRelations(ref, 8, PART_OF);
        assertRelations(ref, 9, SAME_AS);
        assertNoMatch(ref, 10, 11, 12, 13, 14);

        ref = REF_NUMBERS[2]; // "1982 - 1983"
        assertNoMatch(ref, 0, 1);
        assertRelations(ref, 2, PART_OF); // TODO +SUPERSET_OF
        assertRelations(ref, 3, PART_OF);
        assertNoMatch(ref, 4, 5);
        assertRelations(ref, 6, PART_OF);
        assertNoMatch(ref, 7);
        assertRelations(ref, 8, PART_OF);
        assertRelations(ref, 9, SUPERSET_OF);
        assertNoMatch(ref, 10, 11, 12, 13, 14);

        ref = REF_NUMBERS[3]; // "85/82"
        assertNoMatch(ref, 0, 1);
        assertRelations(ref, 2, PART_OF); // TODO +SUPERSET_OF
        assertRelations(ref, 3, PART_OF);
        assertNoMatch(ref, 4, 5);
        assertRelations(ref, 6, PART_OF);
        assertNoMatch(ref, 7);
        assertRelations(ref, 8, PART_OF);
        // TODO hard case: treat "1982 (1. " properly. Disabled until then:
        // assertRelations(ref, 9, SUPERSET_OF);
        assertNoMatch(ref, 10, 11, 12, 13, 14);

        ref = REF_NUMBERS[4]; // "54.1"
        assertNoMatch(ref, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13);
        assertRelations(ref, 14, SAME_AS);

        ref = REF_NUMBERS[5]; // "2000, 2002"
        assertRelations(ref, 0, PART_OF); // TODO +SUPERSET_OF
        assertNoMatch(ref, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
        // hard case: "2000 - 2." is an invalid range
        assertRelations(ref, 12, PART_OF); // TODO +SUPERSET_OF
        assertNoMatch(ref, 13);
        // TODO 'Eurobarometer 54.1 (2000)' <- 2000 is not an additional date to 54.1.
        // Disabled until then:
        // assertRelations(ref, 14, SUPERSET_OF);

        ref = REF_NUMBERS[6]; // "2-4"
        assertNoMatch(ref, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9);
        // bolandka: the following checks fail because the numbers in these titles
        // have a different meaning.
        // TODO: do we want to find a heuristic to treat such titles correctly?
        //   10 "SFB580-B2 Betriebspanel"
        //   11 "USICA-Jugend-Studie (Panel: 2. Welle 1979)" (2nd wave != no. 2)
        //   12 "Ausländer in Deutschland 2000 - 2. Welle"   (same)
        // assertNoMatch(ref, 10, 11, 12);
        assertNotNull(describe(ref, CANDIDATES[13]),
                SearchResultScorer.numericInfoMatches(ref, CANDIDATES[13]));
        assertNoMatch(ref, 14);

        ref = REF_NUMBERS[7]; // "2, 3"
        assertNoMatch(ref, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9);
        // Candidates 10-12 are disabled for the same reason as for "2-4" above.
        // assertNoMatch(ref, 10, 11, 12);
        assertRelations(ref, 13, PART_OF); // TODO +SUPERSET_OF
        assertNoMatch(ref, 14);
    }
}