package io.github.infolis.model;
import io.github.infolis.model.entity.InfolisPattern;
import static org.junit.Assert.assertEquals;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import io.github.infolis.InfolisBaseTest;
import io.github.infolis.infolink.patternLearner.Reliability;
import io.github.infolis.model.entity.Entity;
import io.github.infolis.util.MathUtils;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Tests for {@link InfolisPattern}: posting a pattern to the data store and
 * computing its reliability through {@link Reliability}.
 *
 * <p>The pattern under test is held in a static field and is therefore shared
 * by every test method in this class.
 */
public class InfolisPatternTest extends InfolisBaseTest {

    private static final Logger log = LoggerFactory.getLogger(InfolisPatternTest.class);

    /** Contents of the ten test text files created in the constructor. */
    private static final List<String> contextStrings = Arrays.asList(
            "bar foo bar", "bar foo bar", "bar foo bar", "bar foo bar",
            "foO bar foO", "foO bar foO", "foO bar foO",
            "fOo bAr fOo",
            "bar bar bar", "bar bar bar");

    /** Pattern under test; shared (static) across all test methods. */
    private static final InfolisPattern pat = new InfolisPattern();

    /**
     * Sets the regex of the shared pattern and creates the test text files
     * from {@link #contextStrings}.
     *
     * @throws Exception if creating the test text files fails
     */
    public InfolisPatternTest() throws Exception {
        pat.setPatternRegex("foO\\s(.*?)\\sfoO");
        // createTestTextFiles is not defined in this class; presumably inherited from InfolisBaseTest.
        createTestTextFiles(10, contextStrings.toArray(new String[0]));
    }

    /**
     * Posts the shared pattern to the data store and logs the pattern's URI.
     * This test makes no assertions; it only fails if the post throws.
     *
     * @throws Exception if posting to the data store fails
     */
    @Test
    public void testInfolisPatternStringString() throws Exception {
        dataStoreClient.post(InfolisPattern.class, pat);
        log.debug(pat.getUri());
    }

    /*
     * Disabled test, kept for reference.
     *
     * @Test
     * public void testIsRelevant() throws Exception {
     *     pat.setThreshold(0.0);
     *     assertTrue(pat.isRelevant(contextStrings));
     *     pat.setThreshold(0.3);
     *     assertTrue(pat.isRelevant(contextStrings));
     *     pat.setThreshold(1.0);
     *     assertFalse(pat.isRelevant(contextStrings));
     * }
     */

    /**
     * Checks that the reliability stored on the pattern after calling
     * {@code isReliable} equals the value returned by
     * {@link Reliability#reliability} for the same pattern.
     *
     * <p>Setup: a single seed instance "bar" with five textual references, a
     * PMI score computed from hand-picked probabilities, and that score
     * registered as the association between the instance and the pattern in
     * both directions.
     *
     * @throws Exception if the reliability computation fails
     */
    @Test
    public void testIsReliable() throws Exception {
        int dataSize = contextStrings.size();

        // TextualReference argument order (per the original author's note):
        // leftText, reference, rightText, textFile, pattern, mentionsReference
        List<TextualReference> patternContexts = Arrays.asList(
                new TextualReference("foO", "bar", "foO", "textfile1", "pattern", "ref"),
                new TextualReference("foO", "bar", "foO", "textfile2", "pattern", "ref"),
                new TextualReference("foO", "bar", "foO", "textfile3", "pattern", "ref"));

        String seed = "bar";
        Set<String> reliableInstanceTerms = new HashSet<>();
        reliableInstanceTerms.add(seed);

        Reliability r = new Reliability();
        r.setSeedTerms(reliableInstanceTerms);

        // Textual references attached to the "bar" instance.
        Set<TextualReference> instanceContexts = new HashSet<>(Arrays.asList(
                new TextualReference("bar", "bar", "bar", "document4", "pattern", "ref"),
                new TextualReference("bar", "bar", "bar", "document5", "pattern", "ref"),
                new TextualReference("foO", "bar", "foO", "document6", "pattern", "ref"),
                new TextualReference("foO", "bar", "foO", "document7", "pattern", "ref"),
                new TextualReference("foO", "bar", "foO", "document8", "pattern", "ref")));

        Entity bar = new Entity(seed);
        bar.setTextualReferences(instanceContexts);

        Set<Entity> reliableInstances = new HashSet<>();
        reliableInstances.add(bar);

        double probInstance = 5 / 10.0; // "bar" occurs 5 times as instance in all data
        double probPattern = 3 / 10.0;  // bar_patt occurs 3 times
        double probJoint = 3 / 10.0;    // "bar" instance and bar_pat occur jointly 3 times
        double pmiScore = MathUtils.pmi(probJoint, probInstance, probPattern);
        log.debug("initial pmi_score: {}", pmiScore);

        // Register the association in both directions, then hand both to Reliability.
        pat.addAssociation("bar", pmiScore);
        bar.addAssociation(pat.getPatternRegex(), pmiScore);
        r.addInstance(bar);
        r.addPattern(pat);
        r.setMaxPmi(pmiScore);
        pat.setTextualReferences(patternContexts);

        double expectedReliability = r.reliability(pat, new HashSet<String>());
        pat.isReliable(dataSize, reliableInstances, r);

        // Log before asserting so both values are visible when the assertion fails.
        log.debug("Expected reliability: {}", expectedReliability);
        log.debug("Computed reliability: {}", pat.getPatternReliability());
        assertEquals(expectedReliability, pat.getPatternReliability(), 0.0);
    }
}