package io.github.infolis.util; import static org.junit.Assert.assertEquals; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import io.github.infolis.InfolisBaseTest; public class MathUtilsTest extends InfolisBaseTest { private static final Logger log = LoggerFactory.getLogger(MathUtilsTest.class); // see http://www.ijfcc.org/papers/43-T00053.pdf for example values // apparently, they forgot to apply the log though... private static final double dataSize = 10000000.0; private static final double p_x = 24200 / dataSize; // occurrence of term x in all contexts private static final double p_y = 38900 / dataSize; // occurrence of pattern (or term) y private static final double p_xy = 169 / dataSize; // joint occurrence of x and y @Test public void testPmi() { log.debug("p_x: " + p_x); log.debug("p_y: " + p_y); log.debug("p_xy: " + p_xy); log.debug("p_x_y: " + p_x * p_y); log.debug("p_xy / p_x_y: " + p_xy / (p_x * p_y)); double pmi = MathUtils.pmi(p_xy, p_x, p_y); double expectedPmi = MathUtils.log2(1.8); log.debug("pmi: " + pmi); log.debug("expected pmi (rounded): " + expectedPmi); assertEquals(expectedPmi, pmi, 0.01); } }