package org.streaminer.stream.benchmark;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.junit.Test;
import static org.junit.Assert.*;
import org.junit.Ignore;
import org.streaminer.stream.frequency.decay.DecayFormula;
import org.streaminer.stream.frequency.decay.ExpDecayFormula;
import org.streaminer.stream.frequency.TimeDecayRealCounting;
import org.streaminer.stream.frequency.decay.Quantity;
import org.streaminer.stream.frequency.CountMinSketch;
import org.streaminer.stream.frequency.CountMinSketchAlt;
import org.streaminer.stream.frequency.FrequencyException;
import org.streaminer.stream.frequency.RealCounting;
import org.streaminer.stream.frequency.TimeDecayCountMinSketch;
import org.streaminer.stream.frequency.util.CountEntry;

/**
 * Dataset-driven benchmarks that compare count-min sketch estimates against
 * exact counts computed over the same input.
 *
 * <p>Both benchmarks read a tab-separated file from an absolute path on the
 * local file system, so they can only run on a machine where that file exists.
 *
 * @author Maycon Viana Bordin <mayconbordin@gmail.com>
 */
public class CountMinSketchBenchmark {

    /** Input file read by {@link #accuracy()}. */
    private static final String ACCURACY_DATASET = "/home/maycon/Downloads/track2/sample.txt";

    /** Input file read by {@link #timeDecayCountMinSketch()}. */
    private static final String TIME_DECAY_DATASET =
            "/home/mayconbordin/Projects/datasets/ctr/track2/sample.txt";

    public CountMinSketchBenchmark() {
    }

    /**
     * Feeds every record of the dataset to a {@link CountMinSketchAlt} and to an
     * exact counter, then asserts that the fraction of keys whose over-estimate
     * stays within {@code epsOfTotalCount} of the stream length is greater than
     * the requested confidence.
     *
     * @throws IOException if the dataset cannot be opened or read
     * @throws FrequencyException declared for the frequency-counting calls
     */
    @Test
    @Ignore
    public void accuracy() throws IOException, FrequencyException {
        int seed = 7364181;
        double epsOfTotalCount = 0.0001;
        double confidence = 0.99;

        CountMinSketchAlt sketch = new CountMinSketchAlt(epsOfTotalCount, confidence, seed);
        RealCounting<String> exact = new RealCounting<String>();

        long numItems = 0;
        BufferedReader br = new BufferedReader(new FileReader(ACCURACY_DATASET));
        try {
            String line;
            while ((line = br.readLine()) != null) {
                String key = toKey(line);
                sketch.add(key);
                exact.add(key);
                numItems++;
            }
        } finally {
            // Always release the file handle, even when a record is malformed.
            br.close();
        }

        double pCorrect = fractionWithinErrorBound(sketch, exact, numItems, epsOfTotalCount);
        assertTrue("Confidence not reached: required " + confidence + ", reached " + pCorrect,
                pCorrect > confidence);
        System.out.println(String.format("Required confidence %f, reached %f", confidence, pCorrect));
    }

    /**
     * Feeds every record of the dataset to four counters (exact, time-decayed
     * exact, plain sketch and time-decayed sketch), prints their estimates side
     * by side for every key seen at least 20 times, and reports the fraction of
     * keys for which the plain sketch stays within the error bound.
     *
     * <p>The confidence assertion is currently disabled in this benchmark; the
     * reached value is only printed.
     *
     * @throws IOException if the dataset cannot be opened or read
     * @throws FrequencyException declared for the frequency-counting calls
     */
    @Test
    public void timeDecayCountMinSketch() throws IOException, FrequencyException {
        int seed = 7364181;
        double epsOfTotalCount = 0.0001;
        double confidence = 0.99;

        DecayFormula decay = new ExpDecayFormula(60 * 60);
        TimeDecayRealCounting<String> map = new TimeDecayRealCounting<String>(decay);
        TimeDecayCountMinSketch timeSketch =
                new TimeDecayCountMinSketch(epsOfTotalCount, confidence, seed, decay);

        RealCounting<String> exact = new RealCounting<String>();
        CountMinSketchAlt sketch = new CountMinSketchAlt(epsOfTotalCount, confidence, seed);

        long numItems = 0;
        BufferedReader br = new BufferedReader(new FileReader(TIME_DECAY_DATASET));
        try {
            String line;
            while ((line = br.readLine()) != null) {
                String key = toKey(line);
                // Records are stamped with the wall-clock time at which they are read.
                long timestamp = System.currentTimeMillis();

                map.add(key, 1, timestamp);
                exact.add(key);
                timeSketch.add(key, 1, timestamp);
                sketch.add(key);
                numItems++;
            }
        } finally {
            // Always release the file handle, even when a record is malformed.
            br.close();
        }

        for (String key : map.keySet()) {
            // Only report keys that were seen at least 20 times.
            if (exact.estimateCount(key) < 20) {
                continue;
            }

            long timestamp = System.currentTimeMillis();
            System.out.println(String.format("[%s]\t%d\t%f\t%f\t%d", key,
                    exact.estimateCount(key), map.estimateCount(key, timestamp),
                    timeSketch.estimateCount(key, timestamp), sketch.estimateCount(key)));
        }

        ////////////////////////////////////////////////////////////////////////

        double pCorrect = fractionWithinErrorBound(sketch, exact, numItems, epsOfTotalCount);
        //assertTrue("Confidence not reached: required " + confidence + ", reached " + pCorrect, pCorrect > confidence);
        System.out.println(String.format("Required confidence %f, reached %f", confidence, pCorrect));
    }

    /**
     * Builds the counting key for one input record by joining the 8th and 4th
     * tab-separated columns with a colon.
     *
     * @param line one raw line of the dataset
     * @return the key {@code record[7] + ":" + record[3]}
     * @throws ArrayIndexOutOfBoundsException if the line has fewer than 8 columns
     */
    private static String toKey(String line) {
        String[] record = line.split("\t");
        return record[7] + ":" + record[3];
    }

    /**
     * Computes the fraction of distinct keys whose sketch estimate exceeds the
     * exact count by no more than {@code epsOfTotalCount * numItems}.
     *
     * <p>The over-estimate is compared against {@code epsOfTotalCount}. The
     * previous hard-coded threshold of {@code 1.0001} could never be exceeded by
     * a ratio normalised by the stream length, so no error was ever counted.
     *
     * @param sketch the sketch under test
     * @param exact the exact counter fed with the same items
     * @param numItems total number of items added to both counters
     * @param epsOfTotalCount allowed over-estimate as a fraction of {@code numItems}
     * @return the fraction of keys within the bound; {@code NaN} when {@code exact}
     *         holds no keys
     * @throws FrequencyException declared for the frequency-counting calls
     */
    private static double fractionWithinErrorBound(CountMinSketchAlt sketch, RealCounting<String> exact,
            long numItems, double epsOfTotalCount) throws FrequencyException {
        int numErrors = 0;
        for (String key : exact.keySet()) {
            long real = exact.estimateCount(key);
            long approx = sketch.estimateCount(key);

            double ratio = 1.0 * (approx - real) / numItems;
            if (ratio > epsOfTotalCount) {
                numErrors++;
            }
        }
        return 1 - 1.0 * numErrors / exact.keySet().size();
    }
}