/* * To change this license header, choose License Headers in Project Properties. * To change this template file, choose Tools | Templates * and open the template in the editor. */ package org.streaminer.stream.frequency; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Random; import org.junit.Test; import static org.junit.Assert.*; import org.streaminer.stream.frequency.util.CountEntry; /** * * @author Maycon Viana Bordin <mayconbordin@gmail.com> */ public class SimpleTopKCountingTest { private Random random = new Random(); public SimpleTopKCountingTest() { } /** * Test of add method, of class SimpleTopKCounting. */ @Test public void testIntegerSet() throws Exception { int numItems = 1000; int[] xs = new int[numItems]; int maxScale = 20; for (int i = 0; i < xs.length; ++i) { int scale = random.nextInt(maxScale); xs[i] = random.nextInt(1 << scale); } SimpleTopKCounting<Integer> counter = new SimpleTopKCounting(maxScale); for (int v : xs) { counter.add(v); } int count = 0; List<CountEntry<Integer>> topk = counter.getFrequentItems(); Collections.sort(topk); List<Integer> frequentItems = new ArrayList<Integer>(); CountEntry<Integer> lastItem = topk.get(topk.size() - 1); double epsilon = 1.0/(double)maxScale; double threshold = epsilon * (double)numItems; for (CountEntry<Integer> item : topk) { count += item.getFrequency(); frequentItems.add(item.getItem()); } // sum of all counters should be equal to number of items on the stream (n) assertEquals("Sum of all counter should be equal to stream size", count, numItems); // smallest counter value should be at most epsilon*n assertTrue("Smallest counter value should be at most epsilon*n, actual: " + lastItem.getFrequency() + " <= " + threshold, lastItem.getFrequency() <= threshold); // calculates actual frequencies RealCounting<Integer> actualFreq = new RealCounting<Integer>(); for (int v : xs) { actualFreq.add(v); } for (CountEntry<Integer> item : actualFreq.getFrequentItems()) { // check if all items whose count > epsilon*n have been stored if (item.getFrequency() > (epsilon*numItems)) { assertTrue("Any item whose count > epsilon*n should be stored", frequentItems.contains(item.getItem())); } // check if non-stored items have count <= min count if (!counter.contains(item.getItem())) { assertTrue("Count of non-stored items should be at most the min count stored", item.getFrequency() <= lastItem.getFrequency()); } } } }