package org.streaminer.stream.frequency.topk;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;
import org.streaminer.stream.frequency.FrequencyException;
import org.streaminer.stream.frequency.util.CountEntry;
/**
 * Frequent algorithm: a counter-based summary that tracks at most
 * {@code k = ceil(1 / error)} candidate items at any time.
 * https://github.com/gdusbabek/signalbrook
 *
 * <p>An item that is already tracked has its counter increased. An untracked
 * item takes a free counter when one is available; otherwise all counters are
 * decreased and the ones that reach zero are dropped. Adding an item with a
 * weight of {@code n} has the same effect as adding it {@code n} times.
 *
 * <p>This class performs no synchronization of its own.
 *
 * @author Gary Dusbabek
 * @param <T> type of the items being counted
 */
public class Frequent<T> implements ITopK<T> {
    /** Total weight of everything offered through {@code add} so far. */
    private long elementsCounted = 0;
    /** Maximum number of items tracked at the same time. */
    private final int k;
    /** Counter of each currently tracked item. */
    private final Map<T, AtomicLong> dataStructure;

    /**
     * Creates a summary that keeps {@code ceil(1 / error)} counters.
     *
     * @param error the error bound used to size the summary; must be a finite
     *              number greater than zero
     * @throws IllegalArgumentException if {@code error} is NaN, infinite, zero
     *                                  or negative
     */
    public Frequent(double error) {
        if (!(error > 0) || Double.isInfinite(error)) {
            throw new IllegalArgumentException(
                    "error must be a finite positive number: " + error);
        }
        this.k = (int) Math.ceil(1d / error);
        // Let the map grow on demand: presizing it from k would request an
        // enormous table up front when error is very small.
        dataStructure = new HashMap<T, AtomicLong>();
    }

    /**
     * Adds a single occurrence of {@code item}.
     *
     * @param item the item that was observed
     * @return {@code false} if the item was already tracked, {@code true} otherwise
     * @throws FrequencyException if the total count would exceed {@link Long#MAX_VALUE}
     */
    @Override
    public boolean add(T item) throws FrequencyException {
        return add(item, 1);
    }

    /**
     * Adds {@code incrementCount} occurrences of {@code item}.
     *
     * @param item           the item that was observed
     * @param incrementCount number of occurrences to record; must not be negative
     * @return {@code false} if the item was already tracked (its counter was
     *         increased), {@code true} otherwise
     * @throws FrequencyException       if the total count would exceed {@link Long#MAX_VALUE}
     * @throws IllegalArgumentException if {@code incrementCount} is negative
     */
    @Override
    public boolean add(T item, long incrementCount) throws FrequencyException {
        if (incrementCount < 0) {
            throw new IllegalArgumentException(
                    "incrementCount must not be negative: " + incrementCount);
        }
        // Overflow-safe form of: elementsCounted + incrementCount > Long.MAX_VALUE
        if (incrementCount > Long.MAX_VALUE - elementsCounted) {
            throw new FrequencyException("Overflowed " + Long.MAX_VALUE);
        }
        elementsCounted += incrementCount;

        AtomicLong counter = dataStructure.get(item);
        if (counter != null) {
            counter.addAndGet(incrementCount);
            return false;
        }
        if (dataStructure.size() < k) {
            dataStructure.put(item, new AtomicLong(incrementCount));
            return true;
        }

        // Every counter is in use. Spend as much of the incoming weight as the
        // smallest counter can absorb on decrementing all counters; this is what
        // the same number of single-occurrence adds would have done.
        long decrement = Math.min(incrementCount, smallestCount());
        decrementAll(decrement);

        // Weight left over means the smallest counter was exhausted and evicted,
        // so there is now a free slot for the new item.
        long remaining = incrementCount - decrement;
        if (remaining > 0) {
            dataStructure.put(item, new AtomicLong(remaining));
        }
        return true;
    }

    /** Returns the smallest counter value currently stored. */
    private long smallestCount() {
        long smallest = Long.MAX_VALUE;
        for (AtomicLong count : dataStructure.values()) {
            smallest = Math.min(smallest, count.get());
        }
        return smallest;
    }

    /** Subtracts {@code amount} from every counter and evicts those that drop to zero or below. */
    private void decrementAll(long amount) {
        Iterator<Map.Entry<T, AtomicLong>> it = dataStructure.entrySet().iterator();
        while (it.hasNext()) {
            if (it.next().getValue().addAndGet(-amount) <= 0) {
                it.remove();
            }
        }
    }

    /**
     * Returns up to {@code k} of the currently tracked items with their counters.
     *
     * <p>Entries are sorted by {@code CountEntry}'s natural ordering and the first
     * {@code k} are returned. NOTE(review): this assumes that ordering places the
     * most frequent entries first; confirm against {@code CountEntry.compareTo}.
     *
     * @param k maximum number of entries to return; values below one yield an empty list
     * @return a new list holding at most {@code k} entries
     */
    @Override
    public List<CountEntry<T>> peek(int k) {
        List<CountEntry<T>> list = new ArrayList<CountEntry<T>>(dataStructure.size());
        for (Map.Entry<T, AtomicLong> entry : dataStructure.entrySet()) {
            list.add(new CountEntry<T>(entry.getKey(), entry.getValue().get()));
        }
        Collections.sort(list);
        int limit = Math.max(0, Math.min(k, list.size()));
        // Copy so the result does not stay tied to the larger backing list.
        return new ArrayList<CountEntry<T>>(list.subList(0, limit));
    }

    /**
     * Returns the total number of occurrences recorded so far, i.e. the sum of
     * all increment counts passed to {@code add}.
     *
     * @return the total count
     */
    public long size() {
        return elementsCounted;
    }
}