package org.streaminer.stream.cardinality;
import java.util.TreeSet;
import org.streaminer.util.hash.Hash;
/**
 * K-Minimum Values (KMV) cardinality estimator.
 *
 * <p>Every offered key is hashed to a non-negative {@code int}, and the sketch keeps only the
 * {@code k} smallest distinct hash values seen so far. While fewer than {@code k} values are
 * retained the retained count is reported directly; once the sketch is full the cardinality is
 * estimated as {@code (k - 1) * Integer.MAX_VALUE / (k-th smallest hash)}.
 *
 * <p>Python Source Code: https://github.com/mynameisfiber/countmemaybe
 *
 * @author Maycon Viana Bordin <mayconbordin@gmail.com>
 */
public class KMinValues implements IBaseCardinality {
    /** The retained hash values, kept sorted so the largest one is cheap to evict. */
    private TreeSet<Integer> kMin;

    /** Maximum number of hash values retained; may shrink when merging with a smaller sketch. */
    private int k;

    private final Hash hasher;

    /**
     * Creates a sketch that uses the hash obtained from
     * {@code Hash.getInstance(Hash.MURMUR_HASH3)}.
     *
     * @param k number of minimum hash values to retain; must be at least 1
     * @throws IllegalArgumentException if {@code k < 1}
     */
    public KMinValues(int k) {
        this(k, Hash.getInstance(Hash.MURMUR_HASH3));
    }

    /**
     * Creates a sketch with an explicit hash function.
     *
     * @param k      number of minimum hash values to retain; must be at least 1
     * @param hasher hash function applied to every offered key
     * @throws IllegalArgumentException if {@code k < 1}
     */
    public KMinValues(int k, Hash hasher) {
        if (k < 1) {
            // A non-positive k previously surfaced later as a NoSuchElementException in offer().
            throw new IllegalArgumentException("k must be at least 1, got " + k);
        }
        this.kMin = new TreeSet<Integer>();
        this.k = k;
        this.hasher = hasher;
    }

    /**
     * Offers a key to the sketch.
     *
     * @param key the element to record
     * @return {@code true} if the set of retained hash values changed, {@code false} otherwise
     */
    public boolean offer(Object key) {
        int idx = index(key);
        if (kMin.size() < k) {
            // add() already reports whether the value was new.
            return kMin.add(idx);
        }
        if (idx < kMin.last() && kMin.add(idx)) {
            // The new value is smaller than the current maximum, so the maximum is evicted.
            kMin.pollLast();
            return true;
        }
        return false;
    }

    /**
     * Returns the estimated number of distinct keys offered so far.
     *
     * @return the retained count while the sketch holds fewer than {@code k} values, otherwise
     *         the KMV estimate truncated to a {@code long}
     */
    public long cardinality() {
        if (kMin.size() < k) {
            return kMin.size();
        }
        return (long) cardHelp(kMin, k);
    }

    /**
     * Merges the given sketches into this one.
     *
     * <p>After the call this sketch retains the {@code m} smallest hash values of the combined
     * sketches, where {@code m} is the smallest {@code k} among this sketch and {@code others}.
     * This sketch's {@code k} is lowered to {@code m} so that it stays consistent with the
     * values it retains.
     *
     * @param others sketches to merge in; they are not modified
     */
    public void union(KMinValues... others) {
        int newK = smallestK(others);
        // Merge into a copy so that passing this sketch as one of the arguments is safe.
        TreeSet<Integer> merged = new TreeSet<Integer>(kMin);
        for (KMinValues o : others) {
            merged.addAll(o.kMin);
        }
        kMin = firstN(merged, newK);
        k = newK;
    }

    /**
     * Estimates the Jaccard similarity between this sketch and another.
     *
     * @param other the sketch to compare against
     * @return the fraction of the combined minimum-value sample that is present in both
     *         sketches, or {@code 0.0} when both sketches are empty
     */
    public double jaccard(KMinValues other) {
        DirectSum ds = directSum(other);
        if (ds.x.isEmpty()) {
            return 0.0;
        }
        return ds.n / (double) ds.x.size();
    }

    /**
     * Estimates the cardinality of the union of this sketch and the given sketches without
     * modifying any of them.
     *
     * @param others the other sketches taking part in the union
     * @return the retained count if the combined sample is not full, otherwise the KMV estimate
     */
    public double cardinalityUnion(KMinValues... others) {
        DirectSum ds = directSum(others);
        if (ds.x.size() < ds.k) {
            // Same rule as cardinality(): a sample that is not full is reported as its size.
            return ds.x.size();
        }
        return cardHelp(ds.x, ds.x.size());
    }

    /**
     * Computes {@code (count - 1) * Integer.MAX_VALUE / max(values)}.
     *
     * @param values sorted hash values
     * @param count  number of values the estimate is based on
     * @return the estimate; {@code 0.0} for an empty set, and the set size when the largest
     *         value is 0 (which would otherwise divide by zero)
     */
    private double cardHelp(TreeSet<Integer> values, int count) {
        if (values.isEmpty()) {
            return 0.0;
        }
        int largest = values.last();
        if (largest == 0) {
            return values.size();
        }
        return ((count - 1.0) * Integer.MAX_VALUE) / largest;
    }

    /** Returns {@code true} if every sketch in {@code others} retains {@code item}. */
    private boolean inAll(int item, KMinValues... others) {
        for (KMinValues o : others) {
            if (!o.kMin.contains(item)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Builds the combined sample of this sketch and {@code others}: the smallest hash values of
     * their union, limited to the smallest {@code k} involved, together with the number of
     * sample values that every sketch retains.
     */
    private DirectSum directSum(KMinValues... others) {
        int limit = smallestK(others);
        // This sketch's own values are part of the union sample.
        TreeSet<Integer> merged = new TreeSet<Integer>(kMin);
        for (KMinValues o : others) {
            merged.addAll(o.kMin);
        }
        DirectSum ds = new DirectSum(firstN(merged, limit), limit);
        for (int item : ds.x) {
            if (kMin.contains(item) && inAll(item, others)) {
                ds.n++;
            }
        }
        return ds;
    }

    /** Returns the smallest {@code k} among this sketch and {@code others}. */
    private int smallestK(KMinValues... others) {
        int newK = k;
        for (KMinValues o : others) {
            if (o.k < newK) {
                newK = o.k;
            }
        }
        return newK;
    }

    /**
     * Returns a new set holding the {@code n} smallest elements of {@code sorted}, or all of
     * them if it has fewer than {@code n}.
     */
    private static TreeSet<Integer> firstN(TreeSet<Integer> sorted, int n) {
        TreeSet<Integer> head = new TreeSet<Integer>();
        for (Integer value : sorted) {
            if (head.size() >= n) {
                break;
            }
            head.add(value);
        }
        return head;
    }

    /** Hashes {@code key} and clears the sign bit so the result is never negative. */
    private int index(Object key) {
        return hasher.hash(key) & Integer.MAX_VALUE;
    }

    /** Result of combining several sketches. */
    private static class DirectSum {
        /** Number of sample values retained by every participating sketch. */
        public int n = 0;

        /** The combined sample: the smallest hash values of the union. */
        public final TreeSet<Integer> x;

        /** The sample-size limit that was applied when building {@link #x}. */
        public final int k;

        DirectSum(TreeSet<Integer> x, int k) {
            this.x = x;
            this.k = k;
        }
    }
}