package org.streaminer.stream.quantile.rss;
import org.streaminer.stream.quantile.IQuantiles;
import org.streaminer.stream.quantile.QuantilesException;
import java.io.Serializable;
import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.CopyOnWriteArrayList;
/**
* A stream version of the RSS (random subset sums) technique presented by
* Anna Gilbert, Yannis Kotidis, S. Muthukrishnan, and Martin Strauss in the
* paper "How to Summarize the Universe: Dynamic Maintenance of Quantiles".
*
* @author Carsten Przyluczky
*
*/
public class RSSQuantiles implements IQuantiles<Double>, Serializable {
    private static final long serialVersionUID = -7491178942147615981L;

    /** Value returned by {@link #getQuantile(double)} when no candidate value reaches the wanted rank. */
    public final static int CANT_ESTIMATE = -1;

    /**
     * Class-wide setting, shared by every instance of this class.
     * NOTE(review): presumably read by {@link Bucket} to decide when it is full;
     * that code is not part of this file - confirm there.
     */
    public static int ELEMENTS_PER_BUCKET = 200;

    /**
     * Class-wide upper limit on the number of buckets an instance keeps.
     * Shared by every instance of this class; always at least 1.
     */
    private static int MAX_BUCKET_COUNT = 5;

    private final int maxValue; // represents |U|
    private final List<Bucket> buckets;
    private Bucket newestBucket = null;
    private final float epsilon;
    private final float delta;

    /**
     * Stores the parameters and creates the first (empty) bucket.
     *
     * @param epsilon precision
     * @param delta error-probability
     * @param maxValue the maximum value that will be handled by this algorithm (|U|)
     */
    public RSSQuantiles(float epsilon, float delta, int maxValue) {
        this.epsilon = epsilon;
        this.delta = delta;
        this.maxValue = maxValue;
        buckets = new CopyOnWriteArrayList<Bucket>();
        addNewBucket();
    }

    /**
     * Feeds one value into the newest bucket. The value is rounded up to the
     * next integer first. Once the newest bucket reports that it is full, a
     * fresh bucket is started and the oldest buckets beyond the configured
     * maximum are dropped.
     *
     * @param value the value to add; must not be {@code null}
     */
    @Override
    public void offer(Double value) {
        newestBucket.process(Math.ceil(value));
        if (newestBucket.IsFull()) {
            addNewBucket();
        }
        deleteExcessiveBuckets();
    }

    /**
     * Estimates the {@code q}-quantile of the elements held in the current buckets.
     *
     * @param q the wanted quantile
     * @return the smallest candidate value {@code i} in {@code [0, maxValue)} whose
     *         estimated count over {@code [0, i]} exceeds the wanted rank, or
     *         {@link #CANT_ESTIMATE} (as a double) if no candidate does
     */
    @Override
    public Double getQuantile(double q) throws QuantilesException {
        // Take ONE snapshot for the whole query: the element count and every
        // per-candidate estimate then refer to the same set of buckets, and the
        // list is not copied again for every candidate value.
        List<Bucket> snapshot = new LinkedList<Bucket>(buckets);

        int overallBucketCount = overallBucketCount(snapshot);
        int wantedRank = (int)((float)overallBucketCount * (float)q - (float)overallBucketCount * epsilon);

        // For every candidate i, cover [0, i] with dyadic intervals, let all buckets
        // estimate them, and stop as soon as the summed estimate exceeds the wanted rank.
        for (int i = 0; i < maxValue; i++) {
            LinkedList<Interval> intervals = collectNeededIntervalls(i);
            double intervalSum = 0;
            for (Bucket bucket : snapshot) {
                intervalSum += Math.abs(bucket.estimateIntervals(intervals));
            }
            if (intervalSum > wantedRank) {
                return (double)i;
            }
        }
        return (double)CANT_ESTIMATE;
    }

    /**
     * Creates the list of intervals that, put side by side, cover {@code [0, rank]}.
     * Each interval's length is the largest power of two that still fits into the
     * part not yet covered, so the lengths follow the binary representation of
     * {@code rank + 1} from the highest bit down.
     *
     * @param rank the inclusive upper end of the range to cover
     * @return the covering intervals in ascending order; empty if {@code rank} is negative
     */
    private LinkedList<Interval> collectNeededIntervalls(int rank) {
        LinkedList<Interval> intervals = new LinkedList<Interval>();
        int lowerBound = 0;
        int remaining = rank + 1; // [0, rank] contains rank + 1 values (the 0 counts extra)
        while (remaining > 0) {
            // Largest power of two <= remaining, computed exactly on the bits
            // instead of via a floating-point logarithm quotient.
            int chunk = Integer.highestOneBit(remaining);
            int upperBound = lowerBound + chunk - 1;
            intervals.add(new Interval(lowerBound, upperBound));
            lowerBound = upperBound + 1;
            remaining -= chunk;
        }
        return intervals;
    }

    /**
     * Returns the sum of the element counts of the given buckets.
     *
     * @param bucketList the buckets to sum over
     * @return the sum of all {@link Bucket} element counts
     */
    private int overallBucketCount(List<Bucket> bucketList) {
        int count = 0;
        for (Bucket bucket : bucketList) {
            count += bucket.getElementCount();
        }
        return count;
    }

    /**
     * Creates a new {@link Bucket}, appends it to the list and makes it the
     * target of subsequent {@link #offer(Double)} calls.
     */
    private void addNewBucket() {
        Bucket newBucket = new Bucket(epsilon, delta, maxValue);
        buckets.add(newBucket);
        newestBucket = newBucket;
    }

    /**
     * Deletes the oldest {@link Bucket}s while there are more than the configured maximum.
     */
    private void deleteExcessiveBuckets() {
        while (buckets.size() > MAX_BUCKET_COUNT) {
            buckets.remove(0);
        }
    }

    /**
     * Sets the class-wide {@link #ELEMENTS_PER_BUCKET} value. This affects all
     * instances, not only this one.
     *
     * @param newCount the new value
     */
    public void setElementsPerBucket(int newCount) {
        ELEMENTS_PER_BUCKET = newCount;
    }

    /**
     * @return the class-wide {@link #ELEMENTS_PER_BUCKET} value
     */
    public int getElementsPerBucket() {
        return ELEMENTS_PER_BUCKET;
    }

    /**
     * Sets the class-wide maximum number of buckets and immediately trims this
     * instance's bucket list to it. Other instances share the new limit and trim
     * on their next {@link #offer(Double)}.
     *
     * @param newCount the new maximum; must be at least 1, because a smaller value
     *        would either detach the bucket that {@link #offer(Double)} writes to
     *        from the list that {@link #getQuantile(double)} reads, or (if negative)
     *        make the trimming loop remove from an empty list
     * @throws IllegalArgumentException if {@code newCount} is less than 1
     */
    public void setMaxBucketCount(int newCount) {
        if (newCount < 1) {
            throw new IllegalArgumentException(
                    "max bucket count must be at least 1, got " + newCount);
        }
        MAX_BUCKET_COUNT = newCount;
        deleteExcessiveBuckets();
    }

    /**
     * @return the class-wide maximum number of buckets
     */
    public int getMaxBucketCount() {
        return MAX_BUCKET_COUNT;
    }
}