package org.streaminer.stream.quantile;
import org.streaminer.stream.model.SlidingWindow;
import org.streaminer.util.math.PowerOfTwo;
import java.io.Serializable;
import java.util.Collections;
import java.util.LinkedList;
/**
* This deterministic quantile estimator loosely adapts the idea described in chapter 5 of <i>
* "Approximate Counts and Quantiles over Sliding Windows"</i> by <i>Arvind Arasu</i> and <i>Gurmeet
* Singh Manku</i>. The data structure described there is used in this class, but the query
* algorithm used here differs from the one in the paper. In a nutshell, the data structure works as follows:<br>
* The fixed-size window of a stream (please refer to {@link #setWindowSize(int)} to change the default size)
* is copied L times, which yields L <i>levels</i>. At each level the stream is
* partitioned into equal-sized blocks, and each level has a block size of its own.<br>
* An instance of {@link GKQuantiles} runs on each block. To answer a quantile query, the blocks are used to
* reassemble the window by choosing disjoint blocks of different levels, starting with big blocks and
* filling the remaining gaps with smaller blocks.<br>
*
* @author Markus Kokott
*/
public class WindowSketchQuantiles implements IQuantiles<Double>, Serializable {
/** Version identifier used by Java serialization. */
private static final long serialVersionUID = 8629450116663341157L;
/**
* Number of elements offered since construction or since the last {@link #setWindowSize(int)}
* call that actually changed the window (both reset it to zero).
*/
private Long elementCount;
/** Window length in elements, as returned by <code>PowerOfTwo.ceilToNext</code> in {@link #setWindowSize(int)}. */
private int windowSize;
/** Index of the highest level; the structure maintains <code>maxLevel + 1</code> levels. */
private int maxLevel;
/**
* Plain estimator that is fed, and answers queries, while fewer than <code>windowSize</code>
* elements have been offered. It is created with the epsilon passed to the constructor.
*/
private GKQuantiles initialGK;
/** The block currently being filled at each level, stored in order of increasing block size. */
private LinkedList<Block> levels = new LinkedList<Block>();
/** One sliding window per level, holding the summaries of that level's completed blocks. */
private LinkedList<SlidingWindow<Quantiles>> quantiles;
/**
* This value specifies the error bound. Note that the constructor does not store the requested
* value here but <code>1 / PowerOfTwo.floorToNext(1 / requested)</code>.
*/
protected double epsilon;
/**
 * Creates an estimator over a sliding window of the default size (32768 elements).
 *
 * <p>The requested bound is handed unchanged to the warm-up {@link GKQuantiles}. For the level
 * structure it is replaced by <code>1 / PowerOfTwo.floorToNext(1 / epsilon)</code>, which is
 * stored in {@link #epsilon}.
 *
 * @param epsilon <code>double</code> that represents the error bound; must lie strictly
 *                between 0 and 1.
 * @throws IllegalArgumentException if <code>epsilon</code> is NaN or not inside (0, 1)
 */
public WindowSketchQuantiles(double epsilon) {
    // Written as a negated conjunction so that NaN is rejected as well.
    if (!(epsilon > 0 && epsilon < 1)) {
        throw new IllegalArgumentException("An appropriate epsilon value must lay between 0 and 1.");
    }
    this.initialGK = new GKQuantiles(epsilon);
    this.quantiles = new LinkedList<SlidingWindow<Quantiles>>();
    this.elementCount = 0L;

    // The level structure derives every size and error bound from this.epsilon, so it has to be
    // in place BEFORE setWindowSize() builds the levels.
    Double value = 1 / epsilon;
    // 1.0 keeps the division in floating point whatever numeric type floorToNext returns.
    this.epsilon = 1.0 / PowerOfTwo.floorToNext(value);

    // Sets the window size, resets the counters and prepares the levels exactly once.
    this.setWindowSize(32768);
}
/**
 * Feeds one element of the stream into the estimator.
 *
 * <p>The element is counted, handed to the block of every level and, as long as fewer than
 * <code>windowSize</code> elements have been seen, also to the warm-up estimator. Finally all
 * sliding windows are moved on by one position.
 *
 * @param value the next stream element
 */
@Override
public void offer(Double value) {
    incrementCount();
    insertElement(value);

    // Until the first window is complete, queries are answered by initialGK, so keep it fed.
    boolean warmingUp = this.elementCount < this.windowSize;
    if (warmingUp) {
        this.initialGK.offer(value);
    }

    slideWindow();
}
/**
 * Returns an estimate of the <code>q</code>-quantile.
 *
 * <p>While fewer than <code>windowSize</code> elements have been offered, the query is delegated
 * to the warm-up {@link GKQuantiles}. Afterwards the weighted, sorted window summary is built and
 * the element at position <code>q * size</code> is returned. The position is clamped to the valid
 * index range, so <code>q == 1.0</code> yields the largest summary element instead of failing.
 *
 * @param q the requested quantile, expected in [0, 1]
 * @return the estimated quantile value
 * @throws QuantilesException as declared by the interface
 * @throws IndexOutOfBoundsException if the window summary is empty
 */
@Override
public Double getQuantile(double q) throws QuantilesException {
    if (this.elementCount < this.windowSize) {
        return this.initialGK.getQuantile(q);
    }

    LinkedList<Double> summary = this.getFinalSummary();
    int lastIndex = summary.size() - 1;

    // Truncating q * size gives index == size for q == 1.0; clamp it into [0, lastIndex].
    int index = (int) (q * summary.size());
    index = Math.max(0, Math.min(index, lastIndex));

    return summary.get(index);
}
/**
 * (Re)builds the level structure: one block per level and one sliding window per level.
 *
 * <p>The block created for each level is the one in state <b>UNDER-CONSTRUCTION</b> that will be
 * filled with newly arriving elements. Blocks are stored in order of increasing block size;
 * from one level to the next the block size doubles and the block's epsilon halves.
 *
 * <p>Any previously prepared blocks and windows are discarded first, so calling this method
 * repeatedly never leaves stale entries at the front of the lists. If <code>epsilon</code> has
 * not been set to a positive value yet, nothing is built.
 */
private void prepareLevels() {
    // Start from a clean slate; otherwise levels.get(i) would keep returning old blocks.
    this.levels.clear();
    this.quantiles.clear();

    // Without a positive epsilon the level computations below are meaningless (log(4 / 0)).
    if (!(this.epsilon > 0)) {
        return;
    }

    this.computeMaximumLevel();
    Double blockSize = this.computeMinBlockSize();
    Float levelEpsilon = this.computeEpsilonForMinLevel();

    int levelCount = this.maxLevel + 1;
    for (int i = 0; i < levelCount; i++) {
        // A block size truncated to 0 would make the modulo in insertElement() divide by zero.
        int size = Math.max(1, blockSize.intValue());
        this.levels.addLast(new Block(levelEpsilon, size));
        this.quantiles.add(new SlidingWindow<Quantiles>(this.windowSize));

        blockSize *= 2;
        levelEpsilon /= 2;
    }
}
/**
 * Derives the index of the highest level from <code>epsilon</code> and stores it in
 * <code>maxLevel</code>. The value is the base-2 logarithm of <code>4 / epsilon</code>,
 * truncated to an <code>int</code>.
 */
private void computeMaximumLevel() {
    double logBaseTwo = Math.log10(4 / epsilon) / Math.log10(2);
    this.maxLevel = (int) logBaseTwo;
}
/**
 * Computes the error bound of the smallest level (level zero) as
 * <code>epsilon * 2^maxLevel / (2 * (2 * maxLevel + 2))</code>.
 *
 * @return {@link Float} value determining the error of level zero.
 */
private Float computeEpsilonForMinLevel() {
    double scaled = this.epsilon * Math.pow(2, this.maxLevel);
    int divisor = 2 * (2 * this.maxLevel + 2);
    return (float) (scaled / divisor);
}
/**
 * Computes the size of blocks at level zero, i.e. <code>epsilon * windowSize / 4</code>.
 *
 * @return the smallest block size represented by a {@link Double} value.
 */
private Double computeMinBlockSize() {
    double scaledWindow = this.epsilon * this.windowSize;
    return scaledWindow / 4;
}
/**
 * Changes the window size; the default set by the constructor is 32768.
 *
 * <p>The size can be changed at any time, but doing so discards the per-level summaries and
 * resets the element count, so it is best called once, right after construction. Requests whose
 * rounded size is 128 or less are ignored without any change, because such small windows do
 * not make sense for this structure.
 *
 * <p>NOTE(review): the warm-up estimator <code>initialGK</code> is not touched here, so warm-up
 * queries after a resize still reflect elements offered before it. Confirm whether that is
 * intended.
 *
 * @param windowSize <code>int</code> value that is passed through
 *                   <code>PowerOfTwo.ceilToNext</code> before it becomes the new window size.
 */
public final void setWindowSize(int windowSize) {
    int rounded = PowerOfTwo.ceilToNext(windowSize);

    if (rounded > 128) {
        this.quantiles = new LinkedList<SlidingWindow<Quantiles>>();
        this.elementCount = 0L;
        this.windowSize = rounded;
        this.prepareLevels();
    }
}
/**
 * Hands the given item to the block of every level.
 *
 * <p>Each {@link Block} forwards the item to its own {@link GKQuantiles}. Whenever the running
 * element count is a multiple of a level's block size, the block's latest summary is wrapped in
 * a {@link Quantiles} object and added to that level's {@link SlidingWindow}.
 *
 * @param item the element to insert
 */
private void insertElement(Double item) {
    int levelCount = this.maxLevel + 1;
    for (int level = 0; level < levelCount; level++) {
        Block block = this.levels.get(level);
        block.insert(item);

        boolean blockCompleted = this.elementCount % block.getBlockSize() == 0;
        if (!blockCompleted) {
            continue;
        }

        Quantiles finished = new Quantiles(block.getEpsilon(), block.getSummary());
        this.quantiles.get(level).add(finished, block.getBlockSize());
    }
}
/**
 * Advances the element counter by one.
 */
private void incrementCount() {
    this.elementCount = this.elementCount + 1;
}
/**
* Assembles the block summaries that together are meant to cover the current window.
* <p>
* If the window of the highest level holds a summary, that single summary is returned.
* Otherwise the newest summary of level <code>maxLevel - 1</code> is taken as the "big block"
* and summaries of the levels below (starting at <code>maxLevel - 2</code>) are put in front of
* it and behind it to extend the covered range to the left and to the right.
* <p>
* NOTE(review): the border arithmetic relies on what {@link SlidingWindow#getLifeTime(int)} and
* {@link SlidingWindow#getSize(int)} return, which is not visible in this file; the comments
* below describe how the values are used here, not what the window guarantees.
*
* @return {@link LinkedList} of {@link Quantiles} ordered from the left-most to the right-most
* chosen block.
*/
private LinkedList<Quantiles> getStreamSummary() {
LinkedList<Quantiles> summary = new LinkedList<Quantiles>();
// if the highest level contains an ACTIVE element, this element will cover the whole window
if (!this.quantiles.get(maxLevel).isEmpty()) {
summary.add(this.quantiles.get(maxLevel).getNewestElement());
return summary;
}
// if there is no ACTIVE element in the highest level we add the only active element of the next
// lower level into the summary. There is at most one element active, because if there were
// two elements, the block in the highest level would have to be ACTIVE, too.
// NOTE(review): nothing here checks that level maxLevel - 1 is non-empty - confirm what
// getNewestElement() returns in that case.
Quantiles bigBlock = this.quantiles.get(maxLevel - 1).getNewestElement();
// interval [ 0 ; leftBorder ] not covered yet
int leftBorder = this.quantiles.get(maxLevel - 1).getLifeTime(0);
// interval [ rightBorder ; windowSize ] not covered yet
int rightBorder = this.quantiles.get(maxLevel - 1).getLifeTime(0) + this.quantiles.get(maxLevel - 1).getSize(0);
// size of the first entry at level 0; gaps of at most this width are left uncovered
// at the beginning and at the end of the window.
int maxUncovered = this.quantiles.get(0).getSize(0);
// next level
int level = this.maxLevel - 2;
// covering the left uncovered interval: walk each level from its last entry to its first and
// prepend every entry whose life time lies below the current left border
while (level >= 0 && leftBorder > maxUncovered) {
for (int i = this.quantiles.get(level).getAll().size() - 1; i > -1; i--){
if (leftBorder > this.quantiles.get(level).getLifeTime(i)){
leftBorder = this.quantiles.get(level).getLifeTime(i);
summary.addFirst(this.quantiles.get(level).get(i));
}
}
level--;
}
// the big block sits between the left and the right fillers
summary.add(bigBlock);
// next level
level = this.maxLevel - 2;
// covering the right uncovered interval: walk each level from its first entry to its last and
// append every entry that ends (life time + size) beyond the current right border
while (level >=0 && rightBorder < this.windowSize - maxUncovered) {
for (int i = 0; i < this.quantiles.get(level).getAll().size(); i++){
if (rightBorder < this.quantiles.get(level).getLifeTime(i) + this.quantiles.get(level).getSize(i)){
rightBorder = this.quantiles.get(level).getLifeTime(i) + this.quantiles.get(level).getSize(i);
summary.addLast(this.quantiles.get(level).get(i));
}
}
level--;
}
return summary;
}
/**
 * Builds the sorted, "weighted" summary of the window.
 *
 * <p>Because elements of bigger blocks are more important than elements of smaller blocks, the
 * quantiles of a block at level <code>l</code> are added <code>l + 1</code> times. Elements of
 * higher levels therefore appear more often in the result than elements of lower levels.
 *
 * @return {@link LinkedList} of {@link Double} representing the window summary, in ascending
 *         order.
 */
private LinkedList<Double> getFinalSummary() {
    LinkedList<Double> finalSummary = new LinkedList<Double>();

    for (Quantiles block : this.getStreamSummary()) {
        // The level is integral by construction but is recovered through float logarithms;
        // rounding removes noise such as 2.9999998 (or -1.0E-7 for level zero), which would
        // otherwise cost one repetition - or the whole block.
        float level = this.computeLevelForEpsilon(block.getEpsilon());
        int repetitions = Math.round(level) + 1;

        // Each quantile of the block must appear exactly 'repetitions' times, so the complete
        // list is appended once per repetition (not once per quantile AND repetition).
        for (int k = 0; k < repetitions; k++) {
            finalSummary.addAll(block.getQuantiles());
        }
    }

    Collections.sort(finalSummary);
    return finalSummary;
}
/**
 * Recovers the level of a {@link Block} from the block's value of epsilon.
 *
 * @param epsilon - the error parameter of a block
 * @return the level at which {@link Block}s with this epsilon are found.
 */
private Float computeLevelForEpsilon(Float epsilon) {
    double ratio = 2 * epsilon * (2 * this.maxLevel + 2) / this.epsilon;
    double levelsBelowTop = Math.log(ratio) / Math.log(2);
    return (float) (this.maxLevel - levelsBelowTop);
}
/**
 * Advances the {@link SlidingWindow} of every level by one position.
 */
private void slideWindow() {
    int level = 0;
    while (level < this.maxLevel + 1) {
        SlidingWindow<Quantiles> window = this.quantiles.get(level);
        window.slideWindowByOnePosition();
        level++;
    }
}
/**
 * Returns the canonical class name followed by the effective epsilon in braces.
 */
@Override
public String toString() {
    return getClass().getCanonicalName() + " {" + " epsilon=" + epsilon + " }";
}
/**
 * This inner class holds the {@link GKQuantiles} for a specific level.
 * {@link Block}s of different levels vary in their block size and error bound epsilon.
 *
 * <p>A block forwards every inserted item to its estimator. Once the estimator has seen
 * <code>blockSize</code> items, a summary of that block is stored and a fresh estimator is
 * started for the next block.
 */
public class Block implements Serializable {
    private static final long serialVersionUID = 7802824333706107860L;

    /** Error bound of this block's estimator; also the step between summarised quantiles. */
    private Float epsilon;
    /** Number of items after which a block is complete. */
    private Integer blockSize;
    /** Summary of the most recently completed block; empty until the first block completes. */
    private LinkedList<Double> summaryOfLastBlock;
    /** Estimator collecting the items of the block currently under construction. */
    private GKQuantiles quantileEstimator;

    /**
     * @param epsilon   error bound for the estimator of this block
     * @param blockSize number of items that make up one block
     */
    public Block(Float epsilon, Integer blockSize) {
        this.epsilon = epsilon;
        this.blockSize = blockSize;
        this.quantileEstimator = new GKQuantiles(epsilon);
        this.summaryOfLastBlock = new LinkedList<Double>();
    }

    /**
     * Adds an item to the block under construction and closes the block when it is full.
     *
     * @param item the element to insert
     */
    public void insert(Double item) {
        this.quantileEstimator.offer(item);
        // Compare numerically: blockSize is a boxed Integer, and '==' against another boxed
        // value would compare object identity rather than the numbers.
        if (this.quantileEstimator.getCount() == this.blockSize.intValue()) {
            this.createSummary();
            this.quantileEstimator = new GKQuantiles(this.epsilon);
        }
    }

    /** @return the number of items per block */
    public Integer getBlockSize() {
        return this.blockSize;
    }

    /** @return the summary of the most recently completed block */
    public LinkedList<Double> getSummary() {
        return this.summaryOfLastBlock;
    }

    /** @return the error bound of this block */
    public Float getEpsilon() {
        return this.epsilon;
    }

    /**
     * Queries the estimator at epsilon, 2 * epsilon, ... (as long as the value does not exceed 1)
     * and stores the answers, in that order, as the summary of the block just completed.
     */
    private void createSummary() {
        Float phi = this.epsilon;
        LinkedList<Double> summary = new LinkedList<Double>();
        while (phi <= 1) {
            summary.add(this.quantileEstimator.getQuantile(phi));
            phi += this.epsilon;
        }
        this.summaryOfLastBlock = summary;
    }
}
/**
 * Just a wrapper class for a summary, i.e. a {@link LinkedList} of {@link Double} representing
 * quantiles, and a {@link Float} value representing its error bound epsilon.
 */
public class Quantiles implements Serializable {
    private static final long serialVersionUID = -6060440214958903531L;

    /** Error bound of the block this summary was taken from. */
    private Float epsilon;
    /** The summarised quantile values. */
    private LinkedList<Double> quantiles = new LinkedList<Double>();

    /**
     * @param epsilon   error bound belonging to the summary
     * @param quantiles the summary itself; the list is stored as is, not copied
     */
    public Quantiles(Float epsilon, LinkedList<Double> quantiles) {
        this.epsilon = epsilon;
        this.quantiles = quantiles;
    }

    /**
     * Returns the summary entry at position <code>round(phi * size)</code>. The position is
     * clamped to the valid index range, so values of <code>phi</code> close or equal to 1
     * yield the last entry instead of running past the end of the list.
     *
     * @param phi the requested quantile, expected in [0, 1]
     * @return the summary entry for <code>phi</code>
     * @throws IndexOutOfBoundsException if the summary is empty
     */
    public Double getQuantile(float phi) {
        int lastIndex = this.quantiles.size() - 1;
        int position = Math.round(phi * this.quantiles.size());
        position = Math.max(0, Math.min(position, lastIndex));
        return this.quantiles.get(position);
    }

    /** @return the summarised quantile values */
    public LinkedList<Double> getQuantiles() {
        return this.quantiles;
    }

    /** @return the error bound belonging to the summary */
    public Float getEpsilon() {
        return this.epsilon;
    }
}
}