package org.streaminer.stream.norm;
import org.apache.commons.math3.random.MersenneTwister;
import org.apache.commons.math3.random.RandomGenerator;
import org.apache.commons.math3.random.StableRandomGenerator;
import org.streaminer.util.ArrayUtils;
/**
* Sketches of vectors using stable distributions for Euclidean, Manhattan,
* L_p distances. File originally Gaussian Distribution generator, Nick Koudas.
* Extended to pick from arbitrary stable distributions with alpha in the range
* (0, 2], Graham Cormode.
*
* <a href="http://www.cs.rutgers.edu/~muthu/massdal-code-index.html">Original code</a>
*
* @author Maycon Viana Bordin <mayconbordin@gmail.com>
*/
public class StableSketch implements Comparable<StableSketch> {
/**
* The norm we are working in
*/
private double alpha;
/**
* Length of the sketch
*/
private int sksize;
/**
* Seed for the PRNG
*/
private long seed;
/**
* The sketch
*/
private double[] sk;
private RandomGenerator randomGen;
private StableRandomGenerator stableGen;
public StableSketch(int sksize, double alpha, long seed) {
this.alpha = alpha;
this.sksize = sksize;
this.seed = seed;
sk = new double[sksize];
randomGen = new MersenneTwister(seed);
stableGen = new StableRandomGenerator(randomGen, alpha, 0);
}
public void add(int item, double val) {
seed = item + seed;
randomGen.setSeed(seed);
// for each entry in the vector, create a pseudo-random sequence
// based on its position
for (int j=0; j<sksize; j++) {
// use this to make a sequence of stable variables and
// multiply these by the entry in the vector to maintain the sketch
sk[j] += val * stableGen.nextNormalizedDouble();
}
}
/**
* Sums that sketch with this.
* @param that
*/
public void add(StableSketch that) {
if (this.compareTo(that) == 0) {
for (int i=0; i<sksize; i++)
sk[i] += that.sk[i];
}
}
/**
* Subtracts that sketch with this.
* @param that
*/
public void subtract(StableSketch that) {
if (this.compareTo(that) == 0) {
for (int i=0; i<sksize; i++)
sk[i] -= that.sk[i];
}
}
/**
* Calculates the distance between this sketch and that. Do this by treating
* them by first finding the absolute difference between each component.
* The either use median (for alpha<2) or L2 (for alpha=2) to approximate
* the L_alpha distance.
*
* @param that
* @return The distance
*/
public double distance(StableSketch that) {
if (this.compareTo(that) != 0) return -1.0;
double sum = 0.0;
double _alpha = this.alpha;
double[] holder;
if (_alpha == 2.0) {
for (int i=0; i<sksize; i++)
sum += Math.pow(Math.abs(this.sk[i] - that.sk[i]), 2.0);
} else { // Calculate the L_alpha distance
holder = new double[sksize+1];
for (int i=0; i<sksize; i++)
holder[i+1] = this.sk[i] - that.sk[i];
sum = ArrayUtils.doubleMedSelect(sksize/2, sksize, holder);
}
return sum;
}
/**
* Find the L_p norm of the vector that a sketch represents, and return
* this to the power p -- needed for distinct values and so on.
* @return
*/
public double norm() {
double[] holder;
double sum = 0.0;
double est;
// use the j-l lemma approach for L_2 distance
if (alpha == 2.0) {
for (int i=0; i<sksize; i++) {
est = sk[i] * sk[i];
sum += est;
}
// Calculate the L_alpha norm
sum = Math.pow(sum/((double) sksize), 0.5);
} else {
holder = new double[sksize+1];
for (int i=0; i<sksize; i++)
holder[i+1] = Math.abs(sk[i]);
// transfer the details into a suitable array
sum = ArrayUtils.doubleMedSelect(sksize/2, sksize, holder);
//find the median of the arary, this is the estimator for the
// L_p norm of the vector
if (alpha < 0.01)
sum = Math.pow(sum, 0.02);
else
sum = Math.pow(sum, alpha);
//return not the L_p norm but the L_p norm ^p
}
return sum;
}
public int compareTo(StableSketch that) {
// incomparable sketches
if (this.alpha != that.alpha || this.sksize != that.sksize
|| this.seed != that.seed)
return 1;
return 0;
}
}