/**
*
*/
package org.streaminer.stream.model;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* <p>
* This class implements a histogram-model observed from a data stream.
* </p>
*
* @author Christian Bockermann <chris@jwall.org>
*
*/
public class NumericalDistributionModel
implements SelectiveDescriptionModel<Double, Double>, Distribution<Double> {
/** The unique class ID */
private static final long serialVersionUID = -4642672370564928117L;
static Logger log = LoggerFactory.getLogger( NumericalDistributionModel.class );
Integer count = 0;
Double span = 1.0d;
Double interval = 0.1d;
Double lowerBound = 0.0d;
Integer[] histogram;
/**
* Creates a new distribution model. The specified parameter determines
* the interval of the bins, that are created.
*
* @param interval
*/
public NumericalDistributionModel( Integer binCount, Double span ){
interval = span / binCount.doubleValue();
log.info( "bin-interval is {}", interval );
histogram = new Integer[ binCount ];
for( int i = 0; i < histogram.length; i++ )
histogram[i] = 0;
}
/**
* @see stream.model.SelectiveDescriptionModel#describe(java.lang.Object)
*/
@Override
public Double describe(Double parameter) {
int idx = findInterval( parameter );
return histogram[idx].doubleValue();
}
/**
* Add a new value to the model.
* @param newVal
*/
public void update( Double newVal ){
Double val = normalize( newVal );
int idx = findInterval( val );
//log.info( "interval for value {} is {}", val, idx );
histogram[idx]++;
count++;
}
/**
* Find the interval for the given double value.
*
* @param val
* @return
*/
protected int findInterval( Double val ){
for( int i = 0; i < histogram.length; i++ ){
if( (lowerBound + i * this.interval) >= val )
return Math.max( 0, i - 1 );
}
return histogram.length - 1;
}
public Double getInterval(){
return this.interval;
}
public Set<Double> getElements(){
Set<Double> el = new TreeSet<Double>();
for( int i = 0; i < histogram.length; i++ )
el.add( histogram[i].doubleValue() );
return el;
}
public Map<Double,Double> getHistogram(){
Map<Double,Double> d = new TreeMap<Double,Double>();
Double total = count.doubleValue();
if( total == 0.0d )
total = 1.0d;
for( int i = 0; i < histogram.length; i++ )
d.put( histogram[i].doubleValue(), histogram[i].doubleValue() / total );
return d;
}
public Double normalize( Double val ){
return val / span;
}
/**
* @see stream.model.Distribution#getCount()
*/
@Override
public Integer getCount() {
return count;
}
/**
* @see stream.model.Distribution#prob(java.io.Serializable)
*/
@Override
public Double prob(Double value) {
int i = this.findInterval( value );
Double cnt = this.histogram[i].doubleValue();
return cnt / count.doubleValue();
}
/**
* @see stream.model.Distribution#getCount(java.io.Serializable)
*/
@Override
public Integer getCount(Double value) {
int i = this.findInterval( value );
return this.histogram[i];
}
}