/**
 * Copyright 2013 BigML
 * Licensed under the Apache License, Version 2.0
 * http://www.apache.org/licenses/LICENSE-2.0
 */
package org.streaminer.stream.histogram.spdt;

import java.util.Collection;
import java.util.HashMap;
import java.util.Map.Entry;
import java.util.TreeMap;
import java.util.TreeSet;

/**
 * This class implements bin operations (insertions, merges, etc) for a histogram.
 * This implementation is best for histograms with a large (>256) number of bins.
 * It uses tree data structures to give O(logN) insert performance with regard to
 * the number of bins in the histogram. For histograms with fewer bins, the
 * ArrayBinReservoir class offers faster insert performance.
 *
 * <p>Internally the class keeps three structures in step with one another:
 * the bins keyed by their mean, the set of gaps between adjacent bins, and an
 * index from a bin's mean to the gap that starts at that bin.
 */
public class TreeBinReservoir<T extends Target> extends BinReservoir<T> {

  /**
   * Creates an empty reservoir. All three arguments are forwarded unchanged to
   * the {@code BinReservoir} constructor, which defines their exact meaning.
   *
   * @param maxBins the value later reported by {@code getMaxBins()}
   *     (presumably the bin limit enforced by {@link #merge()})
   * @param weightGaps the value later reported by {@code isWeightGaps()}
   * @param freezeThreshold passed to the superclass; presumably controls when
   *     {@code isFrozen()} becomes true -- confirm against BinReservoir
   */
  public TreeBinReservoir(int maxBins, boolean weightGaps, Long freezeThreshold) {
    super(maxBins, weightGaps, freezeThreshold);
    _bins = new TreeMap<Double, Bin<T>>();
    _gaps = new TreeSet<Gap<T>>();
    _binsToGaps = new HashMap<Double, Gap<T>>();
  }

  /**
   * Inserts a bin into the reservoir.
   *
   * <p>{@code addTotalCount(bin)} is always called first. After that:
   * <ul>
   *   <li>If the reservoir is frozen and already holds exactly
   *       {@code getMaxBins()} bins, no new bin is created; the incoming bin
   *       is folded (via {@code sumUpdate}) into the existing bin whose mean
   *       is closest. On a tie the lower bin wins.</li>
   *   <li>Otherwise, if a bin with exactly the same mean exists, the incoming
   *       bin is folded into it, and the surrounding gaps are rebuilt only
   *       when {@code isWeightGaps()} is true.</li>
   *   <li>Otherwise the bin is stored as a new bin and the gaps to its
   *       neighbours are registered.</li>
   * </ul>
   *
   * @param bin the bin to insert
   * @throws IllegalStateException if the reservoir is frozen at capacity but
   *     holds no bin at all to absorb the insert
   */
  @Override
  public void insert(Bin<T> bin) {
    addTotalCount(bin);
    double mean = bin.getMean();

    if (isFrozen() && getBins().size() == getMaxBins()) {
      Bin<T> target = nearestBin(mean);
      if (target == null) {
        // Only reachable when the reservoir is empty yet counts as "full".
        throw new IllegalStateException(
            "Cannot insert into a frozen reservoir that holds no bins");
      }
      target.sumUpdate(bin);
      return;
    }

    Bin<T> existingBin = get(mean);
    if (existingBin != null) {
      existingBin.sumUpdate(bin);
      if (isWeightGaps()) {
        // The bin changed, so the gaps on either side of it are recomputed.
        updateGaps(existingBin);
      }
    } else {
      updateGaps(bin);
      _bins.put(mean, bin);
    }
  }

  /**
   * Finds the stored bin whose mean is closest to {@code mean}.
   *
   * <p>Missing neighbours are handled with explicit null checks instead of a
   * sentinel distance: a real distance can overflow to infinity, and comparing
   * that against a finite placeholder would select the missing side.
   *
   * @param mean the mean to look up
   * @return the closest bin (the lower one on a tie), or {@code null} when no
   *     bins are stored
   */
  private Bin<T> nearestBin(double mean) {
    Bin<T> floorBin = floor(mean);
    Bin<T> ceilBin = ceiling(mean);
    if (floorBin == null) {
      return ceilBin;
    }
    if (ceilBin == null) {
      return floorBin;
    }
    double floorDiff = Math.abs(floorBin.getMean() - mean);
    double ceilDiff = Math.abs(ceilBin.getMean() - mean);
    return (floorDiff <= ceilDiff) ? floorBin : ceilBin;
  }

  /** Returns the bin with the lowest mean, or {@code null} if there are no bins. */
  @Override
  public Bin<T> first() {
    return binFromEntry(_bins.firstEntry());
  }

  /** Returns the bin with the highest mean, or {@code null} if there are no bins. */
  @Override
  public Bin<T> last() {
    return binFromEntry(_bins.lastEntry());
  }

  /** Returns the bin whose mean is exactly {@code p}, or {@code null} if none. */
  @Override
  public Bin<T> get(double p) {
    return _bins.get(p);
  }

  /** Returns the bin with the greatest mean that is &lt;= {@code p}, or {@code null}. */
  @Override
  public Bin<T> floor(double p) {
    return binFromEntry(_bins.floorEntry(p));
  }

  /** Returns the bin with the smallest mean that is &gt;= {@code p}, or {@code null}. */
  @Override
  public Bin<T> ceiling(double p) {
    return binFromEntry(_bins.ceilingEntry(p));
  }

  /** Returns the bin with the smallest mean strictly above {@code p}, or {@code null}. */
  @Override
  public Bin<T> higher(double p) {
    return binFromEntry(_bins.higherEntry(p));
  }

  /** Returns the bin with the greatest mean strictly below {@code p}, or {@code null}. */
  @Override
  public Bin<T> lower(double p) {
    return binFromEntry(_bins.lowerEntry(p));
  }

  /**
   * Returns the bins in ascending order of mean. The collection is the tree
   * map's live value view, not a copy.
   */
  @Override
  public Collection<Bin<T>> getBins() {
    return _bins.values();
  }

  /**
   * Reduces the number of bins to at most {@code getMaxBins()}.
   *
   * <p>Each round takes the first gap in the gap set's ordering (presumably
   * the lightest one -- the ordering is defined by {@code Gap}), replaces the
   * two bins on either side of it with their combination, and rebuilds the
   * gaps around the combined bin.
   *
   * @throws IllegalStateException if bins must still be merged but no gap is
   *     registered
   */
  @Override
  public void merge() {
    while (_bins.size() > getMaxBins()) {
      Gap<T> smallestGap = _gaps.pollFirst();
      if (smallestGap == null) {
        throw new IllegalStateException(
            "No gap left to merge: " + _bins.size()
                + " bin(s) held, maximum is " + getMaxBins());
      }
      Bin<T> startBin = smallestGap.getStartBin();
      Bin<T> endBin = smallestGap.getEndBin();
      Bin<T> newBin = startBin.combine(endBin);

      // The gap that started at the absorbed end bin is obsolete as well.
      Gap<T> followingGap = _binsToGaps.get(endBin.getMean());
      if (followingGap != null) {
        _gaps.remove(followingGap);
      }

      _bins.remove(startBin.getMean());
      _bins.remove(endBin.getMean());
      _binsToGaps.remove(startBin.getMean());
      _binsToGaps.remove(endBin.getMean());

      updateGaps(newBin);
      _bins.put(newBin.getMean(), newBin);
    }
  }

  /**
   * Rebuilds the gap on each side of {@code newBin}: the one from its lower
   * neighbour to it and the one from it to its higher neighbour. A side with
   * no neighbour is skipped.
   */
  private void updateGaps(Bin<T> newBin) {
    Bin<T> prev = lower(newBin.getMean());
    if (prev != null) {
      updateGaps(prev, newBin);
    }

    Bin<T> next = higher(newBin.getMean());
    if (next != null) {
      updateGaps(newBin, next);
    }
  }

  /**
   * Registers the gap running from {@code prev} to {@code next}, discarding
   * whatever gap was previously registered as starting at {@code prev}'s mean.
   */
  private void updateGaps(Bin<T> prev, Bin<T> next) {
    Gap<T> newGap = new Gap<T>(prev, next, gapWeight(prev, next));

    Gap<T> prevGap = _binsToGaps.get(prev.getMean());
    if (prevGap != null) {
      _gaps.remove(prevGap);
    }

    _binsToGaps.put(prev.getMean(), newGap);
    _gaps.add(newGap);
  }

  /** Unwraps a map entry into its bin, mapping a {@code null} entry to {@code null}. */
  private Bin<T> binFromEntry(Entry<Double, Bin<T>> entry) {
    return (entry == null) ? null : entry.getValue();
  }

  /** Bins keyed by mean, kept in ascending order. */
  private final TreeMap<Double, Bin<T>> _bins;

  /** Gaps between adjacent bins; {@link #merge()} consumes them from the front. */
  private final TreeSet<Gap<T>> _gaps;

  /** Maps a bin's mean to the gap that starts at that bin. */
  private final HashMap<Double, Gap<T>> _binsToGaps;
}