/* XXL: The eXtensible and fleXible Library for data processing
Copyright (C) 2000-2013 Prof. Dr. Bernhard Seeger
Head of the Database Research Group
Department of Mathematics and Computer Science
University of Marburg
Germany
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; If not, see <http://www.gnu.org/licenses/>.
http://code.google.com/p/xxl/
*/
package xxl.core.spatial.histograms.utils;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import xxl.core.cursors.Cursor;
import xxl.core.functions.Functional.UnaryFunction;
import xxl.core.spatial.SpaceFillingCurves;
import xxl.core.spatial.points.DoublePoint;
import xxl.core.spatial.rectangles.DoublePointRectangle;
/**
*
* see: S. Acharya, V. Poosala, and S. Ramaswamy.
* Selectivity estimation in spatial databases. SIGMOD '99
*
*/
public class MinSkewHist {
/**
* Precision of the space filling curve.
*/
protected static int FILLING_CURVE_PRECISION = 128;
/**
*
*/
public static boolean verbose = false;
/**
*
* @param sfcPrecision
* @return
*/
@SuppressWarnings("serial")
public static UnaryFunction<double[], int[]> getSFCFunction(final int sfcPrecision){
return new UnaryFunction<double[], int[]>() {
@Override
public int[] invoke(double[] from) {
int[] to = new int[from.length];
for (int i = 0; i < to.length; i++) {
to[i] = (int) (from[i] * sfcPrecision);
}
return to;
}
};
}
// Space MUST have been normalized to [0.0,1.0]
/**
*
* @param rectangles
* @param bitsPerDim
* @param dimensions
* @return
*/
public static Map<Long, Integer> computeGrid(
Cursor<DoublePointRectangle> rectangles, int bitsPerDim,
int dimensions) {
Map<Long, Integer> grid = new HashMap<Long, Integer>();
rectangles.reset();
UnaryFunction<double[], int[]> convertToFillingCurveValues = getSFCFunction( (1 << (bitsPerDim-1))) ;
for (; rectangles.hasNext();) {
DoublePointRectangle actRectangle = rectangles.next();
double[] lowleft = (double[]) actRectangle.getCorner(false)
.getPoint();
double[] upright = (double[]) actRectangle.getCorner(true)
.getPoint();
int[] intLowLeft = convertToFillingCurveValues.invoke(lowleft);
int[] intUpRight = convertToFillingCurveValues.invoke(upright);
try {
List<long[]> zcodesList = SpaceFillingCurves.computeZBoxRanges(
intLowLeft, intUpRight, bitsPerDim, dimensions);
for (long[] zcodes : zcodesList) {
for (long zcode = zcodes[0]; zcode <= zcodes[1]; zcode++) {
Integer value = 1;
if (grid.containsKey(zcode)) {
value = grid.get(zcode);
value++;
}
grid.put(zcode, value);
}
}
} catch (Exception ex) {
throw new RuntimeException("Z-Curve box computation problem",ex);
}
}
return grid;
}
// Space MUST have been normalized to [0.0,1.0]
/**
*
* @param rectangles
* @param bitsPerDim
* @param dimensions
* @return
*/
public static Map<Long, Integer> computeGridForForest(
Cursor<DoublePointRectangle> rectangles, int bitsPerDim,
int dimensions) {
Map<Long, Integer> grid = new HashMap<Long, Integer>();
UnaryFunction<double[], int[]> convertToFillingCurveValues = getSFCFunction( (1 << (bitsPerDim-1))) ;
for (; rectangles.hasNext();) {
DoublePointRectangle actRectangle = rectangles.next();
double[] lowleft = (double[]) actRectangle.getCorner(false)
.getPoint();
double[] upright = (double[]) actRectangle.getCorner(true)
.getPoint();
int[] intLowLeft = convertToFillingCurveValues.invoke(lowleft);
int[] intUpRight = convertToFillingCurveValues.invoke(upright);
try {
List<long[]> zcodesList = SpaceFillingCurves.computeZBoxRanges(
intLowLeft, intUpRight, bitsPerDim, dimensions);
for (long[] zcodes : zcodesList) {
for (long zcode = zcodes[0]; zcode <= zcodes[1]; zcode++) {
Integer value = 1;
if (grid.containsKey(zcode)) {
value = grid.get(zcode);
value++;
}
grid.put(zcode, value);
}
}
} catch (Exception ex) {
throw new RuntimeException("Z-Curve box computation problem",ex);
}
}
return grid;
}
/**
*
*
*
* @param rectangles
* @param universe
* @param bitsdPerDim
* @param dimensions
* @param maxBuckets
* @param refinements
* @return
*/
public static List<SpatialHistogramBucket> buildProgressiveRefinement(Cursor<DoublePointRectangle> rectangles,
DoublePointRectangle universe, int bitsdPerDim, int dimensions,
int maxBuckets, int refinements){
PriorityQueue<Bucket> buckets = new PriorityQueue<Bucket>(100,
new Comparator<Bucket>() {
@Override
public int compare(Bucket o1, Bucket o2) {
if (o2.getBestReduction() - o1.getBestReduction() > 0) {
return 1;
} else if (o2.getBestReduction()
- o1.getBestReduction() < 0) {
return -1;
} else {
return 0;
}
}
});
List<SpatialHistogramBucket> histogram = new ArrayList<SpatialHistogramBucket>();
List<Bucket> tempList = new ArrayList<>();
Map<Long, Integer> grid; // grid
int refStep = refinements;
int step = maxBuckets/(refinements + 1);
int bucketsProRefimenement = maxBuckets/(refinements + 1);
grid = computeGrid(rectangles, bitsdPerDim - refStep,
dimensions);
// 2. initial bucket berechnen
Bucket initialBucket = new Bucket(
new DoublePointRectangle(dimensions)
.normalize(new DoublePointRectangle(dimensions)));
initialBucket.computeSkew(grid, bitsdPerDim - refStep);
buckets.add(initialBucket);
// 3. restliche buckets berechnen
if(verbose)
System.out.println("start");
while (tempList.size() + buckets.size() <= maxBuckets) {
if(verbose)
System.out.println("Buckets: " + buckets.size() + " max Buckets: "
+ maxBuckets);
Bucket bucketTosplit = buckets.poll();
bucketTosplit.computeBestSplit(grid, bitsdPerDim - refStep);
// check if it exists
if(bucketTosplit.getBestOne() == null){
// cannot be splitted add to temp list
tempList.add(bucketTosplit);
}else{
bucketTosplit.getBestOne().computeBestSplit(grid, bitsdPerDim - refStep);
bucketTosplit.getBestTwo().computeBestSplit(grid, bitsdPerDim - refStep);
buckets.add(bucketTosplit.getBestOne());
buckets.add(bucketTosplit.getBestTwo());
}
if (buckets.isEmpty() ){
if (!tempList.isEmpty()){
buckets.addAll(tempList);
tempList.clear();
}
if (refStep <= 0){ // case all buckets are in temp and we cannot refine
break;
}
refStep--; // try to refine and compute grid
grid = computeGrid(rectangles, bitsdPerDim - refStep,
dimensions); //
bucketsProRefimenement += step;
}else if (tempList.size() + buckets.size() >= bucketsProRefimenement){
refStep--; // try to refine and compute grid
grid = computeGrid(rectangles, bitsdPerDim - refStep,
dimensions); //
// add temp buckets to list
buckets.addAll(tempList);
tempList.clear();
bucketsProRefimenement += step;
}
}
buckets.addAll(tempList);
rectangles.reset();
while (rectangles.hasNext()) {
DoublePointRectangle dpr = rectangles.next();
DoublePoint mitte = dpr.getCenter();
for (Bucket bucket : buckets) {
if (bucket.contains(mitte)) {
bucket.setWeight(bucket.getWeight() +1);
bucket.updateAverage(dpr);
}
}
for (SpatialHistogramBucket bucket : histogram) {
if (bucket.contains(mitte)) {
bucket.setWeight(bucket.getWeight() +1);
bucket.updateAverage(dpr);
}
}
}
histogram.addAll(buckets);
rectangles.reset();
return histogram;
}
/**
*
* @param rectangles
* @param gridSize
* @param dimensions
* @return
*/
public static List<SpatialHistogramBucket> buildHistogram(
Cursor<DoublePointRectangle> rectangles,
DoublePointRectangle universe, int bitsdPerDim, int dimensions,
int maxBuckets) {
PriorityQueue<Bucket> buckets = new PriorityQueue<Bucket>(100,
new Comparator<Bucket>() {
@Override
public int compare(Bucket o1, Bucket o2) {
if (o2.getBestReduction() - o1.getBestReduction() > 0) {
return 1;
} else if (o2.getBestReduction()
- o1.getBestReduction() < 0) {
return -1;
} else {
return 0;
}
}
});
List<SpatialHistogramBucket> histogram = new ArrayList<SpatialHistogramBucket>();
// 1.initial grid
Map<Long, Integer> grid = computeGrid(rectangles, bitsdPerDim,
dimensions);
// 2.first bucket
Bucket initialBucket = new Bucket(
new DoublePointRectangle(dimensions)
.normalize(new DoublePointRectangle(dimensions)));
initialBucket.computeSkew(grid, bitsdPerDim);
initialBucket.computeBestSplit(grid, bitsdPerDim);
buckets.add(initialBucket);
// 3. restliche buckets berechnen
if(verbose)
System.out.println("starte!!!");
while ((buckets.size() + histogram.size()) <= maxBuckets && buckets.size() > 0) {
if(verbose)
System.out.println("Buckets: " + buckets.size() + " max Buckets: "
+ maxBuckets);
Bucket bucketTosplit = buckets.poll();
if(bucketTosplit.getBestOne() == null){
histogram.add(bucketTosplit);
continue;
}
if(verbose)
System.out.println("P: " + bucketTosplit + "BestReduction: "
+ bucketTosplit.getBestReduction());
bucketTosplit.getBestOne().computeBestSplit(grid, bitsdPerDim);
bucketTosplit.getBestTwo().computeBestSplit(grid, bitsdPerDim);
if(verbose){
System.out.println("1: " + bucketTosplit.getBestOne()
+ "BestReduction: "
+ bucketTosplit.getBestOne().getBestReduction());
System.out.println("2: " + bucketTosplit.getBestTwo()
+ "BestReduction: "
+ bucketTosplit.getBestTwo().getBestReduction());
}
buckets.add(bucketTosplit.getBestOne());
buckets.add(bucketTosplit.getBestTwo());
}
rectangles.reset();
while (rectangles.hasNext()) {
DoublePointRectangle dpr = rectangles.next();
DoublePoint mitte = dpr.getCenter();
for (Bucket bucket : buckets) {
if (bucket.contains(mitte)) {
bucket.setWeight(bucket.getWeight() +1);
bucket.updateAverage(dpr);
}
}
for (SpatialHistogramBucket bucket : histogram) {
if (bucket.contains(mitte)) {
bucket.setWeight(bucket.getWeight() +1);
bucket.updateAverage(dpr);
}
}
}
histogram.addAll(buckets);
return histogram;
}
/**
*
* This static class is used minskew computation. It reperesents a grid cell.
*
*/
@SuppressWarnings("serial")
public static class Bucket extends SpatialHistogramBucket {
/**
*
*/
private Double skew = Double.MAX_VALUE;
/**
*
*/
private Double bestReduction = 0.0;
/**
*
*/
private Bucket bestOne = null;
/**
*
*/
private Bucket bestTwo = null;
/**
*
*/
private int numberOfReferencedObjects;
/**
*
*/
private int localBitsProDim = 0;
/**
*
* @param dimension
*/
public Bucket(int dimension) {
super(dimension);
}
/**
*
* @param dimension
* @param localBitsProDim
*/
public Bucket(int dimension,int localBitsProDim) {
super(dimension);
this.localBitsProDim = localBitsProDim;
}
/**
*
* @param rec
*/
public Bucket(DoublePointRectangle rec) {
super(rec);
}
/**
*
* @param grid
* @param bitsdPerDim
*/
public void computeSkew(Map<Long, Integer> grid, int bitsdPerDim) {
UnaryFunction<double[], int[]> convertToFillingCurveValues = getSFCFunction( (1 << (bitsdPerDim-1))) ;
int[] intLowLeft = convertToFillingCurveValues
.invoke(this.leftCorner);
int[] intUpRight = convertToFillingCurveValues
.invoke(this.rightCorner);
List<long[]> zcodesList = SpaceFillingCurves.computeZBoxRanges(
intLowLeft, intUpRight, bitsdPerDim, this.dimensions());
List<Integer> allValues = new LinkedList<Integer>();
for (long[] zcodes : zcodesList) {
for (long zcode = zcodes[0]; zcode <= zcodes[1]; zcode++) {
Integer value = 0;
if (grid.containsKey(zcode)) {
value = grid.get(zcode);
}
allValues.add(value);
}
}
Double summe = 0.0;
for (Integer i : allValues) {
summe += i;
}
Double schnitt = summe / allValues.size();
Double skew = 0.0;
for (Integer i : allValues) {
skew += (i - schnitt) * (i - schnitt);
}
setNumberOfReferencedObjects(allValues.size());
setSkew(skew);
}
/**
*
* @param size
*/
public void setNumberOfReferencedObjects(int size) {
this.numberOfReferencedObjects = size;
}
/**
*
* @return
*/
public int getNumberOfReferencedObjects() {
return numberOfReferencedObjects;
}
public void computeBestSplit(Map<Long, Integer> grid, int bitsdPerDim) {
double gridsize = 0.0;
if (bitsdPerDim > localBitsProDim)
gridsize = 1.0 / Math.pow(2, bitsdPerDim-1);
for (int dim = 0; dim < dimensions(); dim++) {
for (int step = 1; (this.leftCorner[dim] + (step) * gridsize) < this.rightCorner[dim]; step++) {// splitstelle?!?
// ausrechenen
Bucket one = new Bucket(dimensions());
Bucket two = new Bucket(dimensions());
double[] cutPointL = new double[this.rightCorner.length];
System.arraycopy(this.rightCorner, 0, cutPointL, 0,
this.rightCorner.length);
cutPointL[dim] = this.leftCorner[dim] + step * gridsize;
double[] cutPointR = new double[this.leftCorner.length];
System.arraycopy(this.leftCorner, 0, cutPointR, 0,
this.leftCorner.length);
cutPointR[dim] = this.leftCorner[dim] + (step) * gridsize;
one.leftCorner = this.leftCorner;
one.rightCorner = cutPointL;
two.leftCorner = cutPointR;
two.rightCorner = this.rightCorner;
one.computeSkew(grid, bitsdPerDim);
two.computeSkew(grid, bitsdPerDim);
Double actReduction = getSkew() - (one.getSkew() + two.getSkew());
if (bestReduction < actReduction ) {
bestReduction = actReduction;
setBestOne(one);
setBestTwo(two);
}
}
}
if (bestOne == null && verbose){
System.out.println("Problem!");
}
}
/**
*
* @return
*/
public Double getSkew() {
return skew;
}
/**
*
* @param skew
*/
public void setSkew(Double skew) {
this.skew = skew;
}
/**
*
* @return
*/
public Bucket getBestOne() {
return bestOne;
}
/**
*
* @param bestOne
*/
private void setBestOne(Bucket bestOne) {
this.bestOne = bestOne;
}
/**
*
* @return
*/
public Bucket getBestTwo() {
return bestTwo;
}
/**
*
* @param bestTwo
*/
private void setBestTwo(Bucket bestTwo) {
this.bestTwo = bestTwo;
}
/**
*
* @return
*/
public Double getBestReduction() {
return bestReduction;
}
/*
* (non-Javadoc)
* @see xxl.core.spatial.histograms.utils.SpatialHistogramBucket#toString()
*/
@Override
public String toString() {
return this.bestReduction + " ; skew " + this.skew + " " + super.toString();
}
}
}