/* XXL: The eXtensible and fleXible Library for data processing
Copyright (C) 2000-2013 Prof. Dr. Bernhard Seeger
Head of the Database Research Group
Department of Mathematics and Computer Science
University of Marburg
Germany
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; If not, see <http://www.gnu.org/licenses/>.
http://code.google.com/p/xxl/
*/
package xxl.core.spatial.histograms.utils;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import xxl.core.collections.MapEntry;
import xxl.core.cursors.Cursor;
import xxl.core.indexStructures.ORTree;
import xxl.core.indexStructures.RTree;
import xxl.core.indexStructures.RTree.Node;
import xxl.core.spatial.SpatialUtils;
import xxl.core.spatial.points.DoublePoint;
import xxl.core.spatial.rectangles.DoublePointRectangle;
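/**
* Static helpers that build an RK-Hist style spatial histogram from the
* nodes of a bulk-loaded R-tree.
*
* @see "T. Eavis and A. Lopez. Rk-hist: an r-tree based
* histogram for multi-dimensional selectivity estimation. CIKM 2007"
*/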
public class RKhist {
/**
 * Builds a histogram from the nodes that
 * {@code SpatialUtils.getNodesAndMBRs(bulkLoadedRTree, 1)} delivers.
 * <p>
 * The method works in three steps:
 * <ol>
 * <li>Consecutive nodes (in cursor order) are grouped into hyperblocks and each
 * hyperblock is rated with {@link #kMetricCosts(List, int)}.</li>
 * <li>The hyperblocks are sorted by ascending cost and the first
 * {@code numberOfHyperBlocks * ratio / 2} of them are moved to a penalty list.
 * Penalty hyperblocks are split at the position that minimises the summed cost
 * of both halves, as long as that sum is strictly smaller than the cost of the
 * unsplit hyperblock and fewer than {@code numberOfHyperBlocks} hyperblocks
 * exist.</li>
 * <li>Every remaining hyperblock becomes one bucket: the union of its node
 * MBRs, weighted with the sum of {@code number()} over its nodes, and updated
 * with the descriptor of every node entry.</li>
 * </ol>
 * Three progress lines are written to {@code System.out}.
 *
 * @param bulkLoadedRTree tree whose nodes are read and whose
 *        {@code descriptor} method is applied to the node entries
 * @param numberOfNodes number of nodes expected from the cursor; only used to
 *        derive the hyperblock size
 * @param numberOfHyperBlocks upper bound on the number of hyperblocks that
 *        splitting may produce
 * @param ratio share of {@code numberOfHyperBlocks} held back for splitting;
 *        the initial grouping aims at {@code numberOfHyperBlocks * (1 - ratio)}
 *        hyperblocks
 * @param dimension forwarded unchanged to {@link #kMetricCosts(List, int)}
 * @return one bucket per final hyperblock
 */
public static List<SpatialHistogramBucket> buildRKHist(RTree bulkLoadedRTree, int numberOfNodes,
        int numberOfHyperBlocks, double ratio, int dimension){
    // number of hyperblocks the initial grouping aims at
    double overestimation = (numberOfHyperBlocks-( numberOfHyperBlocks*ratio));
    int hyperBlockSize = (int) Math.ceil(numberOfNodes/overestimation);
    int floor = (int) Math.floor(numberOfNodes/overestimation);
    int penaltyNumber = (int) ((numberOfHyperBlocks*ratio)/2);
    System.out.println("Overestimetion: " + overestimation);
    System.out.println("Ceil: " +hyperBlockSize + " floor: " + floor);
    // Prefer the smaller block size when the resulting block count still fits.
    // floor can be 0 (fewer nodes than initial hyperblocks); dividing by it
    // used to throw an ArithmeticException, so the rounded-up size is kept then.
    if (floor > 0){
        int maxHF = numberOfNodes / floor;
        if ((penaltyNumber/2) + maxHF <= numberOfHyperBlocks){
            hyperBlockSize = floor;
        }
    }
    // A hyperblock must hold at least one node; otherwise the grouping loop
    // below would produce empty hyperblocks without consuming the cursor.
    if (hyperBlockSize < 1){
        hyperBlockSize = 1;
    }
    System.out.println("hyper block size: " + hyperBlockSize);
    List<MapEntry<Double, List<MapEntry<DoublePointRectangle, RTree.Node>>>> nodeList =
            new ArrayList<MapEntry<Double, List<MapEntry<DoublePointRectangle, RTree.Node>>>>();
    List<MapEntry<Double, List<MapEntry<DoublePointRectangle, RTree.Node>>>> penaltyList =
            new ArrayList<MapEntry<Double, List<MapEntry<DoublePointRectangle, RTree.Node>>>>();
    // step 1. group consecutive nodes into hyperblocks and rate each of them
    // NOTE(review): the original comment spoke of "level 0 leaf nodes" while the
    // level argument is 1 - confirm the level convention of SpatialUtils.
    Cursor<MapEntry<DoublePointRectangle, RTree.Node>> nodes = SpatialUtils.getNodesAndMBRs(bulkLoadedRTree, 1);
    try {
        while (nodes.hasNext()){
            List<MapEntry<DoublePointRectangle, RTree.Node>> hyperNode =
                    new ArrayList<MapEntry<DoublePointRectangle, RTree.Node>>();
            for (int i = 0; nodes.hasNext() && i < hyperBlockSize; i++){
                hyperNode.add(nodes.next());
            }
            double costs = kMetricCosts(hyperNode, dimension);
            nodeList.add(new MapEntry<Double, List<MapEntry<DoublePointRectangle, RTree.Node>>>(costs, hyperNode));
        }
    } finally {
        // release whatever the cursor holds, even if rating a hyperblock fails
        nodes.close();
    }
    // sort by ascending cost and move the first penaltyNumber hyperblocks to the penalty list
    // NOTE(review): ascending order means the lowest-cost hyperblocks become the
    // split candidates - confirm that this, and not the highest-cost ones, is intended.
    Collections.sort(nodeList, new Comparator<MapEntry<Double, List<MapEntry<DoublePointRectangle, RTree.Node>>>>() {
        @Override
        public int compare(
                MapEntry<Double, List<MapEntry<DoublePointRectangle, RTree.Node>>> arg0,
                MapEntry<Double, List<MapEntry<DoublePointRectangle, RTree.Node>>> arg1) {
            return arg0.getKey().compareTo(arg1.getKey());
        }
    });
    // never ask for more hyperblocks than exist (subList would throw)
    int penaltyCount = Math.max(0, Math.min(penaltyNumber, nodeList.size()));
    penaltyList.addAll(nodeList.subList(0, penaltyCount));
    nodeList.subList(0, penaltyCount).clear();
    // step 2. split penalty hyperblocks while that lowers the cost and the budget allows it
    int splits = nodeList.size() + penaltyList.size();
    boolean enhanceable = true;
    while (enhanceable && penaltyList.size() + nodeList.size() < numberOfHyperBlocks){
        List<MapEntry<Double, List<MapEntry<DoublePointRectangle, RTree.Node>>>> penaltyTempList =
                new ArrayList<MapEntry<Double, List<MapEntry<DoublePointRectangle, RTree.Node>>>>();
        boolean changed = false;
        for (int i = 0; i < penaltyList.size(); i++){
            MapEntry<Double, List<MapEntry<DoublePointRectangle, RTree.Node>>> hyperNode = penaltyList.get(i);
            List<MapEntry<DoublePointRectangle, RTree.Node>> list = hyperNode.getValue();
            // try every split position; both halves always keep at least one node
            int argMin = 0;
            double min = Double.MAX_VALUE;
            double argCostLeft = 0;
            double argCostRight = 0;
            for (int j = 0; j < list.size()-1; j++){
                double costsLeft = kMetricCosts(list.subList(0, j+1), dimension);
                double costRight = kMetricCosts(list.subList(j+1, list.size()), dimension);
                if (costsLeft + costRight < min){
                    min = costsLeft + costRight;
                    argCostLeft = costsLeft;
                    argCostRight = costRight;
                    argMin = j;
                }
            }
            if (min < hyperNode.getKey()){
                // the halves are collected separately so they are not split again in this pass
                penaltyTempList.add(new MapEntry<Double, List<MapEntry<DoublePointRectangle, RTree.Node>>>(
                        argCostLeft, list.subList(0, argMin+1)));
                penaltyTempList.add(new MapEntry<Double, List<MapEntry<DoublePointRectangle, RTree.Node>>>(
                        argCostRight, list.subList(argMin+1, list.size())));
                penaltyList.remove(i);
                i--; // the next element moved into slot i
                splits++;
                changed = true;
                if (splits >= numberOfHyperBlocks){
                    break;
                }
            }
        }
        penaltyList.addAll(penaltyTempList);
        enhanceable = changed;
    }
    // step 3. merge to histogram
    nodeList.addAll(penaltyList);
    List<SpatialHistogramBucket> histogram = new ArrayList<SpatialHistogramBucket>();
    for (MapEntry<Double, List<MapEntry<DoublePointRectangle, RTree.Node>>> entry : nodeList){
        SpatialHistogramBucket mbr = null;
        int weight = 0;
        for (MapEntry<DoublePointRectangle, RTree.Node> value : entry.getValue()){
            if (mbr == null){
                mbr = new SpatialHistogramBucket(value.getKey());
            } else {
                mbr.union(value.getKey());
            }
            weight += value.getValue().number();
            // feed the descriptor of every node entry into the bucket average
            Iterator<?> it = value.getValue().entries();
            while (it.hasNext()){
                DoublePointRectangle rec = (DoublePointRectangle) bulkLoadedRTree.descriptor(it.next());
                mbr.updateAverage(rec);
            }
        }
        mbr.setWeight(weight);
        histogram.add(mbr);
    }
    return histogram;
}
/**
 * Variant of {@code buildRKHist} that works on the index entries delivered by
 * {@code SpatialUtils.getIndexEntriesAndMBRs(bulkLoadedRTree, 1)}; the node
 * behind an index entry is fetched through {@code getNode} whenever it is
 * needed.
 * <p>
 * The method works in three steps:
 * <ol>
 * <li>Consecutive index entries (in cursor order) are grouped into hyperblocks
 * and each hyperblock is rated with {@link #kMetricCostsIndex(List, int)}.</li>
 * <li>The hyperblocks are sorted by ascending cost and the first
 * {@code numberOfHyperBlocks * ratio / 2} of them are moved to a penalty list.
 * Penalty hyperblocks are split at the position that minimises the summed cost
 * of both halves, as long as that sum is strictly smaller than the cost of the
 * unsplit hyperblock and fewer than {@code numberOfHyperBlocks} hyperblocks
 * exist.</li>
 * <li>Every remaining hyperblock becomes one bucket: the union of its entry
 * MBRs, weighted with the sum of {@code number()} over its nodes, and updated
 * with the descriptor of every node entry.</li>
 * </ol>
 * Three progress lines are written to {@code System.out}.
 *
 * @param bulkLoadedRTree tree whose index entries are read and whose
 *        {@code descriptor} method is applied to the node entries
 * @param numberOfNodes number of index entries expected from the cursor; only
 *        used to derive the hyperblock size
 * @param numberOfHyperBlocks upper bound on the number of hyperblocks that
 *        splitting may produce
 * @param ratio share of {@code numberOfHyperBlocks} held back for splitting;
 *        the initial grouping aims at {@code numberOfHyperBlocks * (1 - ratio)}
 *        hyperblocks
 * @param dimension forwarded unchanged to {@link #kMetricCostsIndex(List, int)}
 * @return one bucket per final hyperblock
 */
public static List<SpatialHistogramBucket> buildRKHist2(RTree bulkLoadedRTree, int numberOfNodes,
        int numberOfHyperBlocks, double ratio, int dimension){
    // number of hyperblocks the initial grouping aims at
    double overestimation = (numberOfHyperBlocks-( numberOfHyperBlocks*ratio));
    int hyperBlockSize = (int) Math.ceil(numberOfNodes/overestimation);
    int floor = (int) Math.floor(numberOfNodes/overestimation);
    int penaltyNumber = (int) ((numberOfHyperBlocks*ratio)/2);
    System.out.println("Overestimetion: " + overestimation);
    System.out.println("Ceil: " +hyperBlockSize + " floor: " + floor);
    // Prefer the smaller block size when the resulting block count still fits.
    // floor can be 0 (fewer nodes than initial hyperblocks); dividing by it
    // used to throw an ArithmeticException, so the rounded-up size is kept then.
    if (floor > 0){
        int maxHF = numberOfNodes / floor;
        if ((penaltyNumber/2) + maxHF <= numberOfHyperBlocks){
            hyperBlockSize = floor;
        }
    }
    // A hyperblock must hold at least one entry; otherwise the grouping loop
    // below would produce empty hyperblocks without consuming the cursor.
    if (hyperBlockSize < 1){
        hyperBlockSize = 1;
    }
    System.out.println("hyper block size: " + hyperBlockSize);
    List<MapEntry<Double, List<MapEntry<DoublePointRectangle, ORTree.IndexEntry>>>> nodeList =
            new ArrayList<MapEntry<Double, List<MapEntry<DoublePointRectangle, ORTree.IndexEntry>>>>();
    List<MapEntry<Double, List<MapEntry<DoublePointRectangle, ORTree.IndexEntry>>>> penaltyList =
            new ArrayList<MapEntry<Double, List<MapEntry<DoublePointRectangle, ORTree.IndexEntry>>>>();
    // step 1. group consecutive index entries into hyperblocks and rate each of them
    // NOTE(review): the original comment spoke of "level 0 leaf nodes" while the
    // level argument is 1 - confirm the level convention of SpatialUtils.
    Cursor<MapEntry<DoublePointRectangle, ORTree.IndexEntry>> nodes =
            SpatialUtils.getIndexEntriesAndMBRs(bulkLoadedRTree, 1);
    try {
        while (nodes.hasNext()){
            List<MapEntry<DoublePointRectangle, ORTree.IndexEntry>> hyperNode =
                    new ArrayList<MapEntry<DoublePointRectangle, ORTree.IndexEntry>>();
            for (int i = 0; nodes.hasNext() && i < hyperBlockSize; i++){
                hyperNode.add(nodes.next());
            }
            double costs = kMetricCostsIndex(hyperNode, dimension);
            nodeList.add(new MapEntry<Double, List<MapEntry<DoublePointRectangle, ORTree.IndexEntry>>>(costs, hyperNode));
        }
    } finally {
        // release whatever the cursor holds, even if rating a hyperblock fails
        nodes.close();
    }
    // sort by ascending cost and move the first penaltyNumber hyperblocks to the penalty list
    // NOTE(review): ascending order means the lowest-cost hyperblocks become the
    // split candidates - confirm that this, and not the highest-cost ones, is intended.
    Collections.sort(nodeList, new Comparator<MapEntry<Double, List<MapEntry<DoublePointRectangle, ORTree.IndexEntry>>>>() {
        @Override
        public int compare(
                MapEntry<Double, List<MapEntry<DoublePointRectangle, ORTree.IndexEntry>>> arg0,
                MapEntry<Double, List<MapEntry<DoublePointRectangle, ORTree.IndexEntry>>> arg1) {
            return arg0.getKey().compareTo(arg1.getKey());
        }
    });
    // never ask for more hyperblocks than exist (subList would throw)
    int penaltyCount = Math.max(0, Math.min(penaltyNumber, nodeList.size()));
    penaltyList.addAll(nodeList.subList(0, penaltyCount));
    nodeList.subList(0, penaltyCount).clear();
    // step 2. split penalty hyperblocks while that lowers the cost and the budget allows it
    int splits = nodeList.size() + penaltyList.size();
    boolean enhanceable = true;
    while (enhanceable && penaltyList.size() + nodeList.size() < numberOfHyperBlocks){
        List<MapEntry<Double, List<MapEntry<DoublePointRectangle, ORTree.IndexEntry>>>> penaltyTempList =
                new ArrayList<MapEntry<Double, List<MapEntry<DoublePointRectangle, ORTree.IndexEntry>>>>();
        boolean changed = false;
        for (int i = 0; i < penaltyList.size(); i++){
            MapEntry<Double, List<MapEntry<DoublePointRectangle, ORTree.IndexEntry>>> hyperNode = penaltyList.get(i);
            List<MapEntry<DoublePointRectangle, ORTree.IndexEntry>> list = hyperNode.getValue();
            // try every split position; both halves always keep at least one entry
            int argMin = 0;
            double min = Double.MAX_VALUE;
            double argCostLeft = 0;
            double argCostRight = 0;
            for (int j = 0; j < list.size()-1; j++){
                double costsLeft = kMetricCostsIndex(list.subList(0, j+1), dimension);
                double costRight = kMetricCostsIndex(list.subList(j+1, list.size()), dimension);
                if (costsLeft + costRight < min){
                    min = costsLeft + costRight;
                    argCostLeft = costsLeft;
                    argCostRight = costRight;
                    argMin = j;
                }
            }
            if (min < hyperNode.getKey()){
                // the halves are collected separately so they are not split again in this pass
                penaltyTempList.add(new MapEntry<Double, List<MapEntry<DoublePointRectangle, ORTree.IndexEntry>>>(
                        argCostLeft, list.subList(0, argMin+1)));
                penaltyTempList.add(new MapEntry<Double, List<MapEntry<DoublePointRectangle, ORTree.IndexEntry>>>(
                        argCostRight, list.subList(argMin+1, list.size())));
                penaltyList.remove(i);
                i--; // the next element moved into slot i
                splits++;
                changed = true;
                if (splits >= numberOfHyperBlocks){
                    break;
                }
            }
        }
        penaltyList.addAll(penaltyTempList);
        enhanceable = changed;
    }
    // step 3. merge to histogram
    nodeList.addAll(penaltyList);
    List<SpatialHistogramBucket> histogram = new ArrayList<SpatialHistogramBucket>();
    for (MapEntry<Double, List<MapEntry<DoublePointRectangle, ORTree.IndexEntry>>> entry : nodeList){
        SpatialHistogramBucket mbr = null;
        int weight = 0;
        for (MapEntry<DoublePointRectangle, ORTree.IndexEntry> value : entry.getValue()){
            Node node = getNode(value.getValue());
            if (mbr == null){
                mbr = new SpatialHistogramBucket(value.getKey());
            } else {
                mbr.union(value.getKey());
            }
            weight += node.number();
            // feed the descriptor of every node entry into the bucket average
            Iterator<?> it = node.entries();
            while (it.hasNext()){
                DoublePointRectangle rec = (DoublePointRectangle) bulkLoadedRTree.descriptor(it.next());
                mbr.updateAverage(rec);
            }
        }
        mbr.setWeight(weight);
        histogram.add(mbr);
    }
    return histogram;
}
/**
 * Fetches the object behind an index entry via {@code get()} and returns it
 * as an R-tree node.
 *
 * @param indeEntry index entry to resolve
 * @return the result of {@code indeEntry.get()}, cast to {@code RTree.Node}
 */
private static Node getNode(ORTree.IndexEntry indeEntry){
    return (RTree.Node) indeEntry.get();
}
/**
 * Rates a group of nodes: gathers the centre of every node entry together with
 * the union of all node entries and hands both to
 * {@link #kMetricCost(List, DoublePointRectangle)}.
 * <p>
 * Every node entry is cast to {@code DoublePointRectangle}.
 *
 * @param rectangles MBR/node pairs; only the nodes are read
 * @param dimension not used by this method
 * @return the value computed by {@code kMetricCost} for the collected centres
 */
@SuppressWarnings("rawtypes")
public static double kMetricCosts(List<MapEntry<DoublePointRectangle, RTree.Node>> rectangles, int dimension){
    final List<DoublePoint> centers = new ArrayList<DoublePoint>();
    DoublePointRectangle bounds = null;
    for (Iterator<MapEntry<DoublePointRectangle, RTree.Node>> pairs = rectangles.iterator(); pairs.hasNext();){
        Iterator children = pairs.next().getValue().entries();
        while (children.hasNext()){
            DoublePointRectangle child = (DoublePointRectangle) children.next();
            centers.add(child.getCenter());
            // FIXME (marker kept from the original code; the concern is not recorded)
            if (bounds != null){
                bounds.union(child);
            } else {
                // the first entry seeds the bounding rectangle with a copy of itself
                bounds = new DoublePointRectangle(child);
            }
        }
    }
    return kMetricCost(centers, bounds);
}
/**
 * Rates a group of index entries: resolves each entry to its node with
 * {@code getNode}, gathers the centre of every node entry together with the
 * union of all node entries and hands both to
 * {@link #kMetricCost(List, DoublePointRectangle)}.
 * <p>
 * Every node entry is cast to {@code DoublePointRectangle}.
 *
 * @param rectangles MBR/index-entry pairs; only the index entries are read
 * @param dimension not used by this method
 * @return the value computed by {@code kMetricCost} for the collected centres
 */
@SuppressWarnings("rawtypes")
public static double kMetricCostsIndex(List<MapEntry<DoublePointRectangle,ORTree.IndexEntry>> rectangles, int dimension){
    final List<DoublePoint> centers = new ArrayList<DoublePoint>();
    DoublePointRectangle bounds = null;
    for (Iterator<MapEntry<DoublePointRectangle, ORTree.IndexEntry>> pairs = rectangles.iterator(); pairs.hasNext();){
        Iterator children = getNode(pairs.next().getValue()).entries();
        while (children.hasNext()){
            DoublePointRectangle child = (DoublePointRectangle) children.next();
            centers.add(child.getCenter());
            // FIXME (marker kept from the original code; the concern is not recorded)
            if (bounds != null){
                bounds.union(child);
            } else {
                // the first entry seeds the bounding rectangle with a copy of itself
                bounds = new DoublePointRectangle(child);
            }
        }
    }
    return kMetricCost(centers, bounds);
}
/**
 * Computes the cost of a point set inside a bounding rectangle.
 * <p>
 * The rectangle is cut up recursively by
 * {@link #kMetricCosts(List, int, int, List, DoublePointRectangle)}, starting
 * with dimension 0, which yields one volume per point. The cost is the sum of
 * the squared deviations of these volumes from their mean; it is not divided
 * by the number of volumes.
 * <p>
 * The recursion sorts {@code recs} in place, so the list is reordered by this
 * call.
 *
 * @param recs points that drive the partitioning; reordered by this call
 * @param uni rectangle that is cut up; its {@code dimensions()} gives the
 *        number of dimensions cycled through
 * @return the sum of squared deviations of the cell volumes, or {@code 0} when
 *         {@code recs} is empty
 */
public static double kMetricCost(List<DoublePoint> recs, DoublePointRectangle uni){
    // No points means no cells and therefore no deviation. Without this guard
    // an empty list failed further down (uni is typically null in that case).
    if (recs.isEmpty()){
        return 0.0;
    }
    List<Double> volumes = new ArrayList<Double>();
    kMetricCosts(recs, 0, uni.dimensions(), volumes, uni);
    double total = 0;
    for (double vol : volumes){
        total += vol;
    }
    double avg = total / volumes.size();
    double squaredDeviation = 0;
    for (double vol : volumes){
        squaredDeviation += Math.pow((vol - avg), 2);
    }
    return squaredDeviation;
}
/**
 * Recursively halves a point set at its median and records the volume of the
 * cell that ends up holding each single point.
 * <p>
 * The points are sorted in place by their coordinate in {@code currentDim}.
 * The coordinate of the element at index {@code size / 2} is used to cut
 * {@code vol} with {@link #cut(DoublePointRectangle, double, int)}. The first
 * {@code size / 2} points continue with the first part of the cut, the
 * remaining points with the second part, both in the next dimension
 * ({@code (currentDim + 1) % dimensions}). When a single point is left,
 * {@code vol.area()} is appended to {@code volumes}.
 *
 * @param recs points to partition; reordered by this call, nothing is recorded
 *        when the list is empty
 * @param currentDim dimension used for sorting and cutting at this level
 * @param dimensions number of dimensions to cycle through
 * @param volumes output list that receives one value per point
 * @param vol rectangle assigned to the points in {@code recs}
 */
public static void kMetricCosts(List<DoublePoint> recs, final int currentDim, int dimensions, List<Double> volumes, DoublePointRectangle vol){
    if (recs.isEmpty()){
        // nothing to partition; the median lookup below would fail on an empty list
        return;
    }
    if (recs.size() == 1){
        volumes.add(vol.area());
        return;
    }
    int nextDim = (currentDim+1) % dimensions;
    // FIXME (from the original code): replace the sort by a linear-time median selection
    Collections.sort(recs, new Comparator<DoublePoint>() {
        @Override
        public int compare(DoublePoint o1, DoublePoint o2) {
            // Double.compare is a total order (also for NaN), as Comparator requires
            return Double.compare(o1.getValue(currentDim), o2.getValue(currentDim));
        }
    });
    int middle = recs.size()/2;
    double val = recs.get(middle).getValue(currentDim);
    // leftRight[0] ends at val in currentDim, leftRight[1] starts there
    DoublePointRectangle[] leftRight = cut(vol, val, currentDim);
    kMetricCosts(recs.subList(0, middle), nextDim, dimensions, volumes, leftRight[0]);
    kMetricCosts(recs.subList(middle, recs.size()), nextDim, dimensions, volumes, leftRight[1]);
}
/**
 * Cuts a rectangle into two parts at a given coordinate of one dimension.
 *
 * @param rec rectangle to cut
 * @param val coordinate in dimension {@code dim} at which the cut is made
 * @param dim index of the dimension that is cut
 * @return array of two rectangles: element 0 reaches from
 *         {@code rec.getCorner(false)} to a copy of {@code rec.getCorner(true)}
 *         whose coordinate {@code dim} is {@code val}; element 1 reaches from a
 *         copy of {@code rec.getCorner(false)} whose coordinate {@code dim} is
 *         {@code val} to {@code rec.getCorner(true)}
 */
public static DoublePointRectangle[] cut(DoublePointRectangle rec, double val, int dim){
    DoublePoint lowCorner = (DoublePoint) rec.getCorner(false);
    DoublePoint highCorner = (DoublePoint) rec.getCorner(true);
    double[] firstUpper = new double[rec.dimensions()];
    double[] secondLower = new double[rec.dimensions()];
    int axis = 0;
    while (axis < highCorner.dimensions()){
        firstUpper[axis] = highCorner.getValue(axis);
        secondLower[axis] = lowCorner.getValue(axis);
        axis++;
    }
    // both new corners sit on the cutting plane
    firstUpper[dim] = val;
    secondLower[dim] = val;
    DoublePoint upperOfFirst = new DoublePoint(firstUpper);
    DoublePoint lowerOfSecond = new DoublePoint(secondLower);
    return new DoublePointRectangle[] {
        new DoublePointRectangle(lowCorner, upperOfFirst),
        new DoublePointRectangle(lowerOfSecond, highCorner)
    };
}
}