/* XXL: The eXtensible and fleXible Library for data processing Copyright (C) 2000-2013 Prof. Dr. Bernhard Seeger Head of the Database Research Group Department of Mathematics and Computer Science University of Marburg Germany This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; If not, see <http://www.gnu.org/licenses/>. http://code.google.com/p/xxl/ */ package xxl.core.spatial.histograms.utils; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; import java.util.Iterator; import java.util.List; import java.util.Map; import xxl.core.collections.queues.io.BlockBasedQueue; import xxl.core.cursors.Cursor; import xxl.core.cursors.Cursors; import xxl.core.cursors.filters.Filter; import xxl.core.functions.Functional.UnaryFunction; import xxl.core.predicates.AbstractPredicate; import xxl.core.spatial.SpaceFillingCurves; import xxl.core.spatial.SpatialUtils; import xxl.core.spatial.points.DoublePoint; import xxl.core.spatial.rectangles.DoublePointRectangle; import xxl.core.util.Pair; /** * * @see Y. J. Roh, J. H. Kim, Y. D. Chung, J. H. Son, and * M. H. Kim. Hierarchically organized skew-tolerant * histograms for geographic data objects. In SIGMOD'10 * * */ public class STHist { public boolean verbose = false; public static final String tempSortX = "x_sort"; public static final String tempSortY = "y_sort"; public static final String sortTemp = "xy_sort"; public static final int BLOCK_SIZE = 4096; public static final int BITS_PRO_DIM = 7; protected Cursor<DoublePointRectangle> cursorX; protected Cursor<DoublePointRectangle> cursorY; protected BlockBasedQueue queueX; protected BlockBasedQueue queueY; protected List<DoublePointRectangle> data; // Buck public STHistBucket rootBucket; UnaryFunction<DoublePointRectangle, DoublePoint> toPoint; // we need a grid to estimate skew Map<Long, Integer> grid; UnaryFunction<double[], int[]> getSfcKey; int bitsPerDim; int dimension; public List<STHistBucket> forest; // we build pseudo rtree in main memory we can build an rtree public void buildHistogram(Iterator<DoublePointRectangle> rectangles, DoublePointRectangle universe, int buckets) throws IOException{ // toPoint = new UnaryFunction<DoublePointRectangle, DoublePoint>() { @Override public DoublePoint invoke(DoublePointRectangle arg) { return arg.getCenter(); } }; bitsPerDim = BITS_PRO_DIM; getSfcKey = MinSkewHist.getSFCFunction( (1 << (bitsPerDim-1))); dimension = 2; data = new ArrayList<DoublePointRectangle>(); while(rectangles.hasNext() ){ data.add(rectangles.next()); } buildGrid(Cursors.wrap(data.iterator()), bitsPerDim, dimension); SpatialHistogramBucket recUniverse = new SpatialHistogramBucket(universe); this.rootBucket = new STHistBucket(); recUniverse.setWeight(data.size()); this.rootBucket.setInfoRectangle(recUniverse); buildNode(this.rootBucket, data.iterator(), buckets); } protected void buildGrid(Cursor<DoublePointRectangle> rectangles, int bitsPerDim, int dimensions){ grid = MinSkewHist.computeGridForForest( rectangles, bitsPerDim, dimensions); } protected double computeSkew(SpatialHistogramBucket rectangle ){ // double skew = 0; double average = 0; double cells = 0; // compute DoublePointRectangle actRectangle = rectangle; double[] lowleft = (double[]) actRectangle.getCorner(false) .getPoint(); double[] upright = (double[]) actRectangle.getCorner(true) .getPoint(); int[] intLowLeft = getSfcKey.invoke(lowleft); int[] intUpRight = getSfcKey.invoke(upright); List<long[]> zcodesList = SpaceFillingCurves.computeZBoxRanges( intLowLeft, intUpRight, bitsPerDim, dimension); for(long[] zcodes : zcodesList){ cells +=( zcodes[1] - zcodes[0] + 1); } average = rectangle.getWeight() / cells; // x^ for (long[] zcodes : zcodesList) { for (long zcode = zcodes[0]; zcode <= zcodes[1]; zcode++) { if (grid.containsKey(zcode)) { double value = grid.get(zcode); skew += Math.pow((value - average), 2); } } } return skew; } protected void buildNode(STHistBucket subroot, Iterator<DoublePointRectangle> dataIt, int bucketsNumber) throws IOException{ // if(verbose) System.out.println("Run build node with nbi : " + bucketsNumber); double universeSizeFraction = Math.max(bucketsNumber, 2); double fractionFrequency = Math.max(bucketsNumber, 2); List<Pair<Double,SpatialHistogramBucket>> hotSpots = detectHotSpotsMainMemory( subroot.getInfoRectangle(), subroot.getInfoRectangle().getWeight(), Cursors.wrap(dataIt), toPoint, universeSizeFraction, fractionFrequency); // double allSkew = 0; for(Pair<Double,SpatialHistogramBucket> r : hotSpots){ subroot.getChildList().add(new STHistBucket(r.getElement2(), r.getElement1())); allSkew += r.getElement1(); } if (hotSpots.size() < bucketsNumber){ for (STHistBucket r : subroot.childList){ double nbi = hotSpots.size() * (r.getSkew()/ allSkew); if (nbi >= 1){ buildNode(r,getDataFilter(data.iterator(), r.infoRectangle), (int) nbi); } } } } protected Iterator<DoublePointRectangle> getDataFilter(Iterator<DoublePointRectangle> iterator, final DoublePointRectangle rect){ return new Filter<DoublePointRectangle>(iterator, new AbstractPredicate<DoublePointRectangle>() { @Override public boolean invoke(DoublePointRectangle argument) { DoublePoint p = toPoint.invoke(argument); return rect.contains(p); } }); } public void buildHotSpotForest(Iterator<DoublePointRectangle> rectangles, DoublePointRectangle universe, int buckets) throws IOException{ // toPoint = new UnaryFunction<DoublePointRectangle, DoublePoint>() { @Override public DoublePoint invoke(DoublePointRectangle arg) { return arg.getCenter(); } }; bitsPerDim = BITS_PRO_DIM; getSfcKey = MinSkewHist.getSFCFunction( (1 << (bitsPerDim-1))); dimension = 2; data = new ArrayList<DoublePointRectangle>(); while(rectangles.hasNext() ){ data.add(rectangles.next()); } buildGrid(Cursors.wrap(data.iterator()), bitsPerDim, dimension); forest = partition(universe, buckets); double allSkew = 0; for(STHistBucket bucket : forest){ allSkew += bucket.skew; } for(STHistBucket bucket : forest) { double nbi = forest.size() * (bucket.getSkew()/ allSkew); if (nbi >= 1){ buildNode(bucket, getDataFilter(data.iterator(), bucket.infoRectangle), (int) nbi); } } } protected List<STHist.STHistBucket> partition(final DoublePointRectangle universe, int buckets){ // create partitions List<STHist.STHistBucket> candidates = new ArrayList<STHist.STHistBucket>(); double xStep = Math.sqrt(universe.area()/buckets); double yStep = xStep; double xmin = universe.getCorner(false).getValue(0); double ymin = universe.getCorner(false).getValue(1); double xmax = universe.getCorner(true).getValue(0); double ymax = universe.getCorner(true).getValue(1); // sorted on x and then on y for(double x = xmin; x <= xmax - xStep; x+=xStep){ for(double y = ymin; y <= ymax - yStep; y+=yStep){ double[] left = {x,y}; double[] right = { (x+ xStep < xmax) ? x + xStep : xmax , (y+ yStep < ymax) ? y + yStep : ymax}; SpatialHistogramBucket rectangle = new SpatialHistogramBucket(left, right); // compute weight int count = 0; for(DoublePointRectangle r: data){ if (r.overlaps(rectangle)){ count++; } } if (count != 0){ rectangle.setWeight(count); double skew = computeSkew(rectangle); candidates.add(new STHistBucket(rectangle, skew)); } } } // merge candidates if skew(s1 +s2 ) <= skew(s1) +skew(s2) boolean change = true; List<STHist.STHistBucket> tempList = new ArrayList<STHist.STHistBucket>(candidates.size()); // sort data according predifined SFC Comparator<STHist.STHistBucket> bucketComparator = new Comparator<STHist.STHistBucket>() { Comparator<DoublePointRectangle> recComparator = SpatialUtils.getHilbert2DComparator(universe, 1 << 30); @Override public int compare(STHistBucket o1, STHistBucket o2) { return recComparator.compare(o1.getInfoRectangle(), o2.getInfoRectangle()); } }; Collections.sort(candidates, bucketComparator); while(change){ change = false; Iterator<STHist.STHistBucket> itr = candidates.iterator(); STHistBucket b1 = (itr.hasNext()) ? itr.next(): null; while(itr.hasNext()){ STHistBucket b2 = itr.next(); // compute union SpatialHistogramBucket rectangle = new SpatialHistogramBucket(b1.getInfoRectangle()); rectangle.union(b2.getInfoRectangle()); rectangle.setWeight(b1.getInfoRectangle().getWeight() + b2.getInfoRectangle().getWeight()); double skewUnion = computeSkew(rectangle); if (skewUnion <= b1.getSkew() + b2.getSkew()){ tempList.add(new STHistBucket(rectangle, skewUnion)); change = true; if (itr.hasNext()) b1 = itr.next(); else b1 = null; }else{ tempList.add(new STHistBucket(b1.getInfoRectangle(), b1.skew)); b1 = b2; } } if (b1 != null){ tempList.add(new STHistBucket(b1.getInfoRectangle(), b1.skew)); } candidates = tempList; tempList = new ArrayList<STHist.STHistBucket>(candidates.size()); } return candidates; } public List<Pair<Double,SpatialHistogramBucket>> detectHotSpotsMainMemory( DoublePointRectangle universe, double universeFrequency, Cursor<DoublePointRectangle> recs, UnaryFunction<DoublePointRectangle, DoublePoint> toPoint, double universeSizeFraction, double fractionFrequency) throws IOException{ List<Pair<Double,SpatialHistogramBucket>> hotspots = new ArrayList<Pair<Double,SpatialHistogramBucket>>(); // create to cursors one sorted on x and y List<DoublePointRectangle> dataX = new ArrayList<DoublePointRectangle>(10000); List<DoublePointRectangle> dataY = new ArrayList<DoublePointRectangle>(10000); while(recs.hasNext()){ DoublePointRectangle rectangle = recs.next(); dataX.add(new DoublePointRectangle(rectangle)); dataY.add(new DoublePointRectangle(rectangle)); } //sort Collections.sort(dataX, getComparator( toPoint, 0)); Collections.sort(dataY, getComparator( toPoint, 1)); // double windowSizeX = (1/Math.sqrt(universeSizeFraction)) * universe.deltas()[0]; double windowSizeY = (1/Math.sqrt(universeSizeFraction)) * universe.deltas()[1]; double minFreq = universeFrequency/fractionFrequency; if(verbose){ System.out.println("Detect Hot Spots : " +minFreq + ", " + windowSizeX + ", " + windowSizeY); } DoublePointRectangle r = null; DoublePoint p = null; int index = 0; int count = dataX.size(); while(!dataX.isEmpty() && dataX.size() >= minFreq){ r = (DoublePointRectangle) dataX.get(index); p = toPoint.invoke(r); double x = p.getValue(0); double range = x+windowSizeX; boolean currentFreq = computeFreq(dataX, index, minFreq, range, 0, toPoint); if(currentFreq){ // at least F/f elements in MPF x // start to search in y dimension List<DoublePointRectangle> inRegionListY = computeList(dataY, toPoint, x, range, 0); int indexY = 0; DoublePointRectangle ry = null; DoublePoint py = null; while(!inRegionListY.isEmpty() && inRegionListY.size() >= minFreq){ ry = inRegionListY.get(indexY); py = toPoint.invoke(ry); double y = py.getValue(1); double rangeY = y+windowSizeY; boolean currenFreqY = computeFreq(inRegionListY, indexY, minFreq, rangeY, 1, toPoint); if(currenFreqY){ // build hot spot rectangle double[] minPoint = {x, y}; double[] maxPoint = {x+windowSizeX, y+windowSizeY}; boolean quickOverlap = false; DoublePoint pp = new DoublePoint(minPoint); for(Pair<Double,SpatialHistogramBucket> bst: hotspots){ if (bst.getElement2().contains(pp) ){ quickOverlap = true; break; } } if (!quickOverlap){ SpatialHistogramBucket hotspot = adjustDoublePointRectangle(new DoublePointRectangle(minPoint, maxPoint), inRegionListY, toPoint); // boolean overlap = overlapCheck(hotspots, hotspot); if (!overlap){ double skew = computeSkew(hotspot); hotspots.add(new Pair<Double, SpatialHistogramBucket>(skew, hotspot)); if (verbose) System.out.println("Nr: " +hotspots.size() + " HotSpot Found: " + new Pair<Double, SpatialHistogramBucket>(skew, hotspot)); dataX = removeObjects(hotspot, dataX , toPoint);//remove objects from dataX via copy dataY = removeObjects(hotspot, dataY , toPoint);//remove objects from dataY via copy inRegionListY = removeObjects(hotspot, inRegionListY , toPoint);//remove objects from inRegionListY via copy } } } if (!inRegionListY.isEmpty()) inRegionListY.remove(indexY); } } if(!dataX.isEmpty() ) dataX.remove(index); if (verbose && (count - dataX.size()) >= 10000 ){ System.out.print("."); count = dataX.size(); } } if (verbose) System.out.println("\n HotSpots: " + hotspots.size()); return hotspots; } protected List<DoublePointRectangle> removeObjects(DoublePointRectangle queryRectangle, List<DoublePointRectangle> inputList , UnaryFunction<DoublePointRectangle, DoublePoint> toPoint){ List<DoublePointRectangle> outputList = new ArrayList<DoublePointRectangle>(inputList.size()); for(DoublePointRectangle rectangle: inputList){ if (!queryRectangle.contains(toPoint.invoke(rectangle))){ outputList.add(rectangle); } } return outputList; } protected boolean overlapCheck(List<Pair<Double,SpatialHistogramBucket>> hotspots, DoublePointRectangle hotspot){ for(Pair<Double,SpatialHistogramBucket> r: hotspots){ if(r.getElement2().overlaps(hotspot)) return true; } return false; } protected SpatialHistogramBucket adjustDoublePointRectangle(DoublePointRectangle rectangle, List<DoublePointRectangle> inputList , UnaryFunction<DoublePointRectangle, DoublePoint> toPoint){ SpatialHistogramBucket rec = null; for(DoublePointRectangle r : inputList ) { DoublePoint p = toPoint.invoke(r); if (rectangle.contains(p)){ if(rec == null){ rec = new SpatialHistogramBucket(new DoublePointRectangle(p, p)); rec.setWeight(1); } else rec.union(new DoublePointRectangle(p, p)); rec.setWeight(rec.getWeight() + 1); rec.updateAverage(r); } } return rec; } // protected List<DoublePointRectangle> computeList( List<DoublePointRectangle> inputList , UnaryFunction<DoublePointRectangle, DoublePoint> toPoint, double min, double max, int dimension){ List<DoublePointRectangle> subList = new ArrayList<DoublePointRectangle>(20000); DoublePoint p = null; for(DoublePointRectangle dp : inputList){ p = toPoint.invoke(dp); if ( min <= p.getValue(dimension) && p.getValue(dimension) <= max ){ subList.add(new DoublePointRectangle(dp)); } } return subList; } // jump to element protected boolean computeFreq(List<DoublePointRectangle> rec, int position, double minFreq, double max, int dimension, UnaryFunction<DoublePointRectangle, DoublePoint> toPoint){ int jump = (int) Math.ceil(minFreq); if (position+jump-1 > rec.size() -1) return false; DoublePointRectangle r = rec.get(position+jump-1); DoublePoint p = toPoint.invoke(r); return p.getValue(dimension) <= max; } private Comparator<DoublePointRectangle> getComparator(final UnaryFunction<DoublePointRectangle, DoublePoint> toPoint, final int sortDimension ){ return new Comparator<DoublePointRectangle>() { @Override public int compare(DoublePointRectangle arg0, DoublePointRectangle arg1) { DoublePoint p1 = toPoint.invoke(arg0); DoublePoint p2 = toPoint.invoke(arg1); double dimVal1 = p1.getValue(sortDimension); double dimVal2 = p2.getValue(sortDimension); return (dimVal1 == dimVal2) ? 0 : (dimVal1 < dimVal2 ) ? -1: 1; } }; } public static double getSelectivity(List<STHistBucket> buckets, DoublePointRectangle query){ double selectivity = 0; for(STHistBucket bucket: buckets){ selectivity += getSelectivity(bucket, query); } return selectivity; } public static double getSelectivity(STHistBucket bucket, DoublePointRectangle query) { DoublePointRectangle intersect = new DoublePointRectangle(bucket.infoRectangle); if (intersect.overlaps(query)) intersect.intersect(query); else intersect = new DoublePointRectangle(new double[]{0,0}, new double[]{0,0}); if (bucket.isLeaf()){ return (intersect.area()/bucket.infoRectangle.area() )*(double)bucket.infoRectangle.getWeight(); } double leafSum = 0; double volSumChild = 0; for(STHistBucket buck : bucket.childList){ DoublePointRectangle in = new DoublePointRectangle(buck.infoRectangle); if (in.overlaps(query)) in.intersect(query); else in = new DoublePointRectangle(new double[]{0,0}, new double[]{0,0}); volSumChild +=in.area(); leafSum += getSelectivity(buck, query); } volSumChild = intersect.area() - volSumChild; volSumChild /= bucket.infoRectangle.area(); volSumChild *= (double)bucket.infoRectangle.getWeight(); return leafSum + volSumChild; } public static void forest(List<STHistBucket> hist, List<SpatialHistogramBucket> forest){ for(STHistBucket bucket : hist){ STHist.getRectangles(bucket, forest); } } public static void getRectangles(STHistBucket bucket, List<SpatialHistogramBucket> list){ list.add(bucket.infoRectangle); for(STHistBucket b : bucket.childList){ getRectangles(b, list); } } public static class STHistBucket{ private SpatialHistogramBucket infoRectangle; private double skew; private List<STHistBucket> childList; public STHistBucket() { childList = new ArrayList<STHist.STHistBucket>(); } public STHistBucket( SpatialHistogramBucket infoRectangle, double skew) { this(); this.infoRectangle = infoRectangle; this.skew = skew; } public boolean isLeaf(){ return childList.isEmpty(); } public List<STHistBucket> getChildList() { return childList; } public SpatialHistogramBucket getInfoRectangle() { return infoRectangle; } public void setInfoRectangle(SpatialHistogramBucket infoRectangle) { this.infoRectangle = infoRectangle; } public double getSkew() { return skew; } public void setSkew(double skew) { this.skew = skew; } } /** * @param args */ public static void main(String[] args) { } }