/*
 * XXL: The eXtensible and fleXible Library for data processing
 *
 * Copyright (C) 2000-2013 Prof. Dr. Bernhard Seeger
 *                         Head of the Database Research Group
 *                         Department of Mathematics and Computer Science
 *                         University of Marburg
 *                         Germany
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 3 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; If not, see <http://www.gnu.org/licenses/>.
 *
 * http://code.google.com/p/xxl/
 */
package xxl.core.spatial.histograms.utils;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Properties;

import xxl.core.collections.containers.Container;
import xxl.core.collections.containers.CounterContainer;
import xxl.core.collections.containers.io.BlockFileContainer;
import xxl.core.collections.containers.io.ConverterContainer;
import xxl.core.collections.queues.Queue;
import xxl.core.collections.queues.io.BlockBasedQueue;
import xxl.core.cursors.Cursor;
import xxl.core.cursors.Cursors;
import xxl.core.cursors.sorters.MergeSorter;
import xxl.core.cursors.sources.io.FileInputCursor;
import xxl.core.functions.AbstractFunction;
import xxl.core.functions.Constant;
import xxl.core.functions.Function;
import xxl.core.functions.Functional.UnaryFunction;
import xxl.core.indexStructures.ORTree;
import xxl.core.indexStructures.RTree;
import xxl.core.indexStructures.SortBasedBulkLoading;
import xxl.core.indexStructures.rtrees.AbstractIterativeRtreeBulkloader.ProcessingType;
import xxl.core.indexStructures.rtrees.GenericPartitioner.CostFunctionArrayProcessor;
import xxl.core.indexStructures.rtrees.GenericPartitioner.DefaultArrayProcessor;
import xxl.core.indexStructures.rtrees.RtreeIterativeBulkloader;
import xxl.core.io.converters.ConvertableConverter;
import xxl.core.io.converters.Converter;
import xxl.core.predicates.AbstractPredicate;
import xxl.core.predicates.Predicate;
import xxl.core.spatial.SpatialUtils;
import xxl.core.spatial.histograms.utils.RVHistogram.HistType;
import xxl.core.spatial.histograms.utils.PartitionerUtils.ProcessorType;
import xxl.core.spatial.histograms.utils.STHist.STHistBucket;
import xxl.core.spatial.rectangles.DoublePointRectangle;
import xxl.core.spatial.rectangles.Rectangles;
import xxl.core.util.WrappingRuntimeException;

/**
 * This class implements spatial histograms for selectivity estimation.
 * <p>
 * Assumption: input data is a collection of {@link DoublePointRectangle}s in unit space!
 * <p>
 * See D. Achakeev and B. Seeger, "A class of R-tree histograms for spatial databases", GIS 2012.
 */
public class MHistograms {

    /*
     * Property names of R-tree based histograms
     */
    // NOTE(review): RTREE_SORT_PATH is declared but never read in this file; the sort path is
    // currently derived from RTREE_PATH (see RTreeBasicHistogram#setProperties).
    public static final String RTREE_SORT_PATH = "rtree.sortpath";
    public static final String RTREE_PATH = "rtree.path";
    public static final String RTREE_BLOCK_SIZE = "rtree.blockSize";
    public static final String RTREE_SORT_COMP = "rtree.sort"; // Hilbert 0; ZCurve 1;
    public static final String RTREE_BULKLOAD = "rtree.bulkload"; // simple 0, gopt 1, sopt 2
    public static final String RTREE_DIMENSION = "rtree.dimension"; // number of dimensions
    public static final String RTREE_RATIO = "rtree.ratio";
    public static final String RTREE_UTIL = "rtree.storageUtil";
    public static final String RTREE_PARTSIZE = "rtree.partSize";
    public static final String RTREE_BITS = "rtree.bits";

    /** Value of {@link #RTREE_BULKLOAD} selecting the simple sort based bulk load. */
    public static final int NAIVE_BULKLOAD = 0;
    /** Value of {@link #RTREE_BULKLOAD} selecting the GOPT bulk load. */
    public static final int GOPT_BULKLOAD = 1;
    /** Value of {@link #RTREE_BULKLOAD} selecting the SOPT bulk load. */
    public static final int SOPT_BULKLOAD = 2;

    public static final int BITS_PRO_DIM = 31;
    public static final int FILLING_CURVE_PRECISION = 1 << (BITS_PRO_DIM - 1);
    public static final String RKHIST_U_RATIO = "rkhist.ratio";

    /*
     * Property names of MinSkew based histograms
     */
    public static final String MINSKEW_DIM = "minskew.dim";
    public static final String MINSKEW_PATH = "minskew.path";
    public static final String MINSKEW_GRID_SIZE = "minskew.gridsize"; // in bits pro dim
    public static final String MINSKEW_REF = "minskew.ref";

    /**
     * Simple descriptor function: casts its argument to {@link DoublePointRectangle}
     * and returns it unchanged.
     */
    @SuppressWarnings("deprecation")
    public static Function dataDescriptor = new AbstractFunction() {
        public Object invoke(Object o) {
            return (DoublePointRectangle) o;
        }
    };

    /**
     * Groundwork class for spatial histogram generation. Stores the buckets in a list and
     * answers selectivity queries from it.
     */
    public static abstract class AbstractSelHistogram implements MHistogram {

        /**
         * Internal representation of a histogram:
         * a list that contains buckets of type {@link SpatialHistogramBucket}.
         * It is {@code null} until a histogram has been built.
         */
        protected List<SpatialHistogramBucket> histogram;

        /**
         * Sets properties for the histogram method.
         *
         * @param props configuration properties
         */
        protected abstract void setProperties(Properties props);

        /*
         * (non-Javadoc)
         * @see xxl.core.spatial.histograms.utils.MHistogram#getSelectivity(xxl.core.spatial.rectangles.DoublePointRectangle)
         */
        @Override
        public double getSelectivity(DoublePointRectangle queryRec) {
            return SpatialUtils.computeEstimation(histogram.iterator(), queryRec);
        }

        /*
         * (non-Javadoc)
         * @see xxl.core.spatial.histograms.utils.MHistogram#getBuckets()
         */
        public List<SpatialHistogramBucket> getBuckets() {
            return histogram;
        }

        /*
         * (non-Javadoc)
         * @see xxl.core.spatial.histograms.utils.MHistogram#numberOfBuckets()
         */
        @Override
        public int numberOfBuckets() {
            return histogram.size();
        }
    }

    /**
     * This is a groundwork class for R-tree based histograms.
     * <p>
     * This class provides methods for R-tree bulk loading. To bulk load an R-tree a sort based
     * algorithm is used. In case of 2 dimensions a Hilbert comparator is used, otherwise a Z-curve.
     */
    public static class RTreeBasicHistogram extends AbstractSelHistogram {

        /** Block size of the R-tree. */
        protected int blockSize = 4096; // default value

        /** Number of dimensions, default 2. */
        protected int dimension = 2; // default value

        /** Input rectangle ({@link DoublePointRectangle}) converter. */
        protected Converter<DoublePointRectangle> converter =
                new ConvertableConverter<DoublePointRectangle>(SpatialUtils.factoryFunction(dimension)); // default 2D converter

        /** Comparator based on an SFC mapping; for two-dimensional space Hilbert is used, otherwise Z-curve. */
        protected Comparator<DoublePointRectangle> comparator =
                SpatialUtils.getHilbert2DComparator(SpatialUtils.universeUnit(dimension), FILLING_CURVE_PRECISION); // default 2D comparator

        /** Path prefix where temporary sort files are stored. */
        protected String sortpath = "./"; // default

        /** Path where the temporary R-tree is stored. */
        protected String rtreePath = "./"; // default

        /** Bulk load type, one of {@link #NAIVE_BULKLOAD}, {@link #GOPT_BULKLOAD}, {@link #SOPT_BULKLOAD}. */
        protected int bulkLoad = 0; // default

        /** Minimal R-tree page capacity, defined as a fraction of the page capacity; default value 0.33. */
        protected double rtreeRatio = 0.33; // default

        /** Average target R-tree page capacity, defined as a fraction of the page capacity; default value 0.8. */
        protected double loadRatio = 0.8; // default

        /**
         * Maximal number of entries for the optimal partitioning computation during the loading of
         * the R-tree; default value 50.000 rectangles.
         */
        protected int partitionSize = 50000; // default

        /** Number of bits used per dimension in the SFC key computation. The SFC key is a long value. */
        protected int bitProDim = 63 / dimension;

        /** Mask for the SFC computation. */
        protected int precision = 1 << (bitProDim - 1);

        /** Function for optimal partitioning, e.g. volume based. */
        DefaultArrayProcessor arrayProcessor = null;

        /**
         * Reads the R-tree related properties (falling back to the defaults noted on each line)
         * and rebuilds the converter and comparator for the configured dimension.
         *
         * @param props configuration properties
         * @throws IllegalArgumentException if {@link #RTREE_BULKLOAD} is outside 0..2
         * @throws RuntimeException if a numeric property cannot be parsed
         */
        protected void setProperties(Properties props) {
            try {
                bitProDim = Integer.parseInt(props.getProperty(RTREE_BITS, "31"));
                // NOTE(review): keep the Hilbert precision in sync with the configured bits; with the
                // default of 31 bits this yields the same value as before (1 << 30).
                precision = 1 << (bitProDim - 1);
                blockSize = Integer.parseInt(props.getProperty(RTREE_BLOCK_SIZE, "4096")); // default value
                dimension = Integer.parseInt(props.getProperty(RTREE_DIMENSION, "2")); // default value
                converter = new ConvertableConverter<DoublePointRectangle>(SpatialUtils.factoryFunction(dimension));
                comparator = (dimension == 2)
                        ? SpatialUtils.getHilbert2DComparator(SpatialUtils.universeUnit(dimension), precision)
                        : SpatialUtils.getZCurveComparator(SpatialUtils.universeUnit(dimension), bitProDim);
                sortpath = props.getProperty(RTREE_PATH, "./") + "sortTmp"; // default
                rtreePath = props.getProperty(RTREE_PATH, "./"); // default
                bulkLoad = Integer.parseInt(props.getProperty(RTREE_BULKLOAD, "0")); // default
                // The former check used '&&' and therefore could never fire.
                if (bulkLoad < NAIVE_BULKLOAD || bulkLoad > SOPT_BULKLOAD) {
                    throw new IllegalArgumentException("simple: 0, gopt: 1, sopt: 2 ");
                }
                rtreeRatio = Double.parseDouble(props.getProperty(RTREE_RATIO, "0.33")); // default
                loadRatio = Double.parseDouble(props.getProperty(RTREE_UTIL, "0.8")); // default
                partitionSize = Integer.parseInt(props.getProperty(RTREE_PARTSIZE, "50000")); // default
            } catch (NumberFormatException ex) {
                throw new RuntimeException("Check property! ", ex);
            }
        }

        /*
         * (non-Javadoc)
         * @see xxl.core.spatial.histograms.utils.MHistogram#buildHistogram(xxl.core.cursors.Cursor, int, java.util.Properties)
         */
        @Override
        public void buildHistogram(Cursor<DoublePointRectangle> rectangles, int numberOfBuckets, Properties props)
                throws IOException {
            setProperties(props);
            // sort data
            Cursor<DoublePointRectangle> sortedRectangles = sortData(rectangles);
            RTree tree;
            if (bulkLoad == NAIVE_BULKLOAD) {
                tree = buildSimpleRtree(sortedRectangles);
            } else {
                ProcessingType pType = (bulkLoad == GOPT_BULKLOAD) ? ProcessingType.GOPT : ProcessingType.SOPT_F;
                tree = buildExtRtree(sortedRectangles, pType);
            }
            histogram = SpatialUtils.computeSimpleRTreeHistogram(tree, numberOfBuckets);
        }

        /**
         * Sorts the data according to the configured comparator (Hilbert or Z-curve) using a
         * {@link MergeSorter} whose queues are backed by a block file below {@link #sortpath}.
         *
         * @param rectangles input rectangles
         * @return cursor delivering the rectangles in comparator order
         */
        @SuppressWarnings({ "deprecation", "serial", "unchecked" })
        protected Cursor<DoublePointRectangle> sortData(Cursor<DoublePointRectangle> rectangles) {
            final Container queueContainer = new BlockFileContainer(sortpath + "tmpsortqueue.tmp", 4096);
            final Function<Function<?, Integer>, Queue<?>> queueFunction =
                    new AbstractFunction<Function<?, Integer>, Queue<?>>() {
                public Queue<?> invoke(Function<?, Integer> function1, Function<?, Integer> function2) {
                    return new BlockBasedQueue(queueContainer, 4096, converter, function1, function2);
                }
            };
            return new MergeSorter(rectangles, comparator, 8 * 2 * dimension, 4096 * 2000, 4096 * 2000,
                    queueFunction, false);
        }

        /**
         * Builds an R-tree using the {@link SortBasedBulkLoading} method. The tree is stored in a
         * {@link BlockFileContainer} created at {@link #rtreePath} with {@link #blockSize}.
         *
         * @param rectCursor rectangles to load (callers in this file pass sorted data)
         * @return the loaded R-tree
         */
        protected RTree buildSimpleRtree(Cursor<DoublePointRectangle> rectCursor) {
            return bulkLoadSorted(rectCursor, new BlockFileContainer(rtreePath, blockSize));
        }

        /**
         * Builds an R-tree using the {@link SortBasedBulkLoading} method on top of the given container.
         *
         * @param rectCursor rectangles to load
         * @param container container that stores the tree nodes
         * @return the loaded R-tree
         */
        public RTree buildSimpleRtree(Cursor<DoublePointRectangle> rectCursor, Container container) {
            return bulkLoadSorted(rectCursor, container);
        }

        /**
         * Shared implementation of both {@code buildSimpleRtree} overloads. It is private on purpose:
         * subclasses override the public overload (e.g. to sort first), so the overloads must not
         * call each other.
         */
        private RTree bulkLoadSorted(Cursor<DoublePointRectangle> rectCursor, Container container) {
            RTree sortBasedRTree = new RTree();
            CounterContainer treeCounter = new CounterContainer(container);
            Container treeContainer = new ConverterContainer(treeCounter,
                    sortBasedRTree.nodeConverter(converter, dimension));
            // NOTE(review): the minimal capacity ratio is the literal 0.33 here, not rtreeRatio - confirm intent.
            sortBasedRTree.initialize(dataDescriptor, treeContainer, blockSize,
                    (8 * 2 * dimension) + 4, (8 * 2 * dimension) + 8, 0.33);
            // A node overflows once it holds more entries than loadRatio times the number of entries
            // that fit into a block; level 0 entries take 8*2*dimension bytes, others 8 bytes more.
            final Predicate<ORTree.Node> overflows = new AbstractPredicate<ORTree.Node>() {
                public boolean invoke(ORTree.Node node) {
                    if (node.level() == 0) {
                        return node.number() > ((int) (((blockSize - 6) / (8 * 2 * dimension)) * loadRatio));
                    }
                    return node.number() > ((int) (((blockSize - 6) / (8 * 2 * dimension + 8)) * loadRatio));
                }
            };
            new SortBasedBulkLoading(sortBasedRTree, rectCursor, sortBasedRTree.determineContainer, overflows);
            return sortBasedRTree;
        }

        /**
         * Builds an R-tree using {@link RtreeIterativeBulkloader}.
         *
         * @param rectCursor rectangles to load (callers in this file pass sorted data)
         * @param pType partitioning strategy of the bulk loader
         * @return the loaded R-tree
         * @throws IOException declared for the bulk loader
         */
        protected RTree buildExtRtree(Cursor<DoublePointRectangle> rectCursor,
                xxl.core.indexStructures.rtrees.RtreeIterativeBulkloader.ProcessingType pType) throws IOException {
            RTree sortBasedRTree = new RTree();
            Container treeContainer = new ConverterContainer(
                    new BlockFileContainer(rtreePath, blockSize),
                    sortBasedRTree.nodeConverter(Rectangles.getDoublePointRectangleConverter(dimension), dimension));
            sortBasedRTree.determineContainer = new Constant<Object>(treeContainer);
            sortBasedRTree.getContainer = new Constant<Object>(treeContainer);
            boolean processList = (pType == ProcessingType.GOPT);
            // all side lengths are 0 (Java zero-initialises the array)
            double[] sideLength = new double[dimension];
            UnaryFunction<DoublePointRectangle, Double> costFunction =
                    RtreeIterativeBulkloader.generateDefaultFunction(sideLength);
            DefaultArrayProcessor partitionProcessor = new DefaultArrayProcessor(costFunction, processList);
            RtreeIterativeBulkloader<DoublePointRectangle> bulkLoader =
                    new RtreeIterativeBulkloader<DoublePointRectangle>(sortBasedRTree, rtreePath, dimension,
                            blockSize, rtreeRatio, loadRatio, partitionSize);
            bulkLoader.init(partitionProcessor, pType, dimension * 8 * 2,
                    Rectangles.getDoublePointRectangleConverter(dimension),
                    new UnaryFunction<DoublePointRectangle, DoublePointRectangle>() {
                        @Override
                        public DoublePointRectangle invoke(DoublePointRectangle arg) {
                            return new DoublePointRectangle(arg);
                        }
                    });
            bulkLoader.buildRTree(rectCursor);
            return bulkLoader.getRTree();
        }
    }

    /**
     * Two step R-tree based histogram.
     * <p>
     * In the first step R-tree leaf nodes are built using the gopt strategy.
     * In the second step the histogram is constructed using the opt strategy.
     * In the basic variant a sum of volumes of bucket MBRs is used for optimization.
     * <p>
     * We refer for details to D. Achakeev and B. Seeger, "A class of R-tree histograms for spatial
     * databases", GIS 2012.
     * <pre>
     * p := number of buckets
     * B = N/(p*avgLoad)
     * b = B*minBound
     * </pre>
     * As the histogram is built using the opt partitioning strategy, in the second step the dynamic
     * programming table occupies quadratic space in the number of input leaves. Therefore, a simple
     * heuristic is used for large inputs: above a threshold of level-1 rectangles the chunked
     * variant of {@code RVHistogram.computeHistogramOPT} is called.
     */
    public static class RHistogram extends RTreeBasicHistogram {

        public static final int BITS_PRO_DIM = 8;
        public static final int B_FACTOR = 2;

        double[] sideLength = new double[dimension];
        DoublePointRectangle universe;
        public HistType type;
        ProcessorType pType;
        double rtreeratio, avgratio, hratio;
        /** R-tree the histogram is computed from; built lazily when {@code null}. */
        public RTree tree = null;

        /**
         * Creates a histogram builder whose cost function uses side lengths of 0 in every dimension.
         *
         * @param dimension number of dimensions
         * @param blockSize R-tree block size
         * @param rtreeRatio stored in {@code rtreeratio}
         * @param hratio factor applied to the average bucket load to obtain the minimal bucket size
         * @param avgratio stored in {@code avgratio}; used by {@link #buildHistogram(int, Properties, int, int)}
         * @param type histogram type
         * @param pType selects the array processor used for partitioning
         */
        public RHistogram(int dimension, int blockSize, double rtreeRatio, double hratio, double avgratio,
                HistType type, ProcessorType pType) {
            this.blockSize = blockSize;
            this.rtreeratio = rtreeRatio;
            this.avgratio = avgratio;
            this.hratio = hratio;
            this.type = type;
            this.dimension = dimension;
            this.pType = pType;
            sideLength = new double[dimension]; // zero-initialised
        }

        /**
         * Same as the seven-argument constructor, but derives the side lengths of the cost function
         * from the given queries via {@code SpatialUtils.computeQuerySides}.
         *
         * @param queryPoints queries used to compute the side lengths
         * @param universe universe passed to {@code SpatialUtils.computeQuerySides}
         */
        public RHistogram(int dimension, int blockSize, double rtreeRatio, double hratio, double avgratio,
                HistType type, ProcessorType pType, Cursor<DoublePointRectangle> queryPoints,
                DoublePointRectangle universe) {
            this(dimension, blockSize, rtreeRatio, hratio, avgratio, type, pType);
            sideLength = SpatialUtils.computeQuerySides(queryPoints, dimension, universe);
        }

        @Override
        public void buildHistogram(Cursor<DoublePointRectangle> rectangles, int numberOfBuckets, Properties props)
                throws IOException {
            // set properties
            setProperties(props);
            // build rtree
            ensureTree(rectangles);
            int count = Cursors.count(SpatialUtils.getRectanglesLevel1(tree));
            if (count <= numberOfBuckets) {
                this.histogram = SpatialUtils.computeSimpleRTreeHistogram(tree, numberOfBuckets);
                return;
            }
            // After computing the leaf node level of the R-tree the parameters for the histogram
            // generation are set in dependency of the desired number of buckets.
            double f = count / (double) numberOfBuckets; // average number of rectangles per bucket
            int d = (int) Math.ceil(f);
            int b = Math.max((int) Math.floor(f * hratio), 2); // minimal bucket size, at least 2
            int B = b + d;
            if (type == HistType.GOPT) {
                b = count / numberOfBuckets;
                B = B_FACTOR * b;
            }
            UnaryFunction<DoublePointRectangle, Double> function =
                    RtreeIterativeBulkloader.generateDefaultFunction(sideLength);
            CostFunctionArrayProcessor<DoublePointRectangle> arrayProcessor = null;
            switch (pType) {
                case RK_HIST:
                    arrayProcessor = new PartitionerUtils.RKHistMetrikProcessor(B, count, true);
                    break;
                case GRID_SSE:
                    // the input is handed to the processor again, so rewind it first
                    rectangles.reset();
                    arrayProcessor = new PartitionerUtils.SpatialSkewProcessor(rectangles, BITS_PRO_DIM, dimension,
                            B, count, true);
                    break;
                default:
                    arrayProcessor = new DefaultArrayProcessor(function);
            }
            double rat = f / (double) B;
            if (count > 20000) {
                // large input: variant with the extra ratio and the additional argument 10000
                // (presumably the chunk size - confirm against RVHistogram)
                this.histogram = RVHistogram.computeHistogramOPT(SpatialUtils.getRectanglesLevel1(tree), b, B,
                        count, numberOfBuckets, rat, arrayProcessor, type, 10000);
            } else {
                this.histogram = RVHistogram.computeHistogramOPT(SpatialUtils.getRectanglesLevel1(tree), b, B,
                        count, numberOfBuckets, arrayProcessor, type);
            }
        }

        /**
         * Sorts the rectangles and builds the R-tree with the GOPT strategy, unless a tree is
         * already present.
         *
         * @param rectangles input rectangles
         * @throws IOException declared for the bulk loader
         */
        public void buildRtree(Cursor<DoublePointRectangle> rectangles) throws IOException {
            ensureTree(rectangles);
        }

        /** Builds {@link #tree} from the given rectangles if it has not been built yet. */
        private void ensureTree(Cursor<DoublePointRectangle> rectangles) throws IOException {
            if (tree == null) {
                Cursor<DoublePointRectangle> sortedRectangles = sortData(rectangles);
                tree = buildExtRtree(sortedRectangles, ProcessingType.GOPT);
            }
        }

        /**
         * Builds the histogram from the already existing {@link #tree} with explicitly given
         * bucket bounds. For {@code HistType.GOPT} the given bounds are replaced by
         * {@code count/numberOfBuckets} and {@link #B_FACTOR} times that value.
         *
         * @param numberOfBuckets desired number of buckets
         * @param props configuration properties
         * @param b minimal bucket size
         * @param B maximal bucket size
         * @throws IOException declared for the histogram computation
         */
        public void buildHistogram(int numberOfBuckets, Properties props, int b, int B) throws IOException {
            // set properties
            setProperties(props);
            int count = Cursors.count(SpatialUtils.getRectanglesLevel1(tree));
            if (count <= numberOfBuckets) {
                this.histogram = SpatialUtils.computeSimpleRTreeHistogram(tree, numberOfBuckets);
                return;
            }
            if (type == HistType.GOPT) {
                b = count / numberOfBuckets;
                B = B_FACTOR * b;
            }
            UnaryFunction<DoublePointRectangle, Double> function =
                    RtreeIterativeBulkloader.generateDefaultFunction(sideLength);
            CostFunctionArrayProcessor arrayProcessor = new DefaultArrayProcessor(function);
            double rat = 1.0 / avgratio;
            if (count > 40000) {
                this.histogram = RVHistogram.computeHistogramOPT(SpatialUtils.getRectanglesLevel1(tree), b, B,
                        count, numberOfBuckets, rat, arrayProcessor, type, 128 * 128);
            } else {
                this.histogram = RVHistogram.computeHistogramOPT(SpatialUtils.getRectanglesLevel1(tree), b, B,
                        count, numberOfBuckets, arrayProcessor, type);
            }
        }

        /**
         * Computes the simple R-tree histogram from the existing {@link #tree}.
         *
         * @param numberOfBuckets desired number of buckets
         */
        public void buildSimpleHist(int numberOfBuckets) {
            histogram = SpatialUtils.computeSimpleRTreeHistogram(tree, numberOfBuckets);
        }
    }

    /**
     * Histogram which implements RK-Hist.
     */
    public static class RKHistHistogram extends RTreeBasicHistogram {

        protected double undersampligRatio = 0.1; // default value
        /** R-tree the histogram is computed from; built lazily when {@code null}. */
        RTree tree = null;

        @Override
        public void buildHistogram(Cursor<DoublePointRectangle> rectangles, int numberOfBuckets, Properties props)
                throws IOException {
            setProperties(props);
            if (tree == null) {
                // sort data and bulk load the tree with the configured strategy
                Cursor<DoublePointRectangle> sortedRectangles = sortData(rectangles);
                if (bulkLoad == NAIVE_BULKLOAD) {
                    tree = buildSimpleRtree(sortedRectangles);
                } else {
                    ProcessingType pType = (bulkLoad == GOPT_BULKLOAD) ? ProcessingType.GOPT : ProcessingType.SOPT_F;
                    tree = buildExtRtree(sortedRectangles, pType);
                }
            }
            int numberOfNodes = Cursors.count(SpatialUtils.getNodes(tree, 1));
            histogram = RKhist.buildRKHist(tree, numberOfNodes, numberOfBuckets, undersampligRatio, dimension);
        }

        /**
         * Reads the R-tree properties and additionally {@link #RKHIST_U_RATIO} (default 0.1).
         *
         * @throws RuntimeException if a numeric property cannot be parsed
         */
        @Override
        protected void setProperties(Properties props) {
            super.setProperties(props);
            try {
                undersampligRatio = Double.parseDouble(props.getProperty(RKHIST_U_RATIO, "0.1"));
            } catch (NumberFormatException ex) {
                // keep the cause so the offending value shows up in the stack trace
                throw new RuntimeException("check property", ex);
            }
        }

        /**
         * Sorts the given rectangles, bulk loads them into the container and remembers the
         * resulting tree for subsequent {@code buildHistogram} calls.
         */
        @Override
        public RTree buildSimpleRtree(Cursor<DoublePointRectangle> rectCursor, Container container) {
            Cursor<DoublePointRectangle> sortedRectangles = sortData(rectCursor);
            tree = super.buildSimpleRtree(sortedRectangles, container);
            return tree;
        }
    }

    /**
     * MinSkew histogram, basic variant. The input is first written to a temporary file so that
     * it can be read through a resettable cursor.
     */
    public static class MinSkewHistogram extends AbstractSelHistogram {

        protected String tempPath = "./cursor.tmp"; // default value for the path
        protected int dimension = 2; // default
        protected int bitsPerDim = 7; // default grid size in bits per dimension

        @Override
        public void buildHistogram(Cursor<DoublePointRectangle> rectangles, int numberOfBuckets, Properties props)
                throws IOException {
            setProperties(props);
            writeTmpFile(rectangles, tempPath);
            Cursor<DoublePointRectangle> recCursor = openResettableCursor(tempPath, dimension);
            this.histogram = MinSkewHist.buildHistogram(recCursor, SpatialUtils.universeUnit(dimension),
                    bitsPerDim, dimension, numberOfBuckets);
        }

        /**
         * Reads {@link #MINSKEW_DIM}, {@link #MINSKEW_PATH} and {@link #MINSKEW_GRID_SIZE}.
         *
         * @throws RuntimeException if a numeric property cannot be parsed
         */
        @Override
        protected void setProperties(Properties props) {
            try {
                dimension = Integer.parseInt(props.getProperty(MINSKEW_DIM, "2")); // default value
                tempPath = props.getProperty(MINSKEW_PATH, "./cursor.tmp");
                bitsPerDim = Integer.parseInt(props.getProperty(MINSKEW_GRID_SIZE, "7"));
            } catch (NumberFormatException ex) {
                throw new RuntimeException("Check property! ", ex);
            }
        }

        /**
         * Writes all remaining rectangles of the cursor to the file at {@code path}. The output is
         * buffered and flushed once at the end instead of once per rectangle.
         */
        private static void writeTmpFile(Cursor<DoublePointRectangle> rectangles, String path) throws IOException {
            DataOutputStream stream = new DataOutputStream(
                    new BufferedOutputStream(new FileOutputStream(new File(path))));
            try {
                while (rectangles.hasNext()) {
                    DoublePointRectangle dpr = rectangles.next();
                    dpr.write(stream);
                }
                stream.flush();
            } finally {
                stream.close();
            }
        }

        /**
         * Opens a cursor over the rectangles stored in the file at {@code path}. The cursor
         * supports {@code reset()} by reopening the file.
         */
        private static Cursor<DoublePointRectangle> openResettableCursor(final String path, int dimension) {
            return new FileInputCursor<DoublePointRectangle>(
                    new ConvertableConverter<DoublePointRectangle>(SpatialUtils.factoryFunction(dimension)),
                    new File(path)) {
                @Override
                public boolean supportsReset() {
                    return true;
                }

                @Override
                public void reset() throws UnsupportedOperationException {
                    super.reset();
                    try {
                        input = new DataInputStream(
                                new BufferedInputStream(new FileInputStream(new File(path)), 4096));
                    } catch (IOException ie) {
                        throw new WrappingRuntimeException(ie);
                    }
                }
            };
        }
    }

    /**
     * MinSkew histogram with progressive refinement. Works like {@link MinSkewHistogram} but
     * builds the buckets with {@code MinSkewHist.buildProgressiveRefinement}.
     */
    public static class MinSkewProgressiveRefinementHistogram extends MinSkewHistogram {

        protected int refSteps = 2; // default number of refinement steps

        @Override
        public void buildHistogram(Cursor<DoublePointRectangle> rectangles, int numberOfBuckets, Properties props)
                throws IOException {
            setProperties(props);
            MinSkewHistogram.writeTmpFile(rectangles, tempPath);
            Cursor<DoublePointRectangle> recCursor = MinSkewHistogram.openResettableCursor(tempPath, dimension);
            this.histogram = MinSkewHist.buildProgressiveRefinement(recCursor, SpatialUtils.universeUnit(dimension),
                    bitsPerDim, dimension, numberOfBuckets, refSteps);
        }

        /**
         * Reads the MinSkew properties and additionally {@link #MINSKEW_REF} (default 2).
         *
         * @throws RuntimeException if a numeric property cannot be parsed
         */
        @Override
        protected void setProperties(Properties props) {
            super.setProperties(props);
            try {
                refSteps = Integer.parseInt(props.getProperty(MINSKEW_REF, "2"));
            } catch (NumberFormatException ex) {
                throw new RuntimeException("Check property! ", ex);
            }
        }
    }

    /**
     * Histogram backed by the hot spot forest of {@link STHist}. Selectivity estimates are scaled
     * by the inverse of the sampling rate.
     */
    public static class STHistForest implements MHistogram {

        public static final String SKEW_GRID_SIZE = "skew.gridsize"; // in bits pro dim

        protected int gridSize;
        protected List<STHistBucket> forest;
        protected List<SpatialHistogramBucket> buckets;
        protected double samplingRate = 1.0;

        /** Creates a forest with sampling rate 1.0. */
        public STHistForest() {
            super();
            this.samplingRate = 1.0;
        }

        /**
         * Creates a forest for data sampled with the given rate.
         *
         * @param samplingRate sampling rate; estimates are multiplied by {@code 1/samplingRate}
         */
        public STHistForest(double samplingRate) {
            super();
            this.samplingRate = samplingRate;
            System.out.println("StForest with samplig rate = " + samplingRate);
        }

        /**
         * Reads {@link #SKEW_GRID_SIZE} (default 7).
         *
         * @throws RuntimeException if the property cannot be parsed
         */
        protected void setProperties(Properties props) {
            try {
                gridSize = Integer.parseInt(props.getProperty(SKEW_GRID_SIZE, "7"));
            } catch (NumberFormatException ex) {
                throw new RuntimeException("Check property! ", ex);
            }
        }

        /**
         * Builds the hot spot forest over the two-dimensional unit universe and collects its
         * buckets. The {@code props} argument is not read by this method.
         */
        @Override
        public void buildHistogram(Cursor<DoublePointRectangle> rectangles, int numberOfBuckets, Properties props)
                throws IOException {
            STHist stHist = new STHist();
            stHist.buildHotSpotForest(rectangles, SpatialUtils.universeUnit(2), numberOfBuckets);
            forest = stHist.forest;
            buckets = new ArrayList<SpatialHistogramBucket>();
            STHist.forest(forest, buckets);
        }

        @Override
        public double getSelectivity(DoublePointRectangle queryRec) {
            return STHist.getSelectivity(forest, queryRec) * (1 / samplingRate);
        }

        /**
         * @return the buckets collected from the forest, or {@code null} before a histogram was built
         */
        public List<SpatialHistogramBucket> getBuckets() {
            return buckets;
        }

        @Override
        public int numberOfBuckets() {
            return buckets.size();
        }
    }
}