// SimpleBulkLoadingExample.java example

// Explorer
// xxl-master
/* XXL: The eXtensible and fleXible Library for data processing

Copyright (C) 2000-2014 Prof. Dr. Bernhard Seeger
                        Head of the Database Research Group
                        Department of Mathematics and Computer Science
                        University of Marburg
                        Germany

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library;  If not, see <http://www.gnu.org/licenses/>. 

    http://code.google.com/p/xxl/

*/
package xxl.core.indexStructures.rtrees;

import java.io.File;
import java.io.IOException;
import java.util.Comparator;
import java.util.Iterator;




import xxl.core.collections.containers.Container;
import xxl.core.collections.containers.CounterContainer;
import xxl.core.collections.containers.io.BlockFileContainer;
import xxl.core.collections.containers.io.BufferedContainer;
import xxl.core.collections.containers.io.ConverterContainer;
import xxl.core.collections.queues.Queue;
import xxl.core.collections.queues.io.BlockBasedQueue;
import xxl.core.collections.queues.io.QueueBuffer;
import xxl.core.cursors.Cursors;
import xxl.core.cursors.mappers.Mapper;
import xxl.core.cursors.sorters.MergeSorter;
import xxl.core.cursors.sources.io.FileInputCursor;
import xxl.core.functions.AbstractFunction;
import xxl.core.functions.Function;
import xxl.core.functions.Functional.NullaryFunction;
import xxl.core.functions.Functional.UnaryFunction;
import xxl.core.functions.Identity;
import xxl.core.indexStructures.ORTree;
import xxl.core.indexStructures.RTree;
import xxl.core.indexStructures.SortBasedBulkLoading;
import xxl.core.indexStructures.Tree;
import xxl.core.indexStructures.rtrees.AbstractIterativeRtreeBulkloader.ProcessingType;
import xxl.core.indexStructures.rtrees.GenericPartitioner.CostFunctionArrayProcessor;
import xxl.core.indexStructures.rtrees.GenericPartitioner.DefaultArrayProcessor;
import xxl.core.io.Buffer;
import xxl.core.io.LRUBuffer;
import xxl.core.io.converters.ConvertableConverter;
import xxl.core.io.converters.Converter;
import xxl.core.predicates.AbstractPredicate;
import xxl.core.spatial.SpaceFillingCurves;
import xxl.core.spatial.SpatialUtils;
import xxl.core.spatial.TestPlot;
import xxl.core.spatial.rectangles.DoublePointRectangle;
import xxl.core.spatial.rectangles.Rectangles;
import xxl.core.util.Pair;



/**
 * This class is a brief tutorial for bulk loading R-tree. 
 * 
 * Firstly, we show how to create and to set parameter for R-trees. 
 * Secondly, we bulk load them from scratch using three different bulk loading methods. 
 * We use the following data set: California Streets (rea02.rec); 
 * The data set is obtained from a census TIGER data set and contains 1.888.880 2-dimensional rectangles of California streets segments. 
 * 
 * The data set can be downloaded from http://www.mathematik.uni-marburg.de/~achakeye/data/data/rea02.rec
 * 
 * point query set from http://www.mathematik.uni-marburg.de/~achakeye/data/query_1/rea02.rec
 * 
 * range query set http://www.mathematik.uni-marburg.de/~achakeye/data/query_100/rea02.rec
 * 
 * Please note that, for testing, the data set is normalized to the unit cube.
 * 
 * 
 * 1. {@link SortBasedBulkLoading} is a generic sort based bulk loading 
 * (e.g. N. Roussopoulos and D. Leifker: Direct spatial search on pictorial databases using packed R-trees.
 * In SIGMOD, pages 17-31, 1985; I. Kamel and C. Faloutsos: On packing R-trees. In CIKM '93, pages 490-499).
 *  We will use both Hilbert and Z-Curve.   
 * 2. {@link STRBulkLoader} is a bulk loading approach developed by Leutenegger et al. (see:
 * Scott Leutenegger and Mario A. Lopez and J. Edgington
 * STR: A Simple and Efficient Algorithm for R-Tree Packing (ICDE 1997))
 * 3. {@link RtreeIterativeBulkloader}  is a generic sort based bulk loading that uses one dimensional optimal partitioning method proposed in 
 * D Achakeev, B Seeger and P Widmayer: "Sort-based query-adaptive loading of R-trees" in CIKM 2012
 * 
 * Thirdly, we test the query performance of R-trees using the following query files: 
 * TODO
 * 
 * 
 * In our example the data is stored as {@link DoublePointRectangle} objects. 
 * Rtree in XXL uses descriptor that are also of type {@link DoublePointRectangle}. Using {@link Tree#getDescriptor} 
 * function we extract or map from input objects their keys (descriptors). In our case this function is an identity.   
 * 
 * 
 *
 */
public class SimpleBulkLoadingExample {
	/**
	 * Path prefix under which the R-tree containers ({@link BlockFileContainer}) are stored.
	 * The methods of this class append a name such as "rtree" or "sortedRuns" to this prefix;
	 * the same prefix (plus a name) is also handed to the bulk loaders.
	 * 
	 * NOTE: change this value to match your environment.
	 */
	public static String RTREE_PATH ="F://rtree//";
	
	/**
	 * Path to the California Streets (rea02.rec) data set that is bulk loaded.
	 * 
	 * NOTE: change this value to match your environment.
	 */
	public static String DATA_PATH="F://rtree//data//rea02.rec";
	
	/**
	 * Path to the query file with point queries.
	 * 
	 * NOTE: change this value to match your environment.
	 */
	public static String POINT_QUERY_PATH = "F://rtree//query_1//rea02.rec";
	
	/**
	 * Path to the query file with range queries.
	 * 
	 * NOTE: change this value to match your environment.
	 */
	public static String RANGE_QUERY_PATH = "F://rtree//query_100//rea02.rec";
	
	/**
	 * Block size in bytes. In this example we use {@link BlockFileContainer}; therefore this
	 * value is passed to every block file container that is created, to the initialization of
	 * the R-trees and to the bulk loaders.
	 */
	public static int BLOCK_SIZE = 4096;
	
	/**
	 * Number of dimensions of the data; we use 2-dimensional data.
	 */
	public static int DIMENSION = 2; 
	
	/**
	 * If <code>true</code>, the createAndLoad* methods wrap the node container of the R-tree in a
	 * {@link BufferedContainer} backed by an {@link LRUBuffer} created with {@link #BUFFER_PAGES}.
	 * The default is <code>false</code>, since we want to test the actual I/O query performance.
	 */
	public static boolean BUFFER = false; 
	
	/**
	 * Argument passed to the {@link LRUBuffer} constructor when {@link #BUFFER} is enabled
	 * (a small buffer of 10 pages by default).
	 */
	public static int BUFFER_PAGES = 10;
	
	/**
	 * Selects the space-filling curve used by the sort-based loaders: <code>true</code> selects
	 * {@link #rectangleComparator2DHilbert}, <code>false</code> selects {@link #rectangleComparatorZ}.
	 */
	public static boolean HILBERT = true; 
	
	/**
	 * Two-dimensional Hilbert comparator: orders rectangles by the Hilbert value of their center point.
	 * 
	 * The input rectangles are assumed to be mapped into the unit cube already. Otherwise they have to
	 * be mapped beforehand (this can be done by providing an MBR of the space, aka universe).
	 */
	public static Comparator<DoublePointRectangle> rectangleComparator2DHilbert = new Comparator<DoublePointRectangle>() {
		
		// space resolution used for mapping the double coordinates to integers
		int gridResolution = 1 << 30;
		
		@Override
		public int compare(DoublePointRectangle o1, DoublePointRectangle o2) {
			// the center point of each rectangle is the point that is mapped to the curve
			double[] firstCenter = (double[]) o1.getCenter().getPoint();
			double[] secondCenter = (double[]) o2.getCenter().getPoint();
			// scale the double coordinates of both centers to integer grid cells
			int dims = firstCenter.length;
			int[] firstCell = new int[dims];
			int[] secondCell = new int[dims];
			int axis = 0;
			while (axis < dims) {
				firstCell[axis] = (int) (firstCenter[axis] * gridResolution);
				secondCell[axis] = (int) (secondCenter[axis] * gridResolution);
				axis++;
			}
			// the space is two-dimensional, hence only the first two coordinates are used
			long firstKey = SpaceFillingCurves.hilbert2d(firstCell[0], firstCell[1], gridResolution);
			long secondKey = SpaceFillingCurves.hilbert2d(secondCell[0], secondCell[1], gridResolution);
			return Long.compare(firstKey, secondKey);
		}
	}; 
	
	
	/**
	 * Z-curve comparator: orders rectangles by the Z-code of their center point.
	 * 
	 * The input rectangles are assumed to be mapped into the unit cube already. Otherwise they have to
	 * be mapped beforehand (this can be done by providing an MBR of the space, aka universe).
	 */
	public static Comparator<DoublePointRectangle> rectangleComparatorZ = new Comparator<DoublePointRectangle>() {
		
		// space resolution used for mapping the double coordinates to integers
		int gridResolution = 1 << 30; 
		
		@Override
		public int compare(DoublePointRectangle o1, DoublePointRectangle o2) {
			// the center point of each rectangle is the point that is mapped to the curve
			double[] firstCenter = (double[]) o1.getCenter().getPoint();
			double[] secondCenter = (double[]) o2.getCenter().getPoint();
			// scale the double coordinates of both centers to integer grid cells
			int dims = firstCenter.length;
			int[] firstCell = new int[dims];
			int[] secondCell = new int[dims];
			int axis = 0;
			while (axis < dims) {
				firstCell[axis] = (int) (firstCenter[axis] * gridResolution);
				secondCell[axis] = (int) (secondCenter[axis] * gridResolution);
				axis++;
			}
			long firstKey = SpaceFillingCurves.computeZCode(firstCell, 30);
			long secondKey = SpaceFillingCurves.computeZCode(secondCell, 30);
			return Long.compare(firstKey, secondKey);
		}
	}; 
	
	
	/**
	 * Bulk loads an R-tree with the generic sort-based technique ({@link SortBasedBulkLoading}).
	 * The following steps are executed:
	 * 1. create the R-tree
	 * 2. initialize the containers used for storage
	 * 3. create the converter/serializer
	 * 4. sort the data according to the space-filling curve selected by {@link #HILBERT}
	 * 5. execute level-by-level loading
	 *  
	 * @return a pair of the R-tree and the CounterContainer wrapped around its block file; the counter is used for counting I/Os
	 */
	public static Pair<RTree, CounterContainer> createAndLoadSortBased(){
		RTree tree = new RTree(); 
		// The container is initialized for the first time, so both the path and the block size are given.
		// For an existing container the path alone is sufficient; the block size is then taken from
		// the meta information stored in the block file container.
		Container blockFile = new BlockFileContainer(RTREE_PATH + "rtree", BLOCK_SIZE);
		// Input objects and descriptors are both DoublePointRectangles; this converter serializes them.
		Converter<DoublePointRectangle> rectangleConverter = new ConvertableConverter<>(Rectangles.factoryFunctionDoublePointRectangle(DIMENSION));
		// the counter wraps the file container
		CounterContainer ioCounter = new CounterContainer(blockFile);
		// the converter container serializes R-tree nodes, i.e. maps them to blocks, using the node converter of the tree
		Container nodeStore = new ConverterContainer(ioCounter, tree.nodeConverter(rectangleConverter, DIMENSION));
		// Optionally a small main memory buffer is put on top. Since we want to test the actual
		// I/O query performance, the default setting is without buffer.
		if (BUFFER) {
			LRUBuffer<?, ?, ?> pageBuffer = new LRUBuffer<>(BUFFER_PAGES);
			nodeStore = new BufferedContainer(nodeStore, pageBuffer);
		}
		// Initialize the tree. The first argument is the root entry; it is null because the tree is
		// created for the first time (a root entry would be given to reuse an existing R-tree).
		int rectangleBytes = DIMENSION *  2 * 8; // number of bytes needed to store a DoublePointRectangle
		int descriptorBytes = rectangleBytes; // data objects and descriptors have equal size in this example
		double minMaxFactor = 0.33; // defines the minimal number of elements per node
		tree.initialize(null, new Identity<DoublePointRectangle>(), nodeStore, BLOCK_SIZE, rectangleBytes, descriptorBytes, minMaxFactor); 
		// External sorting: intermediate runs are kept in a temporary container ...
		Container runStore = new BlockFileContainer(RTREE_PATH + "sortedRuns", BLOCK_SIZE);
		// ... and every sorted run is stored in a queue produced by this factory function.
		Function<Function<?, Integer>, Queue<DoublePointRectangle>> runQueueFactory = 
				BlockBasedQueue.createBlockBasedQueueFunctionForMergeSorter(runStore, BLOCK_SIZE, rectangleConverter); 
		// cursor that reads the rectangles from the data file with the help of the serializer
		Iterator<DoublePointRectangle> rawInput = new FileInputCursor<DoublePointRectangle>(rectangleConverter, new File(DATA_PATH));  
		Comparator<DoublePointRectangle> curveOrder;
		if (HILBERT) {
			curveOrder = rectangleComparator2DHilbert;
		} else {
			curveOrder = rectangleComparatorZ;
		}
		// 10 MB of memory are provided for the initial run generation and the same amount for the last merge
		int sortMemory = 1024*1024*10;
		Iterator<DoublePointRectangle> curveSortedInput = new MergeSorter<DoublePointRectangle>(rawInput, curveOrder, rectangleBytes, sortMemory, 
				sortMemory, runQueueFactory, false);
		// The predicate detects an overflow and thereby triggers the creation of the next node;
		// nodes are filled by 80%. The R-tree node header is 6 bytes large. A data entry is a
		// DoublePointRectangle with a serialized size of DIMENSION * 2 * 8 bytes; an index entry
		// additionally contains the address of a node, which is a long value of 8 bytes.
		final xxl.core.predicates.Predicate<ORTree.Node> overflows = new AbstractPredicate<ORTree.Node>() {
			public boolean invoke(ORTree.Node node){
				int entryBytes = DIMENSION*2*8;
				if (node.level() != 0) {
					entryBytes += 8;
				}
				int fillLimit = (int)(((BLOCK_SIZE-6)/entryBytes) * 0.8);
				return node.number() > fillLimit;
			}
		};
		// run the sort-based bulk loading
		new SortBasedBulkLoading(tree, curveSortedInput, tree.determineContainer, overflows);
		return new Pair<RTree, CounterContainer>(tree, ioCounter); 
	}
	
	/**
	 * Bulk loads an R-tree with the {@link STRBulkLoader} class. 
	 * This bulk loader sorts the data recursively by considering one dimension at a time. 
	 * 
	 * For initializing the STR loader we need 
	 * an R-tree, the number of dimensions, the block size, the storage utilization per node and a so-called sorting function.
	 * The sorting function provides the ordering of the dimensions: since STR sorts and partitions the data according to
	 * one dimension per step, it tells which dimension is taken next,
	 * e.g. in two-dimensional space there are 4 different sorting functions: x,x or x,y or y,x or y,y.
	 * In this example we use the default one: x,y.
	 * 
	 * We encode x with 0, y with 1, etc.
	 * 
	 * @return a pair of the R-tree and the CounterContainer wrapped around its block file; the counter is used for counting I/Os
	 * @throws IOException 
	 */
	public static Pair<RTree, CounterContainer> createAndLoadSTR() throws IOException{
		RTree tree = new RTree(); 
		// The container is initialized for the first time, so both the path and the block size are given.
		// For an existing container the path alone is sufficient; the block size is then taken from
		// the meta information stored in the block file container.
		Container blockFile = new BlockFileContainer(RTREE_PATH + "strrtree", BLOCK_SIZE);
		// Input objects and descriptors are both DoublePointRectangles; this converter serializes them.
		Converter<DoublePointRectangle> rectangleConverter = new ConvertableConverter<>(Rectangles.factoryFunctionDoublePointRectangle(DIMENSION));
		// the counter wraps the file container
		CounterContainer ioCounter = new CounterContainer(blockFile);
		// the converter container serializes R-tree nodes, i.e. maps them to blocks, using the node converter of the tree
		Container nodeStore = new ConverterContainer(ioCounter, tree.nodeConverter(rectangleConverter, DIMENSION));
		// Optionally a small main memory buffer is put on top. Since we want to test the actual
		// I/O query performance, the default setting is without buffer.
		if (BUFFER) {
			LRUBuffer<?, ?, ?> pageBuffer = new LRUBuffer<>(BUFFER_PAGES);
			nodeStore = new BufferedContainer(nodeStore, pageBuffer);
		}
		// Initialize the tree. The first argument is the root entry; it is null because the tree is
		// created for the first time (a root entry would be given to reuse an existing R-tree).
		int rectangleBytes = DIMENSION *  2 * 8; // number of bytes needed to store a DoublePointRectangle
		int descriptorBytes = rectangleBytes; // data objects and descriptors have equal size in this example
		double minMaxFactor = 0.33; // defines the minimal number of elements per node
		tree.initialize(null, new Identity<DoublePointRectangle>(), nodeStore, BLOCK_SIZE, rectangleBytes, descriptorBytes, minMaxFactor); 
		// with this sorting function STR sorts and partitions first by the x-axis and then by the y-axis
		int[] dimensionOrder = {0,1};  
		STRBulkLoader<DoublePointRectangle> strLoader = new STRBulkLoader<>(tree, RTREE_PATH+"str", DIMENSION, BLOCK_SIZE, 0.33, 0.8, dimensionOrder); 
		// the loader has to know the number of objects in advance; it is obtained by one pass over the data file
		int objectCount = Cursors.count(new FileInputCursor<>(rectangleConverter, new File(DATA_PATH))); 
		// again 10 MB of memory are provided for the initial run generation and for the last merge of the external sorting
		int sortMemory = 1024*1024*10;
		// identity mapping handed to the loader; with Java 8 this could be written as (x -> x)
		UnaryFunction<DoublePointRectangle, DoublePointRectangle> identity = new UnaryFunction<DoublePointRectangle, DoublePointRectangle>() {
			
			@Override
			public DoublePointRectangle invoke(DoublePointRectangle rectangle) {
				return rectangle;
			}
		};
		strLoader.init(objectCount, sortMemory, rectangleBytes, rectangleConverter, identity); 
		// conduct the bulk loading with a fresh cursor over the data file
		strLoader.buildRTree(new FileInputCursor<>(rectangleConverter, new File(DATA_PATH)));
		return new Pair<RTree, CounterContainer>(strLoader.getRTree(), ioCounter); 
	}

	
	/**
	 * Bulk loads an R-tree with the {@link RtreeIterativeBulkloader}.
	 * 
	 * This method is similar to {@link #createAndLoadSortBased()}: the input is sorted externally along the
	 * space-filling curve selected by {@link #HILBERT}. The sorted input is then handed to the iterative bulk
	 * loader, which is initialized with {@link ProcessingType#GOPT} and the default volume function.
	 * 
	 * @return a pair of the R-tree and the CounterContainer wrapped around its block file; the counter is used for counting I/Os
	 * @throws IOException 
	 */
	public static Pair<RTree, CounterContainer> createAndLoadSortBasedOptimal() throws IOException{
		RTree tree = new RTree(); 
		// The container is initialized for the first time, so both the path and the block size are given.
		// For an existing container the path alone is sufficient; the block size is then taken from
		// the meta information stored in the block file container.
		Container blockFile = new BlockFileContainer(RTREE_PATH + "rtree_gopt", BLOCK_SIZE);
		// Input objects and descriptors are both DoublePointRectangles; this converter serializes them.
		Converter<DoublePointRectangle> rectangleConverter = new ConvertableConverter<>(Rectangles.factoryFunctionDoublePointRectangle(DIMENSION));
		// the counter wraps the file container
		CounterContainer ioCounter = new CounterContainer(blockFile);
		// the converter container serializes R-tree nodes, i.e. maps them to blocks, using the node converter of the tree
		Container nodeStore = new ConverterContainer(ioCounter, tree.nodeConverter(rectangleConverter, DIMENSION));
		// Optionally a small main memory buffer is put on top. Since we want to test the actual
		// I/O query performance, the default setting is without buffer.
		if (BUFFER) {
			LRUBuffer<?, ?, ?> pageBuffer = new LRUBuffer<>(BUFFER_PAGES);
			nodeStore = new BufferedContainer(nodeStore, pageBuffer);
		}
		// Initialize the tree. The first argument is the root entry; it is null because the tree is
		// created for the first time (a root entry would be given to reuse an existing R-tree).
		int rectangleBytes = DIMENSION *  2 * 8; // number of bytes needed to store a DoublePointRectangle
		int descriptorBytes = rectangleBytes; // data objects and descriptors have equal size in this example
		double minMaxFactor = 0.33; // defines the minimal number of elements per node
		tree.initialize(null, new Identity<DoublePointRectangle>(), nodeStore, BLOCK_SIZE, rectangleBytes, descriptorBytes, minMaxFactor); 
		// External sorting: intermediate runs are kept in a temporary container ...
		Container runStore = new BlockFileContainer(RTREE_PATH + "sortedRunsGopt", BLOCK_SIZE);
		// ... and every sorted run is stored in a queue produced by this factory function.
		Function<Function<?, Integer>, Queue<DoublePointRectangle>> runQueueFactory = 
				BlockBasedQueue.createBlockBasedQueueFunctionForMergeSorter(runStore, BLOCK_SIZE, rectangleConverter); 
		// cursor that reads the rectangles from the data file with the help of the serializer
		Iterator<DoublePointRectangle> rawInput = new FileInputCursor<DoublePointRectangle>(rectangleConverter, new File(DATA_PATH));  
		Comparator<DoublePointRectangle> curveOrder;
		if (HILBERT) {
			curveOrder = rectangleComparator2DHilbert;
		} else {
			curveOrder = rectangleComparatorZ;
		}
		// 10 MB of memory are provided for the initial run generation and the same amount for the last merge
		int sortMemory = 1024*1024*10;
		Iterator<DoublePointRectangle> curveSortedInput = new MergeSorter<DoublePointRectangle>(rawInput, curveOrder, rectangleBytes, sortMemory, 
				sortMemory, runQueueFactory, false);
		// The loader needs a partition size; since a linear version of the optimal partitioning
		// algorithm is run, this value is set to 50_000.
		int partitionSize = 50_000;
		RtreeIterativeBulkloader<DoublePointRectangle> goptLoader = new RtreeIterativeBulkloader<>(tree, RTREE_PATH +"gopt", DIMENSION, BLOCK_SIZE, 0.33, 0.8, partitionSize);
		// default array processor used by the optimal partitioning algorithms;
		// here the volume of the MBR is used for the cost computation
		CostFunctionArrayProcessor<DoublePointRectangle> volumeProcessor = new DefaultArrayProcessor(AbstractIterativeRtreeBulkloader.generateDefaultFunctionVolume()); 
		// the input objects are rectangles already, so the mapping to rectangles is the identity
		UnaryFunction<DoublePointRectangle, DoublePointRectangle> identity = new UnaryFunction<DoublePointRectangle, DoublePointRectangle>() {
			
			@Override
			public DoublePointRectangle invoke(DoublePointRectangle rectangle) {
				return rectangle;
			}
		};
		goptLoader.init(volumeProcessor,ProcessingType.GOPT, rectangleBytes, rectangleConverter, identity);
		goptLoader.buildRTree(curveSortedInput);
		return new Pair<RTree, CounterContainer>(goptLoader.getRTree(), ioCounter); 
	}
	
	
	/**
	 * Builds an R-tree with the buffer-based bulk loading technique of
	 * 
	 * Lars Arge, Klaus Hinrichs, Jan Vahrenhold, Jeffrey Scott Vitter: Efficient Bulk Operations on Dynamic R-Trees. Algorithmica 33(1): 104-128 (2002)
	 * 
	 * The number of buffer pages and the number of memory entries derived from the memory budget
	 * are printed to standard output before the tree is loaded.
	 * 
	 * @return a pair of the {@link BufferedRtree} and a CounterContainer; note that the returned counter
	 * is the one wrapped around the buffered container of the tree, not the one wrapped around the block file
	 */
	public static Pair<? extends RTree, CounterContainer>  loadRtreeBufferDoublePointRectangle(){
		int bufferMemory = 1024*1024*10; // the same amount of memory as for sorting is provided for the buffers: 10 MB
		final int rectangleBytes = DIMENSION *  2 * 8; // number of bytes needed to store a DoublePointRectangle
		int descriptorBytes = rectangleBytes; // data objects and descriptors have equal size in this example
		double minMaxFactor = 0.33; // defines the minimal number of elements per node
		// This size can be changed. NOTE: the memory actually used is larger, since there is a
		// constant amount of additional memory per java object.
		int memoryEntries = bufferMemory / rectangleBytes;
		int bufferPages = bufferMemory / BLOCK_SIZE;
		System.out.println(bufferPages);
		System.out.println(memoryEntries);
		BufferedRtree<DoublePointRectangle> tree = new BufferedRtree<>(BLOCK_SIZE, rectangleBytes, DIMENSION); 
		// The container is initialized for the first time, so both the path and the block size are given.
		// For an existing container the path alone is sufficient; the block size is then taken from
		// the meta information stored in the block file container.
		Container blockFile = new BlockFileContainer(RTREE_PATH + "bufferRtree", BLOCK_SIZE);
		// Input objects and descriptors are both DoublePointRectangles; this converter serializes them.
		Converter<DoublePointRectangle> rectangleConverter = new ConvertableConverter<>(Rectangles.factoryFunctionDoublePointRectangle(DIMENSION));
		// a counter wraps the file container
		CounterContainer fileCounter = new CounterContainer(blockFile);
		// the converter container serializes R-tree nodes, i.e. maps them to blocks, using the node converter of the tree
		Container nodeStore = new ConverterContainer(fileCounter, tree.nodeConverter(rectangleConverter, DIMENSION));
		// this buffer implements the available memory and holds the node buffers
		LRUBuffer<?, ?, ?> pageBuffer = new LRUBuffer<>(bufferPages);
		CounterContainer countedTreeStore = new  CounterContainer( new BufferedContainer(nodeStore, pageBuffer));
		// container that manages the buffers; it shares the LRU buffer with the tree container
		Container queuePageFile = new ConverterContainer(new BlockFileContainer(RTREE_PATH + "buffers", BLOCK_SIZE),
				QueueBuffer.getPageConverter(Rectangles.getDoublePointRectangleConverter(DIMENSION)));
		final Container queueStore = new BufferedContainer(queuePageFile, pageBuffer);
		// factory for the queues that are used as buffers during loading
		NullaryFunction<Queue<DoublePointRectangle>> queueFactory = new NullaryFunction<Queue<DoublePointRectangle>>() {
			@Override
			public Queue<DoublePointRectangle> invoke() {
				return new QueueBuffer<>(queueStore,rectangleBytes, BLOCK_SIZE);
			}
		};
		// Initialize the tree. The first argument is the root entry; it is null because the tree is
		// created for the first time (a root entry would be given to reuse an existing R-tree).
		tree.initialize(null, new Identity<DoublePointRectangle>(), countedTreeStore, BLOCK_SIZE, rectangleBytes, descriptorBytes, minMaxFactor); 
		// the buffer technique consumes the input unsorted
		Iterator<DoublePointRectangle> rawInput = new FileInputCursor<DoublePointRectangle>(rectangleConverter, new File(DATA_PATH));  
		tree.bulkLoad(rawInput, queueFactory, memoryEntries); 
		return new Pair<>(tree, countedTreeStore); 
	} 
	
	
	/**
	 * Bulk loads an R-tree with the {@link TGSBulkLoader} class. 
	 *  
	 * For initializing the TGS loader we need 
	 * an R-tree, the number of dimensions, the block size, the storage utilization per node and the MBR of the
	 * data space; the latter is used for the cost function computation. Here the unit universe is passed as MBR.
	 * 
	 * @return a pair of the R-tree and the CounterContainer wrapped around its block file; the counter is used for counting I/Os
	 * @throws IOException 
	 */
	public static Pair<RTree, CounterContainer> createAndLoadTGS() throws IOException{
		RTree tree = new RTree(); 
		// The container is initialized for the first time, so both the path and the block size are given.
		// For an existing container the path alone is sufficient; the block size is then taken from
		// the meta information stored in the block file container.
		Container blockFile = new BlockFileContainer(RTREE_PATH + "tgsrtree", BLOCK_SIZE);
		// Input objects and descriptors are both DoublePointRectangles; this converter serializes them.
		Converter<DoublePointRectangle> rectangleConverter = new ConvertableConverter<>(Rectangles.factoryFunctionDoublePointRectangle(DIMENSION));
		// the counter wraps the file container
		CounterContainer ioCounter = new CounterContainer(blockFile);
		// the converter container serializes R-tree nodes, i.e. maps them to blocks, using the node converter of the tree
		Container nodeStore = new ConverterContainer(ioCounter, tree.nodeConverter(rectangleConverter, DIMENSION));
		// Optionally a small main memory buffer is put on top. Since we want to test the actual
		// I/O query performance, the default setting is without buffer.
		if (BUFFER) {
			LRUBuffer<?, ?, ?> pageBuffer = new LRUBuffer<>(BUFFER_PAGES);
			nodeStore = new BufferedContainer(nodeStore, pageBuffer);
		}
		// Initialize the tree. The first argument is the root entry; it is null because the tree is
		// created for the first time (a root entry would be given to reuse an existing R-tree).
		int rectangleBytes = DIMENSION *  2 * 8; // number of bytes needed to store a DoublePointRectangle
		int descriptorBytes = rectangleBytes; // data objects and descriptors have equal size in this example
		double minMaxFactor = 0.33; // defines the minimal number of elements per node
		tree.initialize(null, new Identity<DoublePointRectangle>(), nodeStore, BLOCK_SIZE, rectangleBytes, descriptorBytes, minMaxFactor); 
		// the TGS loader receives the unit universe as the MBR of the data space
		TGSBulkLoader<DoublePointRectangle> tgsLoader = new TGSBulkLoader<DoublePointRectangle>(tree, RTREE_PATH+"tgs", DIMENSION, BLOCK_SIZE, 0.33, 0.8, Rectangles.getUnitUniverseDoublePointRectangle(DIMENSION)); 
		// the loader has to know the number of objects in advance; it is obtained by one pass over the data file
		int objectCount = Cursors.count(new FileInputCursor<>(rectangleConverter, new File(DATA_PATH))); 
		// again 10 MB of memory are provided for the initial run generation and for the last merge of the external sorting
		int sortMemory = 1024*1024*10;
		// identity mapping handed to the loader; with Java 8 this could be written as (x -> x)
		UnaryFunction<DoublePointRectangle, DoublePointRectangle> identity = new UnaryFunction<DoublePointRectangle, DoublePointRectangle>() {
			
			@Override
			public DoublePointRectangle invoke(DoublePointRectangle rectangle) {
				return rectangle;
			}
		};
		tgsLoader.init(objectCount, sortMemory, rectangleBytes, rectangleConverter, identity); 
		// conduct the bulk loading with a fresh cursor over the data file
		tgsLoader.buildRTree(new FileInputCursor<>(rectangleConverter, new File(DATA_PATH)));
		return new Pair<RTree, CounterContainer>(tgsLoader.getRTree(), ioCounter); 
	}
	
	/**
	 * Runs every query rectangle stored in the given query file against the R-tree and prints
	 * the number of executed queries together with the average value of the counter's
	 * <code>gets</code> field per query.
	 * 
	 * Before each query the counter is reset and flushed, so that the value read after the
	 * query belongs to that query only.
	 * 
	 * @param rtreePair the R-tree to query and the CounterContainer that counts its accesses
	 * @param queryPath path to a file of serialized {@link DoublePointRectangle} query objects
	 */
	public static void testQuery(Pair<? extends RTree, CounterContainer>  rtreePair, String queryPath){
		RTree tree = rtreePair.getElement1();
		CounterContainer ioCounter = rtreePair.getElement2();
		double ios = 0;
		double counter = 0; 
		Converter<DoublePointRectangle> objectConverter = new ConvertableConverter<>(Rectangles.factoryFunctionDoublePointRectangle(DIMENSION));
		// the cursor holds an open file, so it is closed explicitly once all queries are processed
		FileInputCursor<DoublePointRectangle> queryRectangles = new FileInputCursor<>(objectConverter, new File(queryPath));
		try {
			while (queryRectangles.hasNext()) {
				DoublePointRectangle query = queryRectangles.next(); 
				// reset counter before test
				ioCounter.reset();
				ioCounter.flush(); // if buffer 
				// the result cursor has to be consumed completely, since the query is evaluated lazily;
				// the number of results itself is not reported
				Cursors.count(tree.query(query));
				counter++; 
				ios += ioCounter.gets;
			}
		} finally {
			queryRectangles.close();
		}
		System.out.printf("queries %f, avg. per query I/O  %.2f\n", counter, (ios/counter));	
	}
	
	/**
	 * Visualizes the node MBRs of one level of the given R-tree in a {@link TestPlot}.
	 * 
	 * @param name the name handed to the plot
	 * @param level the level of the tree whose entries are shown
	 * @param tree the R-tree to visualize
	 * @throws RuntimeException if the level is greater than the height of the tree
	 */
	public static void showRtreeLevel(String name, int level, final RTree tree){
		if (level > tree.height()) {
			throw new RuntimeException("level!");
		}
		// maps every entry delivered by the level query to its descriptor
		AbstractFunction<Object, DoublePointRectangle> toDescriptor = new AbstractFunction<Object, DoublePointRectangle>() {
			
			@Override
			public DoublePointRectangle invoke(Object entry) {
				return (DoublePointRectangle)tree.descriptor(entry);
			}
			
		};
		Iterator<DoublePointRectangle> levelDescriptors = new Mapper<Object, DoublePointRectangle>(toDescriptor, tree.query(level));
		new TestPlot( name, levelDescriptors, 500,  SpatialUtils.universeUnit(DIMENSION));
	}
	
	/**
	 * Runs the tutorial: builds five R-trees with the different bulk loading methods, executes
	 * the point queries and the range queries on each of them and finally plots one level of
	 * every tree.
	 * 
	 * @param args not used
	 * @throws IOException 
	 */
	public static void main(String[] args) throws IOException {
		boolean showTrees = true;
		final String separator = "*********************\n";
		// sort-based loaded R-tree
		Pair<RTree, CounterContainer> sortBasedTree = createAndLoadSortBased();
		// STR loaded R-tree
		Pair<RTree, CounterContainer> strTree = createAndLoadSTR();
		// R-tree loaded with optimal partitioning
		Pair<RTree, CounterContainer> goptTree = createAndLoadSortBasedOptimal();
		// buffer loaded R-tree
		Pair<? extends RTree, CounterContainer>  bufferTree = loadRtreeBufferDoublePointRectangle();
		// TGS loaded R-tree
		Pair<RTree, CounterContainer>  tgsTree = createAndLoadTGS();
		// conduct point queries
		System.out.println("Point queries");
		testQuery(sortBasedTree, POINT_QUERY_PATH);
		System.out.println(separator);
		testQuery(strTree, POINT_QUERY_PATH);
		System.out.println(separator);
		testQuery(goptTree, POINT_QUERY_PATH);
		System.out.println(separator);
		testQuery(bufferTree, POINT_QUERY_PATH);
		System.out.println(separator);
		testQuery(tgsTree, POINT_QUERY_PATH);
		System.out.println("\n\n");
		// conduct range queries
		System.out.println("Range queries");
		testQuery(sortBasedTree, RANGE_QUERY_PATH);
		System.out.println(separator);
		testQuery(strTree, RANGE_QUERY_PATH);
		System.out.println(separator);
		testQuery(goptTree, RANGE_QUERY_PATH);
		System.out.println(separator);
		testQuery(bufferTree, RANGE_QUERY_PATH);
		System.out.println(separator);
		testQuery(tgsTree, RANGE_QUERY_PATH);
		if (!showTrees) {
			return;
		}
		// show the MBRs of level 1 of every tree
		int plottedLevel = 1; 
		showRtreeLevel("RTree Hilbert Curve", plottedLevel, sortBasedTree.getElement1());
		showRtreeLevel("RTree STR", plottedLevel, strTree.getElement1());
		showRtreeLevel("RTree Hilbert Curve GOPT volume optimized", plottedLevel, goptTree.getElement1());
		showRtreeLevel("RTree R* Split top down Arge et al. Buffer loaded", plottedLevel, bufferTree.getElement1());
		showRtreeLevel("RTree TGS loaded with volume as a cost function", plottedLevel, tgsTree.getElement1());
	}

}