/* XXL: The eXtensible and fleXible Library for data processing
Copyright (C) 2000-2011 Prof. Dr. Bernhard Seeger
Head of the Database Research Group
Department of Mathematics and Computer Science
University of Marburg
Germany
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; If not, see <http://www.gnu.org/licenses/>.
http://code.google.com/p/xxl/
*/
package xxl.core.spatial.cursors;
import java.util.Iterator;
import xxl.core.collections.queues.Queue;
import xxl.core.collections.queues.Queues;
import xxl.core.comparators.ComparableComparator;
import xxl.core.cursors.AbstractCursor;
import xxl.core.cursors.Cursor;
import xxl.core.cursors.sorters.MergeSorter;
import xxl.core.cursors.unions.Merger;
import xxl.core.cursors.wrappers.IteratorCursor;
import xxl.core.cursors.wrappers.QueueCursor;
import xxl.core.functions.AbstractFunction;
import xxl.core.functions.Constant;
import xxl.core.functions.Function;
import xxl.core.predicates.Predicate;
import xxl.core.spatial.KPEzCode;
import xxl.core.spatial.points.FloatPoint;
import xxl.core.util.BitSet;
/**
* This class provides the <b>MSJ</b> (Multidimensional Spatial Join) similarity-join algorithm
* proposed by Koudas and Sevcik in "[KS 98] Nick Koudas, Kenneth C.
* Sevcik: High Dimensional Similarity Joins: Algorithms and Performance
* Evaluation. ICDE 1998: 466-475" which is the multi-dimensional
* extension of <b>S3J</b> (Size Separation Spatial Join) proposed by the same
* authors in "Nick Koudas, Kenneth C. Sevcik: Size Separation Spatial Join.
* SIGMOD Conference 1997: 324-335". (This implementation corresponds to
* the S3J-algorithm if you set the dimensionality to 2).
* <br><br>
* MSJ/S3J performs similarly to Orenstein's algorithm with two main differences: First,
* replication is not allowed and second, an I/O strategy based on
* so-called <i>level-files</i> is employed. Moreover, an n-ary recursive
* partitioning is used where n = 2^d (quadtree-partitioning).
*
* <br><br>
* The algorithm starts by partitioning the hypercubes of the input
* relations into level-files according to their levels. Hence, a
* hypercube of level l is kept in the l-th level-file. Then, the level-files are sorted w.r.t.
* the code of the hypercubes. Finally, the Merge algorithm of
* Orenstein is called.
*
* <br><br>
* Deficiencies of this method for high-dimensional intersection
* joins are that a high fraction of the input relation will be in
* level 0. The hypercubes in level 0, however, need to
* be tested against the entire input relation in a nested-loop
* manner. Moreover, [Dittrich and Seeger, ICDE 2000] showed for two dimensions that a modest rate of
* replication considerably speeds up the overall execution time of MSJ.
*
* <br><br>
* (See "[DS 01] GESS: a Scalable Algorithm for Mining Large Datasets in High Dimensional Spaces"
* by Jens-Peter Dittrich and Bernhard Seeger, ACM SIGKDD 2001. pages 47-56. for a
* review of MSJ).
* <br><br>
*
* @see xxl.core.spatial.cursors.Orenstein
* @see xxl.core.spatial.cursors.GESS
* @see xxl.core.spatial.cursors.Mappers
*/
public class MSJ extends Orenstein {

    /**
     * This class provides the I/O-strategy of MSJ: the input is distributed
     * into one "level-file" (queue) per level, every level-file is sorted
     * w.r.t. the natural order of its elements and the sorted level-files
     * are finally merged into a single sorted iteration.
     */
    public static class MSJSorter extends AbstractCursor {

        /**
         * The merged iteration over all sorted level-files. Every call to
         * this sorter is delegated to this cursor.
         */
        private Cursor cursor;

        /**
         * Sorts the input data for the multidimensional spatial join.
         * The complete input is consumed and materialized inside this
         * constructor.
         *
         * @param input the input iterator; its elements have to be of type KPEzCode
         * @param maxLevel the maximum level of the grid (must not be negative)
         * @param dim the dimension of the objects (must be positive)
         * @param newQueue a functional factory for generating queues; it is
         *        invoked with two functions providing the input and the
         *        output buffer size
         * @param mem the available size in main memory
         * @throws IllegalArgumentException if maxLevel is negative or dim is
         *         not positive
         * @throws IllegalStateException if the size of a sample object cannot
         *         be determined
         */
        public MSJSorter(Iterator input, int maxLevel, final int dim, Function newQueue, final int mem){
            // Both values are used as divisors / array sizes below, hence reject
            // invalid values with a meaningful message instead of failing later
            // with an ArithmeticException or NegativeArraySizeException.
            if (maxLevel < 0)
                throw new IllegalArgumentException("maxLevel must not be negative: " + maxLevel);
            if (dim <= 0)
                throw new IllegalArgumentException("dim must be positive: " + dim);

            final int levels = maxLevel + 1;
            Queue[] queues = new Queue[levels];

            // Estimate the size of one element using a sample object.
            // Formerly a failure was only printed to System.out and a size of 0
            // was silently handed to every MergeSorter below; fail fast instead
            // and keep the original cause.
            int objectSize;
            try {
                objectSize = xxl.core.util.XXLSystem.getObjectSize(new KPEzCode(new FloatPoint(dim), new BitSet(32)));
            }
            catch (Exception e) {
                throw new IllegalStateException("unable to determine the object size of a KPEzCode (dim=" + dim + ")", e);
            }

            Function inputBufferSize = new Constant((int)(0.2*mem)); //determine buffer-size for reading data from disk
            Function outputBufferSize = new Constant(mem/levels); //determine buffer-size for writing data to disk

            for (int i = 0; i < levels; i++) { //initialize output-queues
                queues[i] = (Queue) newQueue.invoke(inputBufferSize, outputBufferSize);
                queues[i].open();
            }

            while (input.hasNext()) { //write data into "level-files" (see paper of Koudas and Sevcik!)
                KPEzCode next = (KPEzCode) input.next();
                // the level is derived from the precision of the z-code and capped at maxLevel
                int level = Math.min(next.getzCode().precision()/dim, maxLevel); //determine queue
                queues[level].enqueue(next); //insert object into queue
            }

            Iterator[] iterators = new Iterator[levels];
            for (int i = 0; i < levels; i++) { //for each level-file: sort w.r.t. space-filling curve
                Queue sorted = (Queue) newQueue.invoke(inputBufferSize, outputBufferSize); //get new Queue for this level-file
                sorted.open();
                // sort level-file and materialize result into queue
                // NOTE(review): the sorter (and with it the unsorted level-file
                // queues[i]) is not closed explicitly after it has been drained -
                // confirm whether the used queue implementation releases its
                // resources on its own.
                Queues.enqueueAll(
                    sorted,
                    new MergeSorter(
                        new QueueCursor(queues[i]),
                        new ComparableComparator(),
                        objectSize,
                        (int)(mem*0.8),
                        mem/levels,
                        newQueue,
                        false
                    )
                );
                iterators[i] = new QueueCursor(sorted);
            }
            cursor = new Merger(new ComparableComparator(), iterators); //merge sorted streams
        }

        /**
         * Checks whether the merged iteration holds another object.
         *
         * @return true if there is another object available
         */
        public boolean hasNextObject() {
            return cursor.hasNext();
        }

        /**
         * Delivers the next object of the merged iteration.
         *
         * @return the next object
         */
        public Object nextObject() {
            return cursor.next();
        }

        /**
         * Closes this sorter and additionally the merged iteration it
         * delegates to, so that the cursors wrapping the sorted level-files
         * get the chance to release their resources. Formerly closing the
         * sorter never reached the underlying cursor.
         */
        public void close() {
            super.close();
            cursor.close();
        }
    }

    /**
     * The top-level constructor for MSJ
     *
     * @param input0 the first input
     * @param input1 the second input
     * @param predicate the join predicate
     * @param newResult a function for mapping the output to an object
     * @param initialCapacity the initial capacity of a bucket
     * @param maxLevel the maximum level of the grid
     * @param dim the dimension of the objects
     * @param newQueue a functional factory for creating queues
     * @param mem the available main memory
     */
    public MSJ(Cursor input0, Cursor input1, Predicate predicate, Function newResult, final int initialCapacity, final int maxLevel, final int dim, final Function newQueue, final int mem){
        super(
            input0,
            input1,
            predicate,
            // factory handed to the super class: wraps an input into an MSJSorter
            new AbstractFunction(){
                public Object invoke(Object input){
                    return new MSJSorter((Iterator)input, maxLevel, dim, newQueue, mem);
                }
            },
            newResult,
            initialCapacity
        );
    }

    // top-level constructor for a self-join (currently disabled)
    /*
    public MSJ(Cursor input, Predicate predicate, Function newResult, final int initialCapacity, final int type, final int maxLevel, final int dim, final Function newQueue, final int mem){
        super( input, predicate,
            new AbstractFunction(){
                public Object invoke(Object input){
                    return new MSJSorter((Iterator)input, maxLevel, dim, newQueue, mem);
                }
            },
            newResult, initialCapacity, type
        );
    }
    */

    /**
     * The top-level constructor for MSJ taking plain iterators. Both
     * iterators are wrapped into cursors and passed to the cursor-based
     * constructor.
     *
     * @param input0 the first input
     * @param input1 the second input
     * @param predicate the join predicate
     * @param newResult a function for mapping the output to an object
     * @param initialCapacity the initial capacity of a bucket
     * @param maxLevel the maximum level of the grid
     * @param dim the dimension of the objects
     * @param newQueue a functional factory for creating queues
     * @param mem the available main memory
     */
    public MSJ(Iterator input0, Iterator input1, Predicate predicate, Function newResult, final int initialCapacity, final int maxLevel, final int dim, final Function newQueue, final int mem){
        this( new IteratorCursor(input0), new IteratorCursor(input1), predicate, newResult, initialCapacity, maxLevel, dim, newQueue, mem);
    }

    // constructor for a self-join (currently disabled)
    // public MSJ(Iterator input, Predicate predicate, Function newResult, final int initialCapacity, final int maxLevel, final int dim, final Function newQueue, final int mem){
    //     this( new BufferedCursor(input), predicate, newResult, initialCapacity, THETA_JOIN, maxLevel, dim, newQueue, mem);
    // }
}