/* XXL: The eXtensible and fleXible Library for data processing
Copyright (C) 2000-2011 Prof. Dr. Bernhard Seeger
Head of the Database Research Group
Department of Mathematics and Computer Science
University of Marburg
Germany
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; If not, see <http://www.gnu.org/licenses/>.
http://code.google.com/p/xxl/
*/
package xxl.core.relational.cursors;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.util.ArrayList;
import xxl.core.cursors.MetaDataCursor;
import xxl.core.functions.AbstractFunction;
import xxl.core.functions.AbstractMetaDataFunction;
import xxl.core.functions.Function;
import xxl.core.functions.MetaDataFunction;
import xxl.core.predicates.MetaDataPredicate;
import xxl.core.predicates.Predicate;
import xxl.core.relational.JoinUtils;
import xxl.core.relational.Types;
import xxl.core.relational.metaData.ColumnMetaDataResultSetMetaData;
import xxl.core.relational.metaData.ResultSetMetaDatas;
import xxl.core.relational.metaData.StoredColumnMetaData;
import xxl.core.relational.tuples.ArrayTuple;
import xxl.core.relational.tuples.Tuple;
import xxl.core.relational.tuples.Tuples;
import xxl.core.spatial.KPEzCode;
import xxl.core.spatial.cursors.Mappers;
import xxl.core.spatial.cursors.Orenstein.OrensteinSA;
import xxl.core.spatial.points.FloatPoint;
import xxl.core.spatial.points.Point;
import xxl.core.util.metaData.CompositeMetaData;
/**
* Operator computing a spatial join with the method proposed by Jack
* Orenstein.
*
* <p>The spatial join algorithm based on space-filling curves proposed by Jack
* Orenstein. For a detailed explanation see [Ore 91] Jack A. Orenstein: An
* Algorithm for Computing the Overlay of k-Dimensional Spaces. SSD 1991:
* 381-400. For a review of Orenstein's algorithm see [DS 01] Jens-Peter
* Dittrich, Bernhard Seeger: GESS: a Scalable Similarity-Join Algorithm for
* Mining Large Data Sets in High Dimensional Spaces. ACM SIGKDD-2001.</p>
*
* <p>Orensteins algorithm is based on a binary recursive partitioning, where
* the binary code represents the so-called Z-ordering (z-codes).</p>
*
* <p>Orensteins algorithm (ORE) assigns each hypercube of the input relations
* to disjoint subspaces of the recursive partitioning whose union entirely
* covers the hypercube. ORE sorts the two sets of hypercubes derived from the
* input relations (including the possible replicates) w.r.t. the
* lexicographical ordering of their binary codes. After that, the relations are
* merged using two main-memory stacks Stack_R and Stack_S. It is guaranteed
* that for two adjacent hypercubes in the stack, the prefix property is
* satisfied for their associated codes. Only those hypercubes are joined that
* have the same prefix code.</p>
*
* <p>A deficiency of ORE is that the different assignment strategies examined
* in [Ore 91] cause substantial replication rates. This results in an increase
* of the problem space and hence, sorting will be very expensive. Furthermore,
* ORE has not addressed the problem of eliminating duplicates in the result
* set.</p>
*
* <p>Note that the method <code>reorganize(final Object currentStatus)</code>
* could actually be implemented with only 1 LOC. For efficiency reasons we use
* a somewhat longer version of the method here.</p>
*
* @see xxl.core.relational.cursors.SortMergeJoin
* @see xxl.core.spatial.cursors.Mappers
* @see xxl.core.spatial.cursors.GESS
* @see xxl.core.spatial.cursors.Replicator
* @see xxl.core.spatial.cursors.Orenstein
*/
public class Orenstein extends SortMergeJoin {

    /**
     * Metadata function converting a tuple of float values into a tuple that
     * wraps a single {@link FloatPoint}. Every column of the input tuple is
     * read via <code>getFloat</code> and becomes one coordinate of the point,
     * in column order. The metadata exposed by this function describes one
     * column named <code>FLOAT_POINT</code> of SQL type
     * <code>JAVA_OBJECT</code>.
     */
    public static MetaDataFunction<Tuple, ArrayTuple, CompositeMetaData<Object, Object>> FLOAT_VALUE_TUPLE_TO_FLOAT_POINT_TUPLE = new AbstractMetaDataFunction<Tuple, ArrayTuple, CompositeMetaData<Object, Object>>() {

        /** Metadata describing the single FLOAT_POINT column produced by this function. */
        private final CompositeMetaData<Object, Object> globalMetaData = new CompositeMetaData<Object, Object>();

        {
            globalMetaData.add(ResultSetMetaDatas.RESULTSET_METADATA_TYPE, new ColumnMetaDataResultSetMetaData(new StoredColumnMetaData(false, false, false, false, ResultSetMetaData.columnNoNulls, false, 20, "FLOAT_POINT", "FLOAT_POINT", "", 20, 0, "", "", java.sql.Types.JAVA_OBJECT, Types.getSqlTypeName(java.sql.Types.JAVA_OBJECT), true, false, false, "xxl.core.spatial.points.FloatPoint")));
        }

        @Override
        public ArrayTuple invoke(Tuple tuple) {
            int columnCount = tuple.getColumnCount();
            float[] coordinates = new float[columnCount];
            // tuple columns are addressed starting at 1, array slots at 0
            for (int column = 1; column <= columnCount; column++) {
                coordinates[column - 1] = tuple.getFloat(column);
            }
            return ArrayTuple.FACTORY_METHOD.invoke(new FloatPoint(coordinates));
        }

        @Override
        public CompositeMetaData<Object, Object> getMetaData() {
            return globalMetaData;
        }
    };

    /**
     * Mapper turning a tuple containing a {@link FloatPoint} (more generally a
     * {@link Point}) in its first column into a tuple containing a
     * {@link KPEzCode}, using
     * {@link Mappers#pointToKPEzCodeMappingFunction(float, int)}.
     */
    public static class FloatPointKPEzCodeMapper extends Mapper {

        /**
         * Constructs a new FloatPointKPEzCodeMapper.
         *
         * @param input the input metadata cursor delivering tuples whose
         *        first column holds the point to be mapped.
         * @param epsilon epsilon-distance
         * @param maxLevel maximum level of the partitioning
         */
        public FloatPointKPEzCodeMapper(MetaDataCursor<Tuple, CompositeMetaData<Object, Object>> input, final float epsilon, final int maxLevel) {
            super(
                new AbstractMetaDataFunction<Tuple, Tuple, CompositeMetaData<Object, Object>>() {

                    /** Function computing the KPEzCode for a point, configured with epsilon and maxLevel. */
                    private final Function<Point, KPEzCode> pointToKPEzCodeMappingFunction = Mappers.pointToKPEzCodeMappingFunction(epsilon, maxLevel);

                    /** Metadata describing the single KPE_Z_CODE column produced by this function. */
                    private final CompositeMetaData<Object, Object> globalMetaData = new CompositeMetaData<Object, Object>();

                    {
                        globalMetaData.add(ResultSetMetaDatas.RESULTSET_METADATA_TYPE, new ColumnMetaDataResultSetMetaData(new StoredColumnMetaData(false, false, false, false, ResultSetMetaData.columnNoNulls, false, 20, "KPE_Z_CODE", "KPE_Z_CODE", "", 20, 0, "", "", java.sql.Types.JAVA_OBJECT, Types.getSqlTypeName(java.sql.Types.JAVA_OBJECT), true, false, false, "xxl.core.spatial.KPEzCode")));
                    }

                    @Override
                    public Tuple invoke(Tuple tuple) {
                        // the point is expected in the first column of the incoming tuple
                        Point point = (Point) tuple.getObject(1);
                        return new ArrayTuple(pointToKPEzCodeMappingFunction.invoke(point));
                    }

                    @Override
                    public CompositeMetaData<Object, Object> getMetaData() {
                        return globalMetaData;
                    }
                },
                input
            );
        }
    }

    /**
     * Constructs a new Orenstein join algorithm.
     *
     * <p>Each input is first mapped to tuples containing a
     * {@link FloatPoint}, then sampled using <code>p</code> and
     * <code>seed</code> (the same values for both inputs), then mapped to
     * tuples containing a {@link KPEzCode}, and finally handed to
     * <code>newSorter</code>. The two resulting cursors are passed to the
     * superclass together with two {@link OrensteinSA} sweep areas and a
     * tuple comparator on the first column.</p>
     *
     * @param input1 the first input metadata cursor.
     * @param input2 the second input metadata cursor.
     * @param joinPredicate the join predicate to use (metadata predicate!).
     * @param newSorter a function for sorting input cursors.
     * @param createTuple a factory method generating the desired tuples
     *        contained in the cursors.
     * @param initialCapacity the initial capacity of the sweep areas.
     * @param p fraction of elements to be used from the input.
     * @param seed the seed to be used for the sampler.
     * @param epsilon epsilon-distance.
     * @param maxLevel maximum level of the partitioning.
     * @param type the join type (see {@link SortMergeJoin.Type Type}).
     */
    private Orenstein(
        MetaDataCursor<? extends Tuple, CompositeMetaData<Object, Object>> input1,
        MetaDataCursor<? extends Tuple, CompositeMetaData<Object, Object>> input2,
        MetaDataPredicate<? super Tuple, CompositeMetaData<Object, Object>> joinPredicate,
        Function<? super MetaDataCursor<? extends Tuple, CompositeMetaData<Object, Object>>, ? extends MetaDataCursor<? extends Tuple, CompositeMetaData<Object, Object>>> newSorter,
        final Function<Object, ? extends Tuple> createTuple,
        int initialCapacity,
        double p,
        long seed,
        float epsilon,
        int maxLevel,
        Type type
    ) {
        super(
            sortedZCodeInput(input1, newSorter, p, seed, epsilon, maxLevel),
            sortedZCodeInput(input2, newSorter, p, seed, epsilon, maxLevel),
            joinPredicate,
            new OrensteinSA<Tuple>(0, initialCapacity, joinPredicate),
            new OrensteinSA<Tuple>(1, initialCapacity, joinPredicate),
            Tuples.getTupleComparator(new int[] {1}),
            new AbstractFunction<Object, Tuple>() {
                @Override
                public Tuple invoke(Object o1, Object o2) {
                    // both arguments are treated as KPEzCodes carrying the original point as data
                    Point left = (Point) ((KPEzCode) o1).getData();
                    Point right = (Point) ((KPEzCode) o2).getData();
                    // concatenate the coordinates of both points: left first, then right
                    ArrayList<Object> values = new ArrayList<Object>(left.dimensions() + right.dimensions());
                    for (int i = 0; i < left.dimensions(); i++) {
                        values.add(left.getValue(i));
                    }
                    for (int i = 0; i < right.dimensions(); i++) {
                        values.add(right.getValue(i));
                    }
                    return createTuple.invoke(values);
                }
            },
            type
        );
    }

    /**
     * Builds the preprocessing pipeline for one join input: float values to
     * float-point tuples, sampling, float points to KPEzCode tuples, and
     * finally sorting via the given sorter function.
     *
     * @param input the raw input metadata cursor.
     * @param newSorter a function for sorting input cursors.
     * @param p fraction of elements to be used from the input.
     * @param seed the seed to be used for the sampler.
     * @param epsilon epsilon-distance.
     * @param maxLevel maximum level of the partitioning.
     * @return the cursor returned by <code>newSorter</code> for the prepared
     *         input.
     */
    private static MetaDataCursor<? extends Tuple, CompositeMetaData<Object, Object>> sortedZCodeInput(
        MetaDataCursor<? extends Tuple, CompositeMetaData<Object, Object>> input,
        Function<? super MetaDataCursor<? extends Tuple, CompositeMetaData<Object, Object>>, ? extends MetaDataCursor<? extends Tuple, CompositeMetaData<Object, Object>>> newSorter,
        double p,
        long seed,
        float epsilon,
        int maxLevel
    ) {
        return newSorter.invoke(
            new FloatPointKPEzCodeMapper(
                new Sampler(
                    new Mapper(
                        FLOAT_VALUE_TUPLE_TO_FLOAT_POINT_TUPLE,
                        input
                    ),
                    p,
                    seed
                ),
                epsilon,
                maxLevel
            )
        );
    }

    /**
     * Checks that the given join type is one of the types supported by the
     * public constructors and returns it unchanged. Being static, it can be
     * used inside the argument list of an explicit constructor invocation,
     * so an unsupported type is rejected before the inputs are wrapped and
     * sorted.
     *
     * @param type the join type to check.
     * @return the given type.
     * @throws IllegalArgumentException if the type is not THETA_JOIN,
     *         LEFT_OUTER_JOIN, RIGHT_OUTER_JOIN or OUTER_JOIN.
     */
    private static Type requireThetaOrOuterJoin(Type type) {
        if (type != Type.THETA_JOIN && type != Type.LEFT_OUTER_JOIN && type != Type.RIGHT_OUTER_JOIN && type != Type.OUTER_JOIN) {
            throw new IllegalArgumentException ("Undefined type specified in used constructor. Only types THETA_JOIN, LEFT_OUTER_JOIN, RIGHT_OUTER_JOIN and OUTER_JOIN are allowed.");
        }
        return type;
    }

    /**
     * Constructs a new Orenstein join algorithm.
     *
     * @param input1 the first input metadata cursor.
     * @param input2 the second input metadata cursor.
     * @param joinPredicate the join predicate to use.
     * @param newSorter a function for sorting input cursors.
     * @param createTuple a factory method generating the desired tuples
     *        contained in the cursors.
     * @param initialCapacity the initial capacity of the sweep areas.
     * @param p fraction of elements to be used from the input.
     * @param seed the seed to be used for the sampler.
     * @param epsilon epsilon-distance.
     * @param maxLevel maximum level of the partitioning.
     * @param type the join type (see {@link SortMergeJoin.Type Type}). Here, only the types
     *        THETA_JOIN, LEFT_OUTER_JOIN, RIGHT_OUTER_JOIN and OUTER_JOIN are
     *        allowed.
     * @throws IllegalArgumentException if <code>type</code> is not one of
     *         the allowed join types.
     */
    public Orenstein(
        MetaDataCursor<? extends Tuple, CompositeMetaData<Object, Object>> input1,
        MetaDataCursor<? extends Tuple, CompositeMetaData<Object, Object>> input2,
        Predicate<? super Tuple> joinPredicate,
        Function<? super MetaDataCursor<? extends Tuple, CompositeMetaData<Object, Object>>, ? extends MetaDataCursor<? extends Tuple, CompositeMetaData<Object, Object>>> newSorter,
        Function<Object, ? extends Tuple> createTuple,
        int initialCapacity,
        double p,
        long seed,
        float epsilon,
        int maxLevel,
        Type type
    ) {
        this(
            input1,
            input2,
            JoinUtils.thetaJoinMetaDataPredicate(
                joinPredicate,
                ResultSetMetaDatas.getResultSetMetaData(input1),
                ResultSetMetaDatas.getResultSetMetaData(input2)
            ),
            newSorter,
            createTuple,
            initialCapacity,
            p,
            seed,
            epsilon,
            maxLevel,
            // validated here so that an unsupported type fails before newSorter is invoked
            requireThetaOrOuterJoin(type)
        );
    }

    /**
     * Constructs a new Orenstein join algorithm. Both result sets are wrapped
     * in a {@link ResultSetMetaDataCursor} and passed on to the constructor
     * taking metadata cursors.
     *
     * @param input1 the first input result set.
     * @param input2 the second input result set.
     * @param joinPredicate the join predicate to use.
     * @param newSorter a function for sorting input cursors.
     * @param createTuple a factory method generating the desired tuples
     *        contained in the cursors.
     * @param initialCapacity the initial capacity of the sweep areas.
     * @param p fraction of elements to be used from the input.
     * @param seed the seed to be used for the sampler.
     * @param epsilon epsilon-distance.
     * @param maxLevel maximum level of the partitioning.
     * @param type the join type (see {@link SortMergeJoin.Type Type}). Here, only the types
     *        THETA_JOIN, LEFT_OUTER_JOIN, RIGHT_OUTER_JOIN and OUTER_JOIN are
     *        allowed.
     * @throws IllegalArgumentException if <code>type</code> is not one of
     *         the allowed join types.
     */
    public Orenstein(
        ResultSet input1,
        ResultSet input2,
        Predicate<? super Tuple> joinPredicate,
        Function<? super MetaDataCursor<? extends Tuple, CompositeMetaData<Object, Object>>, ? extends MetaDataCursor<? extends Tuple, CompositeMetaData<Object, Object>>> newSorter,
        Function<Object, ? extends Tuple> createTuple,
        int initialCapacity,
        double p,
        long seed,
        float epsilon,
        int maxLevel,
        Type type
    ) {
        this(
            new ResultSetMetaDataCursor(input1),
            new ResultSetMetaDataCursor(input2),
            joinPredicate,
            newSorter,
            createTuple,
            initialCapacity,
            p,
            seed,
            epsilon,
            maxLevel,
            type
        );
    }
}