/* XXL: The eXtensible and fleXible Library for data processing
Copyright (C) 2000-2011 Prof. Dr. Bernhard Seeger
Head of the Database Research Group
Department of Mathematics and Computer Science
University of Marburg
Germany
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; If not, see <http://www.gnu.org/licenses/>.
http://code.google.com/p/xxl/
*/
package xxl.core.relational.cursors;
import java.sql.ResultSet;
import xxl.core.collections.bags.Bag;
import xxl.core.collections.queues.Queue;
import xxl.core.cursors.MetaDataCursor;
import xxl.core.functions.Function;
import xxl.core.predicates.Predicate;
import xxl.core.relational.metaData.ResultSetMetaDatas;
import xxl.core.relational.tuples.Tuple;
import xxl.core.util.metaData.CompositeMetaData;
/**
* A nested-loops implementation of the operator "Distinct". This class uses
* the algorithm of {@link xxl.core.cursors.distincts.NestedLoopsDistinct} and
* additionally forwards the metadata.
*
* <p>Depending on the specified memory size and object size, as many elements
* as possible will be inserted into a temporary bag (typically located in main
* memory). To guarantee that no duplicates will be inserted, the bag is
* searched for duplicates with the help of a predicate specified in the
* constructor. If not all elements can be inserted into the bag, they will be
* stored in a queue and will be inserted when the bag is emptied due to calls
* to <code>this.next()</code>.</p>
*
* <p>Example:
* <code><pre>
* new NestedLoopsDistinct(cursor, 100000, 200);
* // 100000: amount of memory used in Bytes
* // 200: size of each Tuple
* </pre></code>
* This distinct-operator handles up to (100000/200)-1 tuples in main memory at
* one time. Additional tuples will be stored in a queue.</p>
*/
public class NestedLoopsDistinct
        extends xxl.core.cursors.distincts.NestedLoopsDistinct<Tuple>
        implements MetaDataCursor<Tuple, CompositeMetaData<Object, Object>> {

    /**
     * The composite metadata exposed by this operator via
     * {@link #getMetaData()}. It is populated once by the constructors
     * with the relational metadata obtained from the input cursor.
     */
    protected CompositeMetaData<Object, Object> globalMetaData;

    /**
     * Builds a distinct operator on top of a metadata cursor with an
     * explicitly given equality predicate and explicit factories for the
     * bag and the queue. The number of elements kept in the bag used for
     * temporary storage is derived from the memory parameters:
     * <pre>
     *     maxTuples = memSize / objectSize - 1
     * </pre>
     *
     * @param cursor the metadata cursor supplying the input tuples.
     * @param memSize the memory (in bytes) available for the bag.
     * @param objectSize the memory (in bytes) required for a single
     *        element.
     * @param predicate the predicate that evaluates to <code>true</code>
     *        when two elements are considered equal.
     * @param newBag a parameterless function producing an empty bag; the
     *        cursor returned by <code>bag.cursor()</code> must support
     *        <code>remove()</code>.
     * @param newQueue a parameterless function producing an empty queue.
     * @throws IllegalArgumentException if not enough memory is available.
     */
    public NestedLoopsDistinct(
            MetaDataCursor<? extends Tuple, CompositeMetaData<Object, Object>> cursor,
            int memSize,
            int objectSize,
            Predicate<? super Tuple> predicate,
            Function<?, ? extends Bag<Tuple>> newBag,
            Function<?, ? extends Queue<Tuple>> newQueue) {
        super(cursor, memSize, objectSize, predicate, newBag, newQueue);
        this.globalMetaData = relationalMetaDataOf(cursor);
    }

    /**
     * Builds a distinct operator on top of a metadata cursor, relying on
     * the predicate <code>Equal.DEFAULT_INSTANCE</code> and on the default
     * factory methods for list-bags and array-queues. The number of
     * elements kept in the bag used for temporary storage is derived from
     * the memory parameters:
     * <pre>
     *     maxTuples = memSize / objectSize - 1
     * </pre>
     *
     * @param cursor the metadata cursor supplying the input tuples.
     * @param memSize the memory (in bytes) available for the bag.
     * @param objectSize the memory (in bytes) required for a single
     *        element.
     * @throws IllegalArgumentException if not enough memory is available.
     */
    public NestedLoopsDistinct(
            MetaDataCursor<? extends Tuple, CompositeMetaData<Object, Object>> cursor,
            int memSize,
            int objectSize) {
        super(cursor, memSize, objectSize);
        this.globalMetaData = relationalMetaDataOf(cursor);
    }

    /**
     * Builds a distinct operator on top of a result set with an explicitly
     * given equality predicate and explicit factories for the bag and the
     * queue. The number of elements kept in the bag used for temporary
     * storage is derived from the memory parameters:
     * <pre>
     *     maxTuples = memSize / objectSize - 1
     * </pre>
     *
     * @param resultSet the result set supplying the input tuples. It is
     *        wrapped into a metadata cursor by means of
     *        {@link ResultSetMetaDataCursor}.
     * @param memSize the memory (in bytes) available for the bag.
     * @param objectSize the memory (in bytes) required for a single
     *        element.
     * @param predicate the predicate that evaluates to <code>true</code>
     *        when two elements are considered equal.
     * @param newBag a parameterless function producing an empty bag; the
     *        cursor returned by <code>bag.cursor()</code> must support
     *        <code>remove()</code>.
     * @param newQueue a parameterless function producing an empty queue.
     * @throws IllegalArgumentException if not enough memory is available.
     */
    public NestedLoopsDistinct(
            ResultSet resultSet,
            int memSize,
            int objectSize,
            Predicate<? super Tuple> predicate,
            Function<?, ? extends Bag<Tuple>> newBag,
            Function<?, ? extends Queue<Tuple>> newQueue) {
        this(new ResultSetMetaDataCursor(resultSet), memSize, objectSize, predicate, newBag, newQueue);
    }

    /**
     * Builds a distinct operator on top of a result set, relying on the
     * predicate <code>Equal.DEFAULT_INSTANCE</code> and on the default
     * factory methods for list-bags and array-queues. The number of
     * elements kept in the bag used for temporary storage is derived from
     * the memory parameters:
     * <pre>
     *     maxTuples = memSize / objectSize - 1
     * </pre>
     *
     * @param resultSet the result set supplying the input tuples. It is
     *        wrapped into a metadata cursor by means of
     *        {@link ResultSetMetaDataCursor}.
     * @param memSize the memory (in bytes) available for the bag.
     * @param objectSize the memory (in bytes) required for a single
     *        element.
     * @throws IllegalArgumentException if not enough memory is available.
     */
    public NestedLoopsDistinct(ResultSet resultSet, int memSize, int objectSize) {
        this(new ResultSetMetaDataCursor(resultSet), memSize, objectSize);
    }

    /**
     * Delivers the metadata of this metadata-cursor in the form of a
     * composite metadata ({@link CompositeMetaData}).
     *
     * @return the composite metadata ({@link CompositeMetaData}) describing
     *         this metadata-cursor.
     */
    public CompositeMetaData<Object, Object> getMetaData() {
        return this.globalMetaData;
    }

    /**
     * Creates a fresh composite metadata and registers the relational
     * metadata of the given cursor in it under the identifier
     * {@link ResultSetMetaDatas#RESULTSET_METADATA_TYPE}.
     *
     * @param cursor the input metadata cursor whose relational metadata is
     *        forwarded.
     * @return a new composite metadata holding the cursor's relational
     *         metadata.
     */
    private static CompositeMetaData<Object, Object> relationalMetaDataOf(
            MetaDataCursor<? extends Tuple, CompositeMetaData<Object, Object>> cursor) {
        CompositeMetaData<Object, Object> composite = new CompositeMetaData<Object, Object>();
        composite.add(ResultSetMetaDatas.RESULTSET_METADATA_TYPE, ResultSetMetaDatas.getResultSetMetaData(cursor));
        return composite;
    }

}