/* XXL: The eXtensible and fleXible Library for data processing
Copyright (C) 2000-2011 Prof. Dr. Bernhard Seeger
Head of the Database Research Group
Department of Mathematics and Computer Science
University of Marburg
Germany
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; If not, see <http://www.gnu.org/licenses/>.
http://code.google.com/p/xxl/
*/
package xxl.core.cursors.unions;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import xxl.core.collections.queues.Heap;
import xxl.core.collections.queues.Queue;
import xxl.core.comparators.FeatureComparator;
import xxl.core.cursors.AbstractCursor;
import xxl.core.cursors.Cursor;
import xxl.core.cursors.Cursors;
import xxl.core.functions.AbstractFunction;
/**
* A merger serializes input iterations with respect to a given
* {@link java.util.Comparator comparator} or
* {@link xxl.core.collections.queues.Queue queue}. The input iterations are
* inserted into a queue, e.g., a
* {@link xxl.core.collections.queues.Heap heap}, defining a strategy for
* merging them. Some queues need a comparator to put their elements in order,
* so the caller is able to specify his queue and his comparator with special
* constructors. The serialization of the input iteration's elements is
* depending on the queue's implementation, e.g., the queue can use a
* FIFO-strategy with the intention to receive a cyclic order of the input
* iteration's elements.
* <code><pre>
* cursors[0].next(),
* ...,
* cursors[cursors.length-1].next(),
* cursors[0].next(),
* ...
* </pre></code>
* But be aware that the merger works with lazy evaluation, so calling the
* <code>next</code> or <code>peek</code> method accesses the next cursor,
* named <code>minCursor</code>, delivered by the queue and calls its
* <code>next</code> or <code>peek</code> method. If this iteration contains
* further elements, it will be added to the queue another time. The
* position at which this iteration is inserted into the queue is defined by
* the queue's strategy.
*
* <p><b>Note:</b> When the given input iteration only implements the interface
* {@link java.util.Iterator} it is wrapped to a cursor by a call to the static
* method {@link xxl.core.cursors.Cursors#wrap(Iterator) wrap}.</p>
*
* <p><b>Example usage (1):</b>
* <code><pre>
* HashGrouper&lt;Integer&gt; hashGrouper = new HashGrouper&lt;Integer&gt;(
* new Enumerator(21),
* new Function&lt;Integer, Integer&gt;() {
* public Integer invoke(Integer next) {
* return next % 5;
* }
* }
* );
*
* hashGrouper.open();
*
* Cursor[] cursors = new Cursor[5];
* for (int i = 0; hashGrouper.hasNext(); i++)
* cursors[i] = hashGrouper.next();
*
* Merger&lt;Integer&gt; merger = new Merger&lt;Integer&gt;(
* ComparableComparator.INTEGER_COMPARATOR,
* (Iterator&lt;Integer&gt;[])cursors
* );
*
* merger.open();
*
* while (merger.hasNext())
* System.out.print(merger.next() + "; ");
* System.out.flush();
*
* merger.close();
* </pre></code>
* This example uses a hash-grouper to partition the input data, i.e., the
* function (object modulo 5) is invoked on each element of the enumerator with
* range 0,...,20. Because the <code>next</code> method of the hash-grouper
* returns a cursor pointing to the next group, i.e., the next bucket in the
* hash-map, all returned cursors are stored in a cursor array named
* <code>cursors</code>. For more detailed information see
* {@link xxl.core.cursors.groupers.HashGrouper}. This cursor array is given to
* the constructor of the merger internally using the defined comparator that
* compares two integer objects. This instance of a merger uses a
* {@link xxl.core.collections.queues.Heap heap} to arrange the cursors,
* because no queue has been specified in the constructor. Because of the
* specified comparator and the implementation of the heap realizing a
* min-heap, the elements in the buckets of the hash-map are merged and
* returned in ascending order. The generated output is as follows:
* <pre>
* 0; 1; 2; 3; 4; 5; 6; 7; 8; 9; 10; 11; 12; 13; 14; 15; 16; 17; 18; 19; 20;
* </pre>
*
* <p><b>Example usage (2):</b>
* <code><pre>
* merger = new Merger&lt;Integer&gt;(
* new StackQueue&lt;Cursor&lt;Integer&gt;&gt;(),
* new Enumerator(11),
* new Enumerator(11, 21)
* );
*
* merger.open();
*
* while (merger.hasNext())
* System.out.print(merger.next() + "; ");
* System.out.flush();
*
* merger.close();
* </pre></code>
* In this case, the used queue realizes a LIFO-queue (last in first out), so
* the second enumerator is completely consumed at first, then the elements of
* the first enumerator are returned. So the elements are printed to the output
* stream in the following order:
* <pre>
* 11; 12; 13; 14; 15; 16; 17; 18; 19; 20; 0; 1; 2; 3; 4; 5; 6; 7; 8; 9; 10;
* </pre></p>
*
* @param <E> the type of the elements returned by this iteration.
* @see java.util.Iterator
* @see xxl.core.cursors.Cursor
* @see xxl.core.cursors.AbstractCursor
* @see java.util.Comparator
*/
public class Merger<E> extends AbstractCursor<E> {

    /**
     * The list containing the input iterations to be merged.
     */
    protected List<Cursor<E>> cursors;

    /**
     * The queue used to define an order for the merge of the input iterations.
     */
    protected Queue<Cursor<E>> queue;

    /**
     * The input cursor that delivered the element most recently produced by
     * <code>nextObject</code>. The <code>remove</code> and
     * <code>update</code> operations of the merger are redirected to this
     * cursor. It is <code>null</code> before the first element has been
     * produced and after a call to <code>remove</code>, <code>update</code>,
     * <code>reset</code> or <code>close</code>.
     */
    protected Cursor<E> minCursor = null;

    /**
     * The input cursor that has been taken out of the queue by the last call
     * to <code>nextObject</code> and has not yet been asked for further
     * elements. It is put back into the queue (if it has further elements)
     * the next time the merger needs to inspect the queue. Keeping it
     * separate from <code>minCursor</code> is necessary because
     * <code>remove</code> and <code>update</code> set <code>minCursor</code>
     * to <code>null</code>.
     */
    private Cursor<E> pendingCursor = null;

    /**
     * Creates a new merger backed on an input iteration array and a queue
     * delivering the strategy used for merging the input iterations. The
     * input iterations are inserted into the given queue when the merger is
     * opened. Every iterator given to this constructor is wrapped to a cursor
     * by a call to <code>Cursors.wrap</code>.
     *
     * @param queue the queue defining the strategy the input iterations are
     *        accessed.
     * @param iterators the input iterations to be merged.
     */
    public Merger(Queue<Cursor<E>> queue, Iterator<E>... iterators) {
        cursors = new ArrayList<Cursor<E>>(iterators.length);
        for (Iterator<E> iterator : iterators)
            cursors.add(Cursors.wrap(iterator));
        this.queue = queue;
    }

    /**
     * Creates a new merger backed on an input cursor list and a queue
     * delivering the strategy used for merging the input cursors. The input
     * cursors are inserted into the given queue when the merger is opened.
     * The given list is used directly, i.e., it is not copied.
     *
     * @param queue the queue defining the strategy the input iterations are
     *        accessed.
     * @param cursors the list of cursors to be merged.
     */
    public Merger(Queue<Cursor<E>> queue, List<Cursor<E>> cursors) {
        this.cursors = cursors;
        this.queue = queue;
    }

    /**
     * Creates a new merger backed on an input cursor list and a
     * {@link xxl.core.collections.queues.Heap heap} for merging the input
     * cursors. The order is defined by the specified comparator in that
     * way, that a new
     * {@link xxl.core.comparators.FeatureComparator feature-comparator} is
     * used calling the <code>compare</code> method of the specified comparator
     * in order to compare two elements delivered by the input cursors. So
     * the heap manages the cursors, but the order is defined by the element
     * that is returned by the <code>peek</code> method of these cursors.
     *
     * @param comparator the comparator used to compare two elements of the
     *        input iteration.
     * @param cursors the list of cursors to be merged.
     */
    public Merger(Comparator<? super E> comparator, List<Cursor<E>> cursors) {
        this(Merger.<E>createPeekOrderedHeap(cursors.size(), comparator), cursors);
    }

    /**
     * Creates a new merger backed on an input iteration array and a
     * {@link xxl.core.collections.queues.Heap heap} for merging the input
     * iterations. The order is defined by the specified comparator in that
     * way, that a new
     * {@link xxl.core.comparators.FeatureComparator feature-comparator} is
     * used calling the <code>compare</code> method of the specified comparator
     * in order to compare two elements delivered by the input iterations. So
     * the heap manages the iterations, but the order is defined by the
     * element that is returned by the <code>peek</code> method of these
     * iterations. Every iterator given to this constructor is wrapped to a
     * cursor.
     *
     * @param comparator the comparator used to compare two elements of the
     *        input iteration.
     * @param iterators the input iterations to be merged.
     */
    public Merger(Comparator<? super E> comparator, Iterator<E>... iterators) {
        this(Merger.<E>createPeekOrderedHeap(iterators.length, comparator), iterators);
    }

    /**
     * Builds the heap shared by the comparator-based constructors. The heap
     * orders cursors by applying the given comparator to the element each
     * cursor returns from its <code>peek</code> method.
     *
     * @param <T> the type of the elements delivered by the cursors.
     * @param size the size argument handed to the heap's constructor (the
     *        number of input iterations).
     * @param comparator the comparator used to compare the peeked elements.
     * @return a new heap ordering cursors by their peeked element.
     */
    private static <T> Heap<Cursor<T>> createPeekOrderedHeap(int size, Comparator<? super T> comparator) {
        return new Heap<Cursor<T>>(
            size,
            new FeatureComparator<T, Cursor<T>>(
                comparator,
                new AbstractFunction<Cursor<T>, T>() {
                    public T invoke(Cursor<T> cursor) {
                        return cursor.peek();
                    }
                }
            )
        );
    }

    /**
     * Opens the merger, i.e., opens every input cursor, opens the queue and
     * inserts every input cursor that has at least one element into the
     * queue. Multiple calls to <code>open</code> do not have any effect,
     * i.e., if the merger is already in the state <i>opened</i> this method
     * returns immediately.
     *
     * <p>Note, that a call to the <code>open</code> method of a closed cursor
     * usually does not open it again because of the fact that its state
     * generally cannot be restored when resources are released respectively
     * files are closed.</p>
     */
    public void open() {
        if (isOpened)
            return;
        super.open();
        for (Cursor<E> cursor : cursors)
            cursor.open();
        queue.open();
        for (Cursor<E> cursor : cursors)
            if (cursor.hasNext())
                queue.enqueue(cursor);
    }

    /**
     * Closes the merger, i.e., closes every input cursor and the queue.
     * Multiple calls to <code>close</code> do not have any effect, i.e., if
     * the merger is already in the state <i>closed</i> this method returns
     * immediately.
     *
     * <p>Note, that a closed cursor usually cannot be opened again because of
     * the fact that its state generally cannot be restored when resources are
     * released respectively files are closed.</p>
     */
    public void close() {
        if (isClosed)
            return;
        super.close();
        // Drop the references to input cursors that are about to be closed, so
        // that supportsRemove/supportsUpdate no longer answer on their behalf.
        minCursor = null;
        pendingCursor = null;
        for (Cursor<E> cursor : cursors)
            cursor.close();
        queue.close();
    }

    /**
     * Returns <code>true</code> if the iteration has more elements. (In other
     * words, returns <code>true</code> if <code>next</code> or
     * <code>peek</code> would return an element rather than throwing an
     * exception.)
     *
     * <p>Before the queue is inspected, the input cursor that delivered the
     * previous element is put back into the queue if it has further
     * elements.</p>
     *
     * @return <code>true</code> if the merger has more elements.
     */
    protected boolean hasNextObject() {
        requeuePendingCursor();
        return queue.size() > 0;
    }

    /**
     * Returns the next element in the iteration. This element will be
     * accessible by some of the merger's methods, e.g., <code>update</code> or
     * <code>remove</code>, until a call to <code>next</code> or
     * <code>peek</code> occurs. That is, calling <code>next</code> or
     * <code>peek</code> proceeds the iteration and therefore its previous
     * element will not be accessible any more.
     *
     * <p>The next element is determined by taking the next input cursor out
     * of the queue (stored in <code>minCursor</code>) and calling its
     * <code>next</code> method. The cursor is <b>not</b> asked for further
     * elements here: invoking <code>hasNext</code> or <code>peek</code> on it
     * between its <code>next</code> and a following <code>remove</code> or
     * <code>update</code> is forbidden. Instead, the cursor is remembered and
     * put back into the queue (if it has further elements) the next time
     * <code>hasNextObject</code> or <code>nextObject</code> is called. The
     * order of the queue operations, and therefore the order of the returned
     * elements, is the same as with an immediate re-insertion.</p>
     *
     * @return the next element in the iteration.
     */
    protected E nextObject() {
        // Makes this method safe even if it is not preceded by hasNextObject.
        requeuePendingCursor();
        minCursor = queue.dequeue();
        E minimum = minCursor.next();
        // Only remember the cursor once next() succeeded.
        pendingCursor = minCursor;
        return minimum;
    }

    /**
     * Puts the input cursor that delivered the previous element back into the
     * queue if it has further elements; does nothing if no cursor is pending.
     */
    private void requeuePendingCursor() {
        if (pendingCursor == null)
            return;
        Cursor<E> cursor = pendingCursor;
        pendingCursor = null;
        if (cursor.hasNext())
            queue.enqueue(cursor);
    }

    /**
     * Removes from the underlying data structure the last element returned by
     * the merger (optional operation). The call is redirected to the input
     * cursor that delivered this element. This method can be called only once
     * per call to <code>next</code> or <code>peek</code> and removes the
     * element returned by this method. Note, that between a call to
     * <code>next</code> and <code>remove</code> the invocation of
     * <code>peek</code> or <code>hasNext</code> is forbidden. The behaviour of
     * a cursor is unspecified if the underlying data structure is modified
     * while the iteration is in progress in any way other than by calling
     * this method.
     *
     * <p>Note, that this operation is optional and might not work for all
     * cursors.</p>
     *
     * @throws IllegalStateException if the <code>next</code> or
     *         <code>peek</code> method has not yet been called, or the
     *         <code>remove</code> method has already been called after the
     *         last call to the <code>next</code> or <code>peek</code> method.
     * @throws UnsupportedOperationException if the <code>remove</code>
     *         operation is not supported by the merger.
     */
    public void remove() throws IllegalStateException, UnsupportedOperationException {
        super.remove();
        if (minCursor == null)
            throw new IllegalStateException();
        minCursor.remove();
        minCursor = null;
    }

    /**
     * Returns <code>true</code> if the <code>remove</code> operation is
     * supported by the merger, i.e., if there is an input cursor that
     * delivered the last element and this cursor supports the
     * <code>remove</code> operation. Otherwise it returns <code>false</code>.
     *
     * @return <code>true</code> if the <code>remove</code> operation is
     *         supported by the merger, otherwise <code>false</code>.
     */
    public boolean supportsRemove() {
        return minCursor != null && minCursor.supportsRemove();
    }

    /**
     * Replaces the last element returned by the merger in the underlying data
     * structure (optional operation). The call is redirected to the input
     * cursor that delivered this element. This method can be called only once
     * per call to <code>next</code> or <code>peek</code> and updates the
     * element returned by this method. Note, that between a call to
     * <code>next</code> and <code>update</code> the invocation of
     * <code>peek</code> or <code>hasNext</code> is forbidden. The behaviour of
     * a merger is unspecified if the underlying data structure is modified
     * while the iteration is in progress in any way other than by calling
     * this method.
     *
     * <p>Note, that this operation is optional and might not work for all
     * cursors.</p>
     *
     * @param object the object that replaces the last element returned by the
     *        merger.
     * @throws IllegalStateException if the <code>next</code> or
     *         <code>peek</code> method has not yet been called, or the
     *         <code>update</code> method has already been called after the
     *         last call to the <code>next</code> or <code>peek</code> method.
     * @throws UnsupportedOperationException if the <code>update</code>
     *         operation is not supported by the merger.
     */
    public void update(E object) throws IllegalStateException, UnsupportedOperationException {
        super.update(object);
        if (minCursor == null)
            throw new IllegalStateException();
        minCursor.update(object);
        minCursor = null;
    }

    /**
     * Returns <code>true</code> if the <code>update</code> operation is
     * supported by the merger, i.e., if there is an input cursor that
     * delivered the last element and this cursor supports the
     * <code>update</code> operation. Otherwise it returns <code>false</code>.
     *
     * @return <code>true</code> if the <code>update</code> operation is
     *         supported by the merger, otherwise <code>false</code>.
     */
    public boolean supportsUpdate() {
        return minCursor != null && minCursor.supportsUpdate();
    }

    /**
     * Resets the merger to its initial state such that the caller is able to
     * traverse the underlying data structure again without constructing a new
     * merger (optional operation). Every input cursor is reset, the queue is
     * cleared and every input cursor that has at least one element is
     * inserted into the queue again. The modifications, removes and updates
     * concerning the underlying data structure, are still persistent.
     *
     * <p>Note, that this operation is optional and might not work for all
     * cursors.</p>
     *
     * @throws UnsupportedOperationException if the <code>reset</code>
     *         operation is not supported by the merger.
     */
    public void reset() throws UnsupportedOperationException {
        super.reset();
        // The element delivered before the reset is no longer the merger's
        // current element, and every input cursor is re-inserted below, so
        // neither reference may survive the reset.
        minCursor = null;
        pendingCursor = null;
        for (Cursor<E> cursor : cursors)
            cursor.reset();
        queue.clear();
        for (Cursor<E> cursor : cursors)
            if (cursor.hasNext())
                queue.enqueue(cursor);
    }

    /**
     * Returns <code>true</code> if the <code>reset</code> operation is
     * supported by the merger, i.e., if every input cursor supports the
     * <code>reset</code> operation. Otherwise it returns <code>false</code>.
     *
     * @return <code>true</code> if the <code>reset</code> operation is
     *         supported by the merger, otherwise <code>false</code>.
     */
    public boolean supportsReset() {
        for (Cursor<E> cursor : cursors)
            if (!cursor.supportsReset())
                return false;
        return true;
    }
}