/* XXL: The eXtensible and fleXible Library for data processing
Copyright (C) 2000-2011 Prof. Dr. Bernhard Seeger
Head of the Database Research Group
Department of Mathematics and Computer Science
University of Marburg
Germany
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; If not, see <http://www.gnu.org/licenses/>.
http://code.google.com/p/xxl/
*/
package xxl.core.math.statistics.nonparametric;
import java.util.Iterator;
import xxl.core.cursors.Cursor;
import xxl.core.cursors.mappers.Aggregator;
import xxl.core.functions.Function;
import xxl.core.math.functions.AggregationFunction;
import xxl.core.math.statistics.parametric.aggregates.LastN;
/** In the context of online aggregation, running aggregates are built. Given an
* iterator of data, an {@link xxl.core.cursors.mappers.Aggregator Aggregator}
* computes iteratively aggregates. For instance, the current maximum
* of the already processed data is determined. An internal aggregation function processes
* the computation of the new element by consuming the old aggregate and the new element
* from the input cursor.
*
* Generally, each aggregation function must support a function call of the following type:<br>
* <tt>agg_n = f (agg_n-1, next)</tt>. <br>
* There, <tt>agg_n</tt> denotes the computed aggregation value after <tt>n</tt> steps,
* <tt>f</tt> represents the aggregation function,
* <tt>agg_n-1</tt> the computed aggregation value after <tt>n-1</tt> steps
* and <tt>next</tt> the next object to use for computation.
* <br>
* This class implements an aggregation function that computes empirical estimators. There,
* the data is processed in blocks of a predefined size. Given such a block of data, an empirical cdf
* is established.
* <br>
* Consider the following example that displays a concrete application of an empirical cdf
* aggregation function combined with an aggregator:
* <code><pre>
Aggregator aggregator =
new Aggregator(
inputCursor(cursor, blockSize),
new BlockEmpiricalCDFAggregationFunction()
);
* </pre></code>
*
* @see xxl.core.cursors.mappers.Aggregator
* @see xxl.core.math.functions.AdaptiveAggregationFunction
* @see xxl.core.math.statistics.nonparametric.EmpiricalCDF
*
*/
public class BlockEmpiricalCDFAggregationFunction extends AggregationFunction<Object[],EmpiricalCDF> {
/** factory for empirical cdf's */
Function factory=EmpiricalCDF.FACTORY;
/** internal counter to determine how many objects are processed */
protected int c;
/** index of the last built cdf */
protected int last;
/** indicates whether this instance is initialized */
protected boolean init;
/** Delivers the elements of an input iterator blockwise.
*
* @param input input iterator
* @param blockSize size of the blocks
* @return cursor that delivers the elements blockwise
*/
public static Cursor inputCursor(Iterator input, int blockSize) {
return new Aggregator(
input,
new LastN(blockSize));
}
/** Two-figured function call for supporting aggregation by this function.
* Each aggregation function must support a function call like <tt>agg_n = f (agg_n-1, next)</tt>,
* where <tt>agg_n</tt> denotes the computed aggregation value after <tt>n</tt> steps, <tt>f</tt>
* the aggregation function, <tt>agg_n-1</tt> the computed aggregation value after <tt>n-1</tt> steps
* and <tt>next</tt> the next object to use for computation.
* This method delivers only <tt>null</tt> as aggregation result as long as the aggregation
* has not yet initialized.
* As result of the aggregation a kernel based block estimator, that relies on the current block, is returned.
*
* @param old result of the aggregation function in the previous computation step
* @param next next object used for computation
* @return new kernel based block estimator
*/
public EmpiricalCDF invoke(EmpiricalCDF old, Object[] next) { // next = sample
c++;
if (next == null)
return null;
Object[] sample = next;
boolean build = false;
// indicates whether a new function must be build or not
// all needed aggregates fully initialized?
if (sample == null)
// if the block did not init, this functions also did not init
return null;
if (!init) { // building up first function (block != null, but no functions returned so far)
last = c; // storing time
build = true; // building up
init = true;
} else {
int blockSize = sample.length;
if (c >= last + blockSize) { // new block
last = c; // storing time
build = true; // building up
}
}
if (build) {
return (EmpiricalCDF)factory.invoke(sample);
} else
return old;
}
}