BlockEmpiricalCDFAggregationFunction.java example

Explorer
xxl-master
/* XXL: The eXtensible and fleXible Library for data processing

Copyright (C) 2000-2011 Prof. Dr. Bernhard Seeger
                        Head of the Database Research Group
                        Department of Mathematics and Computer Science
                        University of Marburg
                        Germany

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library;  If not, see <http://www.gnu.org/licenses/>. 

    http://code.google.com/p/xxl/

*/

package xxl.core.math.statistics.nonparametric;

import java.util.Iterator;

import xxl.core.cursors.Cursor;
import xxl.core.cursors.mappers.Aggregator;
import xxl.core.functions.Function;
import xxl.core.math.functions.AggregationFunction;
import xxl.core.math.statistics.parametric.aggregates.LastN;

/** In the context of online aggregation, running aggregates are built. Given an 
 * iterator of data, an {@link xxl.core.cursors.mappers.Aggregator Aggregator}
 * iteratively computes aggregates. For instance, the current maximum
 * of the already processed data is determined. An internal aggregation function
 * computes the new aggregate by consuming the old aggregate and the next element
 * from the input cursor.
 * 
 * Generally, each aggregation function must support a function call of the following type:<br>
 * <tt>agg_n = f (agg_n-1, next)</tt>. <br>
 * There, <tt>agg_n</tt> denotes the computed aggregation value after <tt>n</tt> steps,
 * <tt>f</tt> represents the aggregation function,
 * <tt>agg_n-1</tt> the computed aggregation value after <tt>n-1</tt> steps
 * and <tt>next</tt> the next object to use for computation.
 * <br>
 * This class implements an aggregation function that computes empirical estimators. There,
 * the data is processed in blocks of a predefined size. Given such a block of data, an empirical cdf 
 * is established. The function expects each input object to be the current block
 * (an <tt>Object[]</tt>) and rebuilds the empirical cdf only after as many calls as the
 * block has elements; in between, the previously built cdf is passed through unchanged.
 * <br>
 * Consider the following example that displays a concrete application of an empirical cdf 
 * aggregation function combined with an aggregator:
 * <code><pre>
 	Aggregator aggregator =
			new Aggregator(
				inputCursor(cursor, blockSize), 
				new BlockEmpiricalCDFAggregationFunction()
			);
 * </pre></code>
 * 
 * @see xxl.core.cursors.mappers.Aggregator
 * @see xxl.core.math.functions.AdaptiveAggregationFunction
 * @see xxl.core.math.statistics.nonparametric.EmpiricalCDF
 *
 */
public class BlockEmpiricalCDFAggregationFunction extends AggregationFunction<Object[],EmpiricalCDF> {

	/** factory for empirical cdf's; it is invoked with the current block whenever a new cdf is built */
	Function factory=EmpiricalCDF.FACTORY;

	/** internal counter of the calls to <tt>invoke</tt>; calls with a <tt>null</tt> block are counted as well */
	protected int c;

	/** value of the counter <tt>c</tt> at the time the last cdf was built */
	protected int last;

	/** indicates whether this instance is initialized, i.e., whether a first cdf has been built */
	protected boolean init;

	/** Delivers the elements of an input iterator blockwise. The input is wrapped by an
	 * {@link xxl.core.cursors.mappers.Aggregator Aggregator} that uses a
	 * {@link xxl.core.math.statistics.parametric.aggregates.LastN LastN} aggregation
	 * function of the given size.
	 * 
	 * @param input input iterator
	 * @param blockSize size of the blocks
	 * @return cursor that delivers the elements blockwise
	 */
	public static Cursor inputCursor(Iterator input, int blockSize) {
		return new Aggregator(
			input,
			new LastN(blockSize));
	}

	/** Two-figured function call for supporting aggregation by this function.
	 * Each aggregation function must support a function call like <tt>agg_n = f (agg_n-1, next)</tt>,
	 * where <tt>agg_n</tt> denotes the computed aggregation value after <tt>n</tt> steps, <tt>f</tt>
	 * the aggregation function, <tt>agg_n-1</tt> the computed aggregation value after <tt>n-1</tt> steps
	 * and <tt>next</tt> the next object to use for computation.
	 * <br>
	 * Every call increments the internal counter. If <tt>next</tt> is <tt>null</tt>, i.e., the
	 * block is not yet available, <tt>null</tt> is returned. The first non-<tt>null</tt> block
	 * leads to the first empirical cdf. Afterwards a new empirical cdf is built from the current
	 * block as soon as at least <tt>next.length</tt> calls have passed since the last build;
	 * otherwise the old aggregate is returned unchanged.
	 * 
	 * @param old result of the aggregation function in the previous computation step
	 * @param next next object used for computation, i.e., the current block of data
	 * @return <tt>null</tt> if <tt>next</tt> is <tt>null</tt>, a new empirical cdf based on
	 * 		<tt>next</tt> if a new block is due, <tt>old</tt> otherwise
	 */
	public EmpiricalCDF invoke(EmpiricalCDF old, Object[] next) { // next = sample
		c++;
		if (next == null)
			// if the block did not init, this function also did not init
			return null;
		// The difference c - last stays correct even if the counter wraps around,
		// whereas last + next.length could overflow on very long streams.
		if (init && c - last < next.length)
			// still inside the block of the last built cdf
			return old;
		// first block ever or a new block is due: remember the time and build a new cdf
		last = c;
		init = true;
		return (EmpiricalCDF)factory.invoke(next);
	}
}