/* XXL: The eXtensible and fleXible Library for data processing
Copyright (C) 2000-2011 Prof. Dr. Bernhard Seeger
Head of the Database Research Group
Department of Mathematics and Computer Science
University of Marburg
Germany
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; If not, see <http://www.gnu.org/licenses/>.
http://code.google.com/p/xxl/
*/
package xxl.core.cursors.mappers;
import java.util.Iterator;
import xxl.core.math.statistics.parametric.aggregates.ReservoirSample;
import xxl.core.util.random.ContinuousRandomWrapper;
import xxl.core.util.random.DiscreteRandomWrapper;
import xxl.core.util.random.JavaContinuousRandomWrapper;
import xxl.core.util.random.JavaDiscreteRandomWrapper;
/**
* This class shows how to make use of the
* {@link xxl.core.math.statistics.parametric.aggregates.ReservoirSample reservoir-sampling function}.
* A reservoir-sampler is an
* {@link xxl.core.cursors.mappers.Aggregator aggregator} using the
* reservoir-sampling function with a given strategy for <tt>on-line
* sampling</tt>. There are three types of strategies available based on
* [Vit85]: Jeffrey Scott Vitter, <i>Random Sampling with a Reservoir</i>, in
* ACM Transactions on Mathematical Software, Vol. 11, NO. 1, March 1985,
* Pages 37-57.
*
* @see java.util.Iterator
* @see xxl.core.cursors.Cursor
* @see xxl.core.cursors.mappers.Aggregator
*/
public class ReservoirSampler extends Aggregator {

    /**
     * Indicates the use of type R for sampling strategy.
     *
     * @see xxl.core.math.statistics.parametric.aggregates.ReservoirSample
     * @see xxl.core.math.statistics.parametric.aggregates.ReservoirSample.RType
     */
    public static final int RTYPE = 0;

    /**
     * Indicates the use of type X for sampling strategy.
     *
     * @see xxl.core.math.statistics.parametric.aggregates.ReservoirSample
     * @see xxl.core.math.statistics.parametric.aggregates.ReservoirSample.XType
     */
    public static final int XTYPE = 1;

    /**
     * Indicates the use of type Y for sampling strategy. This type is not
     * available so far due to the lack of information about the used
     * distribution to determine the position in the reservoir for a sampled
     * object. Passing this value to a constructor of this class results in
     * an {@link IllegalArgumentException}.
     *
     * @see xxl.core.math.statistics.parametric.aggregates.ReservoirSample
     */
    public static final int YTYPE = 2;

    /**
     * Indicates the use of type Z for sampling strategy.
     *
     * @see xxl.core.math.statistics.parametric.aggregates.ReservoirSample
     * @see xxl.core.math.statistics.parametric.aggregates.ReservoirSample.ZType
     */
    public static final int ZTYPE = 3;

    /**
     * Constructs a new aggregator that provides a reservoir-sampling as output.
     * The sampling function is selected by the given strategy code and is
     * created exactly once; the strategy code is checked before the
     * aggregator itself is set up.
     *
     * @param input input iteration to draw the sample from.
     * @param n size of the sample to draw.
     * @param type strategy used to determine the position of the treated object
     *        in the sampling reservoir; one of {@link #RTYPE}, {@link #XTYPE}
     *        or {@link #ZTYPE}.
     * @param crw used PRNG for computing continuous random numbers.
     * @param drw used PRNG for computing discrete random numbers.
     * @throws IllegalArgumentException if an unknown sampling strategy has been
     *         given, or if {@link #YTYPE} has been given (not supported so
     *         far).
     */
    public ReservoirSampler(Iterator input, int n, int type, ContinuousRandomWrapper crw, DiscreteRandomWrapper drw) throws IllegalArgumentException {
        // Build the one sampling function that is actually needed and hand it
        // to the constructor taking a ready-made ReservoirSample, instead of
        // creating a placeholder first and overwriting the inherited field.
        this(input, createReservoirSample(n, type, crw, drw));
    }

    /**
     * Constructs a new aggregator that provides a reservoir sampling as output
     * using a default PRNG for computing
     * {@link xxl.core.util.random.JavaContinuousRandomWrapper continuous} random
     * numbers and a default PRNG for computing
     * {@link xxl.core.util.random.JavaDiscreteRandomWrapper discrete} random
     * numbers.
     *
     * @param input iteration to draw the sample from.
     * @param n size of the sample to draw.
     * @param type strategy used to determine the position of the treated object
     *        in the sampling reservoir; one of {@link #RTYPE}, {@link #XTYPE}
     *        or {@link #ZTYPE}.
     * @throws IllegalArgumentException if an unknown sampling strategy has been
     *         given, or if {@link #YTYPE} has been given (not supported so
     *         far).
     */
    public ReservoirSampler(Iterator input, int n, int type) throws IllegalArgumentException {
        this(input, n, type, new JavaContinuousRandomWrapper(), new JavaDiscreteRandomWrapper());
    }

    /**
     * Constructs a new aggregator that provides a reservoir sampling as output.
     *
     * @param input iteration to draw the sample from.
     * @param reservoirSample function providing an online sampling.
     */
    public ReservoirSampler(Iterator input, ReservoirSample reservoirSample) {
        super(input, reservoirSample);
    }

    /**
     * Constructs a new aggregator that provides a reservoir sampling as output.
     *
     * @param input iteration to draw the sample from.
     * @param n size of the sample to draw.
     * @param strategy strategy used to determine the position of the treated
     *        object in the sampling reservoir.
     */
    public ReservoirSampler(Iterator input, int n, ReservoirSample.ReservoirSamplingStrategy strategy) {
        this(input, new ReservoirSample(n, strategy));
    }

    /**
     * Creates the reservoir-sampling function that belongs to the given
     * strategy code.
     *
     * @param n size of the sample to draw.
     * @param type strategy code, one of {@link #RTYPE}, {@link #XTYPE} or
     *        {@link #ZTYPE}.
     * @param crw used PRNG for computing continuous random numbers.
     * @param drw used PRNG for computing discrete random numbers.
     * @return a new reservoir-sampling function using the selected strategy.
     * @throws IllegalArgumentException if the strategy code is unknown or
     *         denotes the unsupported type Y.
     */
    private static ReservoirSample createReservoirSample(int n, int type, ContinuousRandomWrapper crw, DiscreteRandomWrapper drw) throws IllegalArgumentException {
        switch (type) {
            case RTYPE:
                return new ReservoirSample(n, new ReservoirSample.RType(n, crw, drw));
            case XTYPE:
                return new ReservoirSample(n, new ReservoirSample.XType(n, crw, drw));
            case YTYPE:
                // Type Y has no implementation yet, so it is rejected explicitly.
                throw new IllegalArgumentException("type y is not supported so far. See javadoc xxl.core.math.statistics.parametric.aggregates.ReservoirSample for details!");
            case ZTYPE:
                return new ReservoirSample(n, new ReservoirSample.ZType(n, crw, drw));
            default:
                throw new IllegalArgumentException("unknown sampling strategy given!");
        }
    }
}