/* XXL: The eXtensible and fleXible Library for data processing
Copyright (C) 2000-2011 Prof. Dr. Bernhard Seeger
Head of the Database Research Group
Department of Mathematics and Computer Science
University of Marburg
Germany
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; If not, see <http://www.gnu.org/licenses/>.
http://code.google.com/p/xxl/
*/
package xxl.core.functions;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import xxl.core.cursors.identities.TeeCursor;
import xxl.core.math.Maths;
import xxl.core.util.Distance;
/**
* This class provides a projection from any object-space with a given distance
* function to a k-dimensional real valued metric space (using the euklidean
* distance function, also known as L2-Metric) using the FastMap algorithm.
* The implementation is based upon [FL95 ]: Christos Faloutsos, King-Ip Lin.
* FastMap: A Fast Algorithm for Indexing, Data-Mining and Visualization
* of Traditional and Multimedia Datasets. SIGMOD Conference 1995. 163-174.
*
* <p>Due to the lack of an efficient memory management in java the original
* algorithm is modified. Instead of computing and storing a distance matrix in
* advance only the pivot objects of each dimension are computed in advance and
* the modified distance function for each dimension will be recursivly
* evaluated.</p>
*
* @param <T> the type of the objects to be projected, i.e., the object-space.
*/
@SuppressWarnings("serial")
public class FastMapProjection<T> extends AbstractFunction<T, double[]> {
/**
* The chosen dimension.
*/
protected int dim;
/**
* Stores the computed pivot elements (left) for the dimension denoted by
* the index.
*/
protected List<T> a;
/**
* Stores the computed pivot elements (right) for the dimension denoted by
* the index.
*/
protected List<T> b;
/**
* The distance function used for the object space.
*/
protected Distance<? super T> dist;
/**
* Stores the distances between the pivot-objects. d[k] is the distance
* between a[k] and b[k]. Used for effectiveness.
*/
protected double[] d;
/**
* Given objects of the object space. A
* {@link xxl.core.cursors.identities.TeeCursor tee cursor}* is used
* because in order to compute the pivot elements in advance one must scan
* the data <code>2*k</code> times, i.e. the complexity of locating the
* pivot elements is <code>O(2kn)</code> whereas <code>n</code> denotes the
* number of objects in the {@link xxl.core.cursors.Cursor cursor}.
*
* @see xxl.core.cursors.Cursor
* @see xxl.core.cursors.identities.TeeCursor
*/
protected TeeCursor<? extends T> data;
/**
* Indicates whether the tee cursor has been used at least one time.
*/
protected boolean teecursorInit = false;
/**
* Constructs a new FastMap Projection based upon the data given.
*
* @param data init data.
* @param dimension dimension to project data to.
* @param dist distance function for the kind of data given in data.
* @throws IllegalArgumentException if the given dimension isn't at least 1
* or the given tee cursor doesn't support multiple access.
*
* @see xxl.core.cursors.identities.TeeCursor
* @see xxl.core.util.Distance
*/
public FastMapProjection(TeeCursor<? extends T> data, int dimension, Distance<? super T> dist) throws IllegalArgumentException {
dim = dimension ;
if (dim <= 0)
throw new IllegalArgumentException("Dimension to project to must be one or greater! (given=" + dim + ")");
a = new ArrayList<T>(dim);
b = new ArrayList<T>(dim);
d = new double[dim];
this.dist = dist;
this.data = data;
try {
init();
}
catch (IllegalStateException e) {
throw new IllegalArgumentException("Used teecursor must support multiple access!");
}
}
/**
* Constructs a new FastMap Projection based upon the data given. The given
* data will be wrapped by a tee cursor in order to support multiple access
* to the data. For default a TeeCursor is used.
*
* @param data init data. Will be wrapped by a tee cursor to support
* multiple access.
* @param dimension dimension to project data to.
* @param dist distance function for the kind of data given.
* @throws IllegalArgumentException if the given dimension isn't at least
* 1.
*
* @see xxl.core.cursors.identities.TeeCursor
* @see xxl.core.cursors.identities.TeeCursor
* @see xxl.core.util.Distance
*/
public FastMapProjection(Iterator<? extends T> data, int dimension, Distance<? super T> dist) throws IllegalArgumentException {
this(new TeeCursor<T>(data), dimension, dist);
}
/**
* Returns a projection to a k-dimensional real valued space of the given
* Object as an object of type <code>double[k]</code>.
*
* @param x object to project to a subspace.
* @return projected object (represented by a <code>double[]</code> of
* dimension <code>k</code>.)
*/
@Override
public double[] invoke(T x) {
double[] re = new double[dim];
for (int i = 0; i < dim ; i++) {
re[i] = p(i, x);
if (Double.isInfinite(re[i]) || Double.isNaN(re[i])) {
re[i] = 0;
}
}
return re;
}
/**
* Determines the pivot objects for each dimension needed. The complexity
* of the initialization is <code>O(2kn)</code> where <code>n</code>
* denotes the number of objects in the given
* {@link xxl.core.cursors.identities.TeeCursor tee cursor} and
* <code>k</code> the given dimensions.
*
* @throws IllegalStateException if the
* {@link xxl.core.cursors.identities.TeeCursor tee cursor} doesn't
* support adequate multiple access.
*/
protected void init() throws IllegalStateException {
T p = null;
if (data.hasNext())
p = data.next();
for (int k = 0; k < dim; k++) {
b.add(getFurthermost(k, p));
a.add(getFurthermost(k, b.get(k)));
d[k] = d(k, a.get(k), b.get(k));
}
}
/**
* Returns the furthermost object wrt to p.
*
* @param k specifies the aggregation level of the distance function used
* to compute the furthermost object.
* @param p reference object.
* @throws IllegalStateException if the tee cursor doesn't support adequate
* multiple access.
* @return the furthermost object wrt to p.
*/
protected T getFurthermost(int k, T p) throws IllegalStateException {
Iterator<? extends T> it = null;
if (!teecursorInit){
it = data;
teecursorInit = true;
}
else
it = data.cursor();
T fm = p;
T tmp = null;
while (it.hasNext()) {
tmp = it.next();
if (d(k, p, fm) < d(k, p, tmp))
fm = tmp;
}
return fm;
}
/**
* Returns the projected distance between the objects l and r.
*
* @param k Specifies the aggregation level of the distance function.
* @param l left object for distance computing.
* @param r right object for distance computing.
* @return the modified distance between two objects of given aggregation
* level.
*/
protected double d(int k, T l, T r) {
double re = 0.0;
if (k > 0)
re = Math.sqrt(Math.pow(d(k-1, l, r), 2.0) - Math.pow(Maths.pDistance(p(k-1, l), p(k-1, r), 2), 2.0));
else
re = dist.distance(l, r);
return re;
}
/**
* Returns the k-th component of the FastMap-Projection of o.
*
* @param o the object to project.
* @param k dimension of the subspace the object is projected to.
* @return the k-th component of the projection of object o.
*/
protected double p(int k, T o) {
double dab = d[k];
double dai = d(k, a.get(k), o);
double dbi = d(k, b.get(k), o);
return (Math.pow(dai, 2.0) + Math.pow(dab, 2.0) - Math.pow(dbi, 2.0)) / (2.0 * dab);
}
}