/*
* KernelDensityEstimator2D.java
*
* Copyright (c) 2002-2015 Alexei Drummond, Andrew Rambaut and Marc Suchard
*
* This file is part of BEAST.
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership and licensing.
*
* BEAST is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* BEAST is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with BEAST; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301 USA
*/
package dr.geo;
import cern.colt.list.DoubleArrayList;
import cern.jet.stat.Descriptive;
import dr.math.distributions.NormalDistribution;
import dr.math.matrixAlgebra.Matrix;
import dr.math.matrixAlgebra.Vector;
import dr.stats.DiscreteStatistics;
import dr.geo.contouring.ContourGenerator;
import dr.geo.contouring.ContourAttrib;
import dr.geo.contouring.ContourPath;
import dr.geo.contouring.ContourMaker;
import java.util.Arrays;
/**
* KernelDensityEstimator2D creates a bi-variate kernel density smoother for data
* @author Marc A. Suchard
* @author Philippe Lemey
*/
public class KernelDensityEstimator2D implements ContourMaker {
// kde2d =
// function (x, y, h, n = 25, lims = c(range(x), range(y)))
// {
// nx <- length(x)
// if (length(y) != nx)
// stop("data vectors must be the same length")
// if (any(!is.finite(x)) || any(!is.finite(y)))
// stop("missing or infinite values in the data are not allowed")
// if (any(!is.finite(lims)))
// stop("only finite values are allowed in 'lims'")
// gx <- seq.int(lims[1], lims[2], length.out = n)
// gy <- seq.int(lims[3], lims[4], length.out = n)
// if (missing(h))
// h <- c(bandwidth.nrd(x), bandwidth.nrd(y))
// h <- h/4
// ax <- outer(gx, x, "-")/h[1]
// ay <- outer(gy, y, "-")/h[2]
// z <- matrix(dnorm(ax), n, nx) %*% t(matrix(dnorm(ay), n,
// nx))/(nx * h[1] * h[2])
// return(list(x = gx, y = gy, z = z))
// }
/*
* @param x x-coordinates of observations
* @param y y-coordinates of observations
* @param h bi-variate smoothing bandwidths
* @param n smoothed grid size
* @param lims bi-variate min/max for grid
*/
public KernelDensityEstimator2D(final double[] x, final double[] y, final double[] h, final int n, final double[] lims) {
this(x, y, h, n, lims, false);
}
public KernelDensityEstimator2D(final double[] x, final double[] y, final double[] h, final int n, final double[] lims, boolean bandwdithLimited) {
this.x = x;
this.y = y;
if (x.length != y.length)
throw new RuntimeException("data vectors must be the same length");
this.nx = x.length;
if (n <= 0)
throw new RuntimeException("must have a positive number of grid points");
this.n = n;
if (lims != null)
this.lims = lims;
else
setupLims();
this.limitBandwidth = bandwdithLimited;
if (h != null)
this.h = h;
else
setupH();
doKDE2D();
}
public KernelDensityEstimator2D(final double[] x, final double[] y, boolean limitBandwidth) {
this(x,y,null,50,null,limitBandwidth);
}
public KernelDensityEstimator2D(final double[] x, final double[] y) {
this(x,y,null,50,null);
}
public KernelDensityEstimator2D(final double[] x, final double[] y, final int n) {
this(x,y,null,n,null);
}
public void doKDE2D() {
gx = makeSequence(lims[0], lims[1], n);
gy = makeSequence(lims[2], lims[3], n);
double[][] ax = outerMinusScaled(gx, x, h[0]);
double[][] ay = outerMinusScaled(gy, y, h[1]);
normalize(ax);
normalize(ay);
z = new double[n][n];
double scale = nx * h[0] * h[1];
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
double value = 0;
for (int k = 0; k < nx; k++) {
value += ax[i][k] * ay[j][k];
}
z[i][j] = value / scale;
}
}
}
public double findLevelCorrespondingToMass(double probabilityMass) {
double level = 0;
double[] sz = new double[n*n];
double[] c1 = new double[n*n];
for(int i=0; i<n; i++)
System.arraycopy(z[i],0,sz,i*n,n);
Arrays.sort(sz);
final double dx = gx[1] - gx[0];
final double dy = gy[1] - gy[0];
final double dxdy = dx * dy;
c1[0] = sz[0] * dxdy;
final double criticalValue = 1.0 - probabilityMass;
if (criticalValue < c1[0] || criticalValue >= 1.0)
throw new RuntimeException();
// do linearInterpolation on density (y) as function of cumulative sum (x)
for(int i=1; i<n*n; i++) {
c1[i] = sz[i] * dxdy + c1[i-1];
if (c1[i] > criticalValue) { // first largest point
final double diffC1 = c1[i] - c1[i-1];
final double diffSz = sz[i] - sz[i-1];
level = sz[i] - (c1[i]-criticalValue) / diffC1 * diffSz;
break;
}
}
return level;
}
public ContourPath[] getContourPaths(double hpdValue) {
double thresholdDensity = findLevelCorrespondingToMass(hpdValue);
ContourGenerator contour = new ContourGenerator(getXGrid(), getYGrid(), getKDE(),
new ContourAttrib[]{new ContourAttrib(thresholdDensity)});
ContourPath[] paths = null;
try {
paths = contour.getContours();
} catch (InterruptedException e) {
e.printStackTrace();
}
return paths;
}
public double[][] getKDE() {
return z;
}
public double[] getXGrid() {
return gx;
}
public double[] getYGrid() {
return gy;
}
public void normalize(double[][] X) {
for (int i = 0; i < X.length; i++) {
for (int j = 0; j < X[0].length; j++)
X[i][j] = NormalDistribution.pdf(X[i][j], 0, 1);
}
}
public double[][] outerMinusScaled(double[] X, double[] Y, double scale) {
double[][] A = new double[X.length][Y.length];
for (int indexX = 0; indexX < X.length; indexX++) {
for (int indexY = 0; indexY < Y.length; indexY++)
A[indexX][indexY] = (X[indexX] - Y[indexY]) / scale;
}
return A;
}
public double[] makeSequence(double start, double end, int length) {
double[] seq = new double[length];
double by = (end - start) / (length - 1);
double value = start;
for (int i = 0; i < length; i++, value += by) {
seq[i] = value;
}
return seq;
}
private double margin = 0.1;
private void setupLims() {
lims = new double[4];
lims[0] = DiscreteStatistics.min(x);
lims[1] = DiscreteStatistics.max(x);
lims[2] = DiscreteStatistics.min(y);
lims[3] = DiscreteStatistics.max(y);
double xDelta = (lims[1] - lims[0]) * margin;
double yDelta = (lims[3] - lims[2]) * margin;
lims[0] -= xDelta;
lims[1] += xDelta;
lims[2] -= yDelta;
lims[3] += yDelta;
}
private void setupH() {
h = new double[2];
h[0] = bandwidthNRD(x) / 4;
h[1] = bandwidthNRD(y) / 4;
if (limitBandwidth) {
if (h[0] > 0.5) {
h[0] = 0.5;
}
if (h[1] > 0.5) {
h[1] = 0.5;
}
}
}
// bandwidth.nrd =
// function (x)
// {
// r <- quantile(x, c(0.25, 0.75))
// h <- (r[2] - r[1])/1.34
// 4 * 1.06 * min(sqrt(var(x)), h) * length(x)^(-1/5)
// }
public double bandwidthNRD(double[] in) {
DoubleArrayList inList = new DoubleArrayList(in.length);
for (double d : in)
inList.add(d);
inList.sort();
final double h = (Descriptive.quantile(inList, 0.75) - Descriptive.quantile(inList, 0.25)) / 1.34;
return 4 * 1.06 *
Math.min(Math.sqrt(DiscreteStatistics.variance(in)), h) *
Math.pow(in.length, -0.2);
}
public static void main(String[] arg) {
double[] x = {3.4, 1.2, 5.6, 2.2, 3.1};
double[] y = {1.0, 2.0, 1.0, 2.0, 1.0};
KernelDensityEstimator2D kde = new KernelDensityEstimator2D(x, y, 4);
System.out.println(new Vector(kde.getXGrid()));
System.out.println(new Vector(kde.getYGrid()));
System.out.println(new Matrix(kde.getKDE()));
System.exit(-1);
}
public double[] getLims() { return lims; }
private final double[] x; // x coordinates
private final double[] y; // y coordinates
private double[] h; // h[0] x-bandwidth, h[1] y-bandwidth
private final int n; // grid size
private double[] lims; // x,y limits
private int nx; // length of vectors
private double[] gx; // x-grid points
private double[] gy; // y-grid points
private double[][] z; // KDE estimate;
private final boolean limitBandwidth;
}