package org.voyanttools.trombone.tool.algorithms.pca; import java.util.ArrayList; import java.util.List; import java.util.SortedSet; import java.util.TreeSet; import Jama.EigenvalueDecomposition; import Jama.Matrix; public class PrincipalComponentsAnalysis { private Matrix covMatrix; private EigenvalueDecomposition eigenstuff; private double[] eigenvalues; private Matrix eigenvectors; private SortedSet<PrincipleComponent> principleComponents; private double[] means; // @edu.umd.cs.findbugs.annotations.SuppressWarnings({ "EI_EXPOSE_REP2" }) public PrincipalComponentsAnalysis(double[][] input) { this.means = new double[input[0].length]; double[][] cov = getCovariance(input, this.means); this.covMatrix = new Matrix(cov); this.eigenstuff = this.covMatrix.eig(); this.eigenvalues = this.eigenstuff.getRealEigenvalues(); this.eigenvectors = this.eigenstuff.getV(); double[][] vecs = this.eigenvectors.getArray(); int numComponents = this.eigenvectors.getColumnDimension(); // same as num rows. this.principleComponents = new TreeSet<PrincipleComponent>(); for (int i = 0; i < numComponents; i++) { double[] eigenvector = new double[numComponents]; for (int j = 0; j < numComponents; j++) { eigenvector[j] = vecs[i][j]; } this.principleComponents.add(new PrincipleComponent(this.eigenvalues[i], eigenvector)); } } // @edu.umd.cs.findbugs.annotations.SuppressWarnings({ "EI_EXPOSE_REP" }) public double[] getMeans() { return this.means; } /** * Subtracts the mean value from each column. The means must be precomputed, which you get for * free when you make a PCA instance (just call getMeans()). * * @param input * Some data, where each row is a sample point, and each column is a dimension. * @param mean * Subtract the dimension's mean from each value. * @return TODO describe return value */ public static double[][] getMeanAdjusted(double[][] input, double[] mean) { int nRows = input.length; int nCols = input[0].length; double[][] ret = new double[nRows][nCols]; for (int row = 0; row < nRows; row++) { for (int col = 0; col < nCols; col++) { ret[row][col] = input[row][col] - mean[col]; //System.out.println(input[row][col] + " - " + mean[col]); } //System.out.println("---"); } return ret; } // from tapor code public static Matrix getCorrelationCoefficient(double[][] means) { int rows = means.length; int cols = means[0].length; Matrix coeMatrix = new Matrix(rows, rows); int i = 0; while (i < rows) { int k = i; while (k < rows) { int j = 0; double numerator = 0; double sx = 0; double sy = 0; while (j < cols) { numerator += means[i][j] * means[k][j]; sx += means[i][j] * means[i][j]; sy += means[k][j] * means[k][j]; j++; } double tmp = numerator / Math.sqrt(sx * sy); coeMatrix.set(i, k, tmp); coeMatrix.set(k, i, tmp); k++; } i++; } return coeMatrix; } public List<PrincipleComponent> getDominantComponents(int n) { List<PrincipleComponent> ret = new ArrayList<PrincipleComponent>(); int count = 0; for (PrincipleComponent pc : this.principleComponents) { ret.add(pc); count++; if (count >= n) { break; } } return ret; } public static Matrix getDominantComponentsMatrix(List<PrincipleComponent> dom) { int nRows = dom.get(0).eigenVector.length; int nCols = dom.size(); Matrix matrix = new Matrix(nRows, nCols); for (int col = 0; col < nCols; col++) { for (int row = 0; row < nRows; row++) { matrix.set(row, col, dom.get(col).eigenVector[row]); } } return matrix; } public int getNumComponents() { return this.eigenvalues.length; } public static class PrincipleComponent implements Comparable<PrincipleComponent> { public double eigenValue; public double[] eigenVector; // @edu.umd.cs.findbugs.annotations.SuppressWarnings({ "EI_EXPOSE_REP2" }) public PrincipleComponent(double eigenValue, double[] eigenVector) { this.eigenValue = eigenValue; this.eigenVector = eigenVector; } @Override public int compareTo(PrincipleComponent o) { int ret = 0; if (this.eigenValue > o.eigenValue) { ret = -1; } else if (this.eigenValue < o.eigenValue) { ret = 1; } if (ret == 0) { if (this.eigenVector.length > o.eigenVector.length) return -1; else if (this.eigenVector.length < o.eigenVector.length) return 1; else { boolean exact = true; for (int i = 0; i < this.eigenVector.length; i++) { if (this.eigenVector[i] != o.eigenVector[i]) { exact = false; break; } } if (exact) ret = 0; else ret = -1; // TODO how else to compare differing vectors? } } return ret; } @Override public boolean equals(Object obj) { if ((obj == null) || (obj instanceof PrincipleComponent == false)) return false; final PrincipleComponent other = (PrincipleComponent) obj; return this.compareTo(other) == 0; } @Override public int hashCode() { return (int) Math.round(this.eigenValue); } } public static double[][] getCovariance(double[][] input, double[] meanValues) { int numDataVectors = input.length; int n = input[0].length; // get the sum for each col double[] sum = new double[n]; double[] mean = new double[n]; for (int i = 0; i < numDataVectors; i++) { double[] vec = input[i]; for (int j = 0; j < n; j++) { sum[j] = sum[j] + vec[j]; } } for (int i = 0; i < sum.length; i++) { mean[i] = sum[i] / numDataVectors; } double[][] ret = new double[n][n]; for (int i = 0; i < n; i++) { for (int j = i; j < n; j++) { double v = getCovariance(input, i, j, mean); ret[i][j] = v; ret[j][i] = v; } } if (meanValues != null) { System.arraycopy(mean, 0, meanValues, 0, mean.length); } return ret; } /** * Gives covariance between vectors in an n-dimensional space. The two input arrays store values * with the mean already subtracted. Read the code. */ private static double getCovariance(double[][] matrix, int colA, int colB, double[] mean) { //System.out.println("---"); double sum = 0; for (int i = 0; i < matrix.length; i++) { double v1 = matrix[i][colA] - mean[colA]; double v2 = matrix[i][colB] - mean[colB]; //System.out.println(v1 + ", "+v2); sum = sum + (v1 * v2); } int n = matrix.length; double ret = (sum / (n - 1)); return ret; } public SortedSet<PrincipleComponent> getPrincipleComponents() { return this.principleComponents; } }