/*
* Copyright (C) 2014 Mathias Unberath
* CONRAD is developed as an Open Source project under the GNU General Public License (GPL).
*/
package edu.stanford.rsl.conrad.geometry.shapes.activeshapemodels;
import java.util.ArrayList;
import ij.ImageJ;
import ij.gui.Plot;
import edu.stanford.rsl.conrad.geometry.shapes.mesh.DataMatrix;
import edu.stanford.rsl.conrad.numerics.DecompositionSVD;
import edu.stanford.rsl.conrad.numerics.SimpleMatrix;
import edu.stanford.rsl.conrad.numerics.SimpleOperators;
import edu.stanford.rsl.conrad.numerics.SimpleVector;
import edu.stanford.rsl.conrad.utils.VisualizationUtil;
/**
* This class performs Principal Component Analysis (PCA) on a data-set. The data-set can be composed of scalar values or
* of points of arbitrary dimension, e.g. the vertices of a surface mesh representing a shape. The columns of the data matrix
* are treated as random variables, hence each sample has to be stored in a single column, regardless of its dimensionality.
* The implementation calculates the eigen-analysis of the covariance matrix via a singular value decomposition. The
* eigenvalues and eigenvectors of the covariance matrix are accessible through the class members.
* The implementation assumes that the data-set has been subject to Generalized Procrustes Alignment (GPA). If an implementation of GPA
* other than the one provided here is used, modifications to this PCA (e.g. re-scaling and consensus subtraction) might not be necessary.
* Reference: Jolliffe, Ian. Principal component analysis. John Wiley & Sons, Ltd, 2005.
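*
* A minimal usage sketch (the variable names and the 95% threshold are illustrative assumptions, not defaults):
* <pre>{@code
* DataMatrix data = ...;                  // samples stored column-wise, e.g. assembled after GPA
* PCA pca = new PCA(data);                // re-scales and subtracts the consensus internally
* pca.variationThreshold = 0.95;          // keep the components explaining 95% of the total variation
* pca.run();
* SimpleMatrix modes = pca.eigenVectors;  // principal modes of variation
* double[] variation = pca.eigenValues;   // corresponding variation values
* }</pre>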
* @author Mathias Unberath
*
*/
public class PCA {
/**
* Data-sets typically consist of point-clouds or meshes. This variable stores
* the dimension of the vertices, as each sample is flattened into a single column.
*/
public int dimension = 0;
/**
* Data-sets typically consist of point-clouds or meshes. This variable stores
* the number of vertices, as each sample is flattened into a single column.
*/
public int numVertices;
/**
* Number of points in a data-set, i.e. the product of the number of vertices and their dimension.
* Corresponds to the number of rows of the data matrix.
*/
public int numPoints;
/**
* Number of samples in the data matrix, corresponds to the number of columns.
*/
public int numSamples;
/**
* The matrix containing the data-sets to be analyzed.
*/
public DataMatrix data;
/**
* Connectivity information when dealing with a mesh.
*/
public SimpleMatrix connectivity;
/**
* Matrix containing the Eigenvectors of the covariance matrix after singular value decomposition.
*/
public SimpleMatrix eigenVectors;
/**
* Array containing the eigenvalues of the covariance matrix after singular value decomposition.
*/
public double[] eigenValues;
/**
* Number of principal components needed to reach the variation threshold.
*/
public int numComponents;
/**
* Threshold on the cumulative variation: principal components beyond the point where this fraction of the total variation is reached are omitted.
*/
public double variationThreshold = 1;
public boolean PLOT_SINGULAR_VALUES = false;
//==========================================================================================
// METHODS
//==========================================================================================
/**
* Constructs an empty PCA object. Variables need to be initialized before analysis can be performed.
* TODO implement init method for this constructor
*/
public PCA(){
}
/**
* Constructs a PCA object and initializes the data array and count variables.
* @param data The data array to be analyzed.
*/
public PCA(DataMatrix data){
this.numPoints = data.getRows();
this.numSamples = data.getCols();
this.dimension = data.dimension;
this.numVertices = numPoints / dimension;
data = scaleConsensus(data);
this.data = upscaleAndSubtractConsensus(data);
if(data.HAS_CONNECTIVITY){
this.connectivity = data.connectivity;
}
}
/**
* Constructs a PCA object and initializes the data array.
* Due to the lack of information about scaling factors and the consensus object, this constructor should not be
* used for statistical shape model generation after Generalized Procrustes Analysis.
* @param data The data array to be analyzed.
* @param dim The dimension of the data points.
*/
public PCA(SimpleMatrix data, int dim){
this.numPoints = data.getRows();
this.numSamples = data.getCols();
this.dimension = dim;
this.numVertices = numPoints / dimension;
DataMatrix datam = new DataMatrix();
datam.setDimensions(data.getRows(), dim, data.getCols());
datam.add(data);
datam.scaling = new ArrayList<Float>();
for(int i = 0; i < data.getCols(); i++){
datam.scaling.add(1f);
}
datam.consensus = getConsensus(data, dim);
this.data = upscaleAndSubtractConsensus(datam);
}
/**
* Calculates the consensus as the mean of all samples stored column-wise.
* @param data The data matrix with samples stored column-wise.
* @param dim The dimension of the data points.
* @return The consensus as point-like matrix.
*/
private SimpleMatrix getConsensus(SimpleMatrix data, int dim){
SimpleVector c = data.getCol(0);
for(int i = 1; i < data.getCols(); i++){
c.add(data.getCol(i));
}
c.divideBy(data.getCols());
SimpleMatrix cons = new SimpleMatrix(data.getRows()/dim, dim);
for(int i = 0; i < cons.getRows(); i++){
for(int j = 0; j < cons.getCols(); j++){
cons.setElementValue(i, j, c.getElement(i*dim + j));
}
}
return cons;
}
/**
* Performs the principal component analysis on the data-set.
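* The eigen-analysis is obtained from a singular value decomposition of the (re-scaled, consensus-subtracted) data matrix:
* for X = U S V^T, the columns of U span the principal directions, and the eigenvalues of the covariance matrix are
* proportional to the squared singular values.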
*/
public void run(){
assert(data != null) : new Exception("Initialize data array first.");
System.out.println("Starting principal component analysis on " + numSamples + " data-sets.");
DecompositionSVD svd = new DecompositionSVD(data);
plot(svd.getSingularValues());
this.numComponents = getPrincipalModesOfVariation(svd.getSingularValues());
reduceDimensionality(svd.getSingularValues(), normalizeColumns(svd.getU()));
//this.eigenVectors = normalizeColumns(svd.getU());
//this.eigenValues = svd.getSingularValues();
}
/**
* Performs the principal component analysis on the data-set.
* Alternative run with preset feature space dimensionality.
* @param dimensionality Number of principal components used to span the feature space.
*/
public void run(int dimensionality){
assert(data != null) : new Exception("Initialize data array first.");
assert(dimensionality <= this.numSamples ) : new Exception("Feature space dimensionality cannot exceed number of samples.");
System.out.println("Starting principal component analysis on " + numSamples + " data-sets.");
DecompositionSVD svd = new DecompositionSVD(data);
plot(svd.getSingularValues());
this.numComponents = dimensionality;
reduceDimensionality(svd.getSingularValues(), normalizeColumns(svd.getU()));
//this.eigenVectors = normalizeColumns(svd.getU());
//this.eigenValues = svd.getSingularValues();
}
/**
* Allocates and sets the principal components and the corresponding variation values. It is used for dimensionality reduction after
* the number of principal components needed has been determined.
* @param v The variation values.
* @param pc The principal components.
*/
private void reduceDimensionality(double[] v, SimpleMatrix pc){
this.eigenValues = new double[numComponents];
this.eigenVectors = new SimpleMatrix(numPoints, numComponents);
for(int i = 0; i < numComponents; i++){
this.eigenVectors.setColValue(i, pc.getCol(i));
this.eigenValues[i] = v[i];
}
}
/**
* Projects training shape num onto the principal components.
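* A short usage sketch (the index 0 is an arbitrary example; the analysis must have been run before):
* <pre>{@code
* double[] weights = pca.projectTrainingShape(0);
* SimpleMatrix reconstruction = pca.applyWeight(weights); // approximate reconstruction of the training shape
* }</pre>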
* @param num Index (column) of the training shape in the data matrix.
* @return The weights of the training shape with respect to the principal components.
*/
public double[] projectTrainingShape(int num){
assert(this.eigenValues != null) : new Exception("Run analysis first.");
// ASSUMES THAT CONSENSUS IS SUBTRACTED AND SCALE IS MULTIPLIED
double[] weights = new double[numComponents];
SimpleVector shape = data.getCol(num);
for(int i = 0; i < numComponents; i++){
SimpleVector comp = eigenVectors.getCol(i);
double val = SimpleOperators.multiplyInnerProd(shape, comp);
shape.subtract(comp.multipliedBy(val));
weights[i] = val/eigenValues[i];
}
//double error = shape.normL2()/shape.getLen();
return weights;
}
/**
* Calculates the number of principal components necessary to reach the variation threshold set in the class member.
* The cumulative variation of the first k components is the sum of their eigenvalues divided by the sum of all eigenvalues;
* the method returns the smallest k for which this ratio reaches the threshold.
* If the plot flag has been set to true, the cumulative variation is plotted as a function of the principal components.
* @param ev The eigenvalues of the covariance matrix.
* @return The number of principal components needed to reach the variation threshold.
*/
public int getPrincipalModesOfVariation(double[] ev){
double sum = 0;
for(int i = 0; i < ev.length; i++){
sum += ev[i];
}
double[] var = new double[ev.length];
var[0] = ev[0] / sum;
for(int i = 1; i < ev.length; i++){
var[i] = var[i-1] + ev[i] / sum;
}
int i = 0;
while(var[i] < variationThreshold && i<ev.length-1){
i++;
}
i++;
if(PLOT_SINGULAR_VALUES){
Plot plot = VisualizationUtil.createPlot(var, "Variation as function of principal component", "Principal Component", "Variation");
plot.show();
}
return (i<ev.length)?i:ev.length;
}
/**
* Applies weights to the columns of the score matrix and hence calculates a weighted variation of the data corresponding
* to a variation along the principal components. Each weight is multiplied with the corresponding eigenvalue.
* For the output, points of the dimensionality stored in the class member are assumed.
* Note that the weights are formulated in terms of variance, not standard deviation! Care has to be taken not to produce invalid shapes!
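* A usage sketch (the weight values are arbitrary examples):
* <pre>{@code
* float[] w = new float[pca.numComponents];
* w[0] = 0.5f;                             // vary the first mode only
* SimpleMatrix shape = pca.applyWeight(w); // numVertices x dimension point matrix
* }</pre>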
* @param weights The weights for the different principal components.
* @return The variation as point-like matrix.
*/
public SimpleMatrix applyWeight(float[] weights){
assert(weights.length == this.eigenVectors.getCols()) : new Exception("Weights don't match the size of the score matrix.");
SimpleVector col = new SimpleVector(numPoints);
for(int i = 0; i < weights.length; i++){
col.add(this.eigenVectors.getCol(i).multipliedBy(weights[i] * eigenValues[i]));
}
for(int i = 0; i < numVertices; i++){
SimpleVector row = data.consensus.getRow(i);
for(int j = 0; j < dimension; j++){
col.addToElement(i * dimension + j, row.getElement(j));
}
}
return toPointlikeMatrix(col);
}
/**
* Applies weights to the columns of the score matrix and hence calculates a weighted variation of the data corresponding
* to a variation along the principal components. Each weight is multiplied with the corresponding eigenvalue.
* For the output, points of the dimensionality stored in the class member are assumed.
* Note that the weights are formulated in terms of variance, not standard deviation! Care has to be taken not to produce invalid shapes!
* @param weights The weights for the different principal components.
* @return The variation as point-like matrix.
*/
public SimpleMatrix applyWeight(double[] weights){
assert(weights.length == this.eigenVectors.getCols()) : new Exception("Weights don't match the size of the score matrix.");
SimpleVector col = new SimpleVector(numPoints);
for(int i = 0; i < weights.length; i++){
col.add(this.eigenVectors.getCol(i).multipliedBy(weights[i] * eigenValues[i]));
}
for(int i = 0; i < numVertices; i++){
SimpleVector row = data.consensus.getRow(i);
for(int j = 0; j < dimension; j++){
col.addToElement(i * dimension + j, row.getElement(j));
}
}
return toPointlikeMatrix(col);
}
/**
* Re-scales the data-sets and subtracts the consensus from them.
* @param mat The data-sets.
* @return The data matrix after re-scaling and consensus subtraction.
*/
private DataMatrix upscaleAndSubtractConsensus(DataMatrix mat){
for(int k = 0; k < numSamples; k++){
float factor = mat.scaling.get(k);
for(int i = 0; i < numVertices; i++){
SimpleVector row = mat.consensus.getRow(i);
for(int j = 0; j < dimension; j++){
mat.multiplyElementBy(i * dimension + j, k, factor);
mat.subtractFromElement(i * dimension + j, k, row.getElement(j));
}
}
}
return mat;
}
/**
* Scales the consensus data-set. The scaling used is the mean scaling of all data-sets in the sample.
* @param mat The data-sets.
* @return The data-set after consensus scaling.
*/
private DataMatrix scaleConsensus(DataMatrix mat){
float scale = 0;
for(int i = 0; i < numSamples; i++){
scale += mat.scaling.get(i) / numSamples;
}
SimpleMatrix con = new SimpleMatrix(mat.consensus.getRows(), mat.consensus.getCols());
for(int i = 0; i < numVertices; i++){
for(int j = 0; j < dimension; j++){
double val = mat.consensus.getElement(i,j) * scale;
con.setElementValue(i, j, val);
}
}
mat.consensus = con;
return mat;
}
/**
* Normalizes the columns of a matrix.
* @param m The matrix whose columns will be normalized.
* @return A matrix with normalized column vectors.
*/
private SimpleMatrix normalizeColumns(SimpleMatrix m){
SimpleMatrix norm = new SimpleMatrix(m.getRows(), m.getCols());
for(int j = 0; j < m.getCols(); j++){
double s = 0;
for(int i = 0; i < m.getRows(); i++){
s += Math.pow(m.getElement(i, j),2);
}
s = Math.sqrt(s);
norm.setColValue(j, m.getCol(j).dividedBy(s));
}
return norm;
}
/**
* Reshapes a vector to a matrix under the assumption that the vector's entries are points of the dimension stored in the class member.
* @param vec The vector containing the data.
* @return The reshaped data as matrix.
*/
private SimpleMatrix toPointlikeMatrix(SimpleVector vec){
assert(vec.getLen() / dimension == numVertices) : new Exception("Dimensions don't match the input data.");
SimpleMatrix mat = new SimpleMatrix(numVertices, dimension);
for(int i = 0; i < numVertices; i++){
for(int j = 0; j < dimension; j++){
mat.setElementValue(i, j, vec.getElement(i * dimension + j));
}
}
return mat;
}
/**
* Plots the data in the array over its array index.
* @param data The data to be plotted.
*/
private void plot(double[] data){
if(PLOT_SINGULAR_VALUES){
new ImageJ();
Plot plot = VisualizationUtil.createPlot(data, "Singular values of data matrix", "Singular value", "Magnitude");
plot.show();
}
}
/**
* Returns the consensus object as one dimensional SimpleVector. Ordering is the same as in the data matrix:
* ( x_i y_i z_i x_(i+1) y_(i+1) z_(i+1) ... )
* @return The consensus object as single column vector.
*/
public SimpleVector getConsensus(){
SimpleVector c = new SimpleVector(data.consensus.getCols() * data.consensus.getRows());
for(int i = 0; i < data.consensus.getRows(); i++){
for(int j = 0; j < data.consensus.getCols(); j++){
c.setElementValue(i * data.consensus.getCols() + j, data.consensus.getElement(i, j));
}
}
return c;
}
}