SECholeskyDecomposition.java example

/*
 *  RapidMiner
 *
 *  Copyright (C) 2001-2014 by RapidMiner and the contributors
 *
 *  Complete list of developers available at our web site:
 *
 *       http://rapidminer.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator.learner.functions.kernel.rvm.util;

import java.util.LinkedList;

import com.rapidminer.tools.Tools;


import Jama.Matrix;

/**
 * A modified cholesky factorization. 
 * 
 * Given a n x n matrix A this decomposition produces a matrix L, such that
 * L * L' = A + E, with E being a minimal diagonal matrix making the sum of 
 * both matrices positive definite (and regular). In contrast to the standard
 * cholesky decomposition (as implemented in e.g. Jama) the matrix A doesn't 
 * have to be regular and positive definite as often happens due to numerical
 * instabilities. The determination of the diagonal elements of E is based
 * on Gerschgorin's circle theorem minimizing ||E||_inf.
 * 
 * The a-priori upper bound of ||E||_inf is linear in n;
 *  
 * REFERENCES:
 * 
 * Robert B. Schnabel and Elizabeth Eskow. 1990. "A New Modified Cholesky
 * Factorization," SIAM Journal of Scientific Statistical Computating,
 * 11, 6: 1136-58. 
 *
 * Robert B. Schnabel and Elizabeth Eskow. 1999. "A revised modified Cholesky
 * factorization algorithm," SIAM J. Optim., 9, 4: 1135--1148 (electronic)
 *
 * Jeff Gill and Gary King. 1998. "`Hessian not Invertable.' Help!"
 * manuscript in progress, Harvard University.
 *  
 * @author Piotr Kasprzak
 *
 */
public class SECholeskyDecomposition {

	/**
	 * The L-factor (lower triangle matrix) of matrix factorization 
	 * calculated in decompose().
	 * 
	 * NOTE: Due to pivoting the rows and columns DO NOT MATCH with the 
	 *       original matrix A.
	 */
	
	private Matrix L			= null;  

	/**
	 * The matrix used to reverse the pivot-transformations used in decompose()
	 * to construct L (PTR = Pivot Transform Reverse):
	 *   
	 * PTR * (L * L') * PTR' = A + E.
	 *  
	 * It's constructed from the reverse sequence of row/column swaps recorded
	 * in decompose(). 
	 */

	private Matrix PTR			= null;
	
	/**
	 * Data structures needed for the recording of the pivot transformations. 
	 */
	private static class PivotTransform {
		int pos1;
		int pos2;
		PivotTransform(int pos1, int pos2) {
			this.pos1 = pos1;
			this.pos2 = pos2;
		}
	}

	private LinkedList<PivotTransform> pivotTransformQueue = new LinkedList<PivotTransform>();
	
	private Matrix E			= null;			// The error-matrix E (a diagonal matrix) (with pivoting reversed)
	private double[] E_Diagonal	= null;			// The diagonal elements of E (with pivoting still applied)		
	private double ENorm		= 0;			// Infinity norm (= maximum of diagonals) of E
	
	private double detL			= 0;			// The determinant of L
	
	private int n				= 0;			// Size of A
	
	/**
	 * Constructors.
	 * 
	 * @param A
	 */
	
	public SECholeskyDecomposition(double[][] A) {
		this.decompose(new Matrix(A));
	}
	
	public SECholeskyDecomposition(Matrix A) {
		this.decompose(A);
	}	
	
	/**
	 * Swap rows i<->j and columns i<->j.
	 * 
	 * @param A
	 * @param i
	 * @param j
	 */
	
	private void swapRowsAndColumns(double[][] A, int i, int j, boolean isSymmetric, int offset) {

		double tmp;		
		int k;

		i += offset;
		j += offset;
		
		if (!isSymmetric) {
			
			/** Matrix is n x n standard (the easy case). */
			
			double[] tr;
			
			tr		= A[i];								// Swap rows of A
			A[i]	= A[j];
			A[j]	= tr;
		
			for (k = offset; k < n; k++) {				// Swap columns of A
				tmp	= A[k][i];
				A[k][i]	= A[k][j];
				A[k][j]	= tmp;
			}
			
		} else {
			
			/** Matrix is symmetric, only the lower triangle is used */

			int t;
			
			double[] ti = new double[n];
			double[] tj	= new double[n];
			
			if (i > j) {								// Make i < j
				t = i;
				i = j;
				j = t;
			}

			for (k = offset; k < i; k++) {				// Save i-th row / column
				ti[k] = A[i][k];
			}
			
			for (k = i; k < n; k++) {
				ti[k] = A[k][i];
			}

			tmp		= ti[i];							// simulate row / column swap
			ti[i]	= ti[j];
			ti[j]	= tmp;

			for (k = offset; k < j; k++) {				// safe j-th row / column
				tj[k] = A[j][k];
			}
			
			for (k = j; k < n; k++) {
				tj[k] = A[k][j];
			}

			tmp		= tj[i];							// simulate row / column swap
			tj[i]	= tj[j];
			tj[j]	= tmp;
			
			for (k = offset; k < i; k++) {				// Set i-th row (complete), j-th row (first part)
				A[i][k]	= tj[k];
				A[j][k] = ti[k];
			}
				
			for (k = i; k < j; k++) {					// Set i-th column (first part), j-th row (second & final part)
				A[k][i]	= tj[k];
				A[j][k] = ti[k];
			}
				
			for (k = j; k < n; k++) {					// Set i-th column (second & final part), j-th column (complete)
				A[k][i] = tj[k];
				A[k][j] = ti[k];
			}	
		}
	}
	
	/**
	 * Do the hard work.
	 * 
	 * @param MA_orig
	 */
	
	private void decompose(Matrix MA_orig) {
		
		double mach_eps	= 2.23e-16;							// Machine precision
		double tau			= Math.pow(mach_eps, 1.0 / 3.0);	// Cubic root of macheps
		double tau_mod		= Math.pow(mach_eps, 2.0 / 3.0);
		double mu			= 0.1;								// Part of the relaxation introduced in revised SE90 that allows
																// small negative diagonal elements in next submatrix.
																// \mu \in [0,1]
		
		n					= MA_orig.getRowDimension();
		E_Diagonal			= new double[n];
		
		Matrix MA			= (Matrix) MA_orig.clone();			// Don't override the original Matrix
		Matrix ML			= new Matrix(n, n, 0);				// L matrix factor
		
		double[][] A		= MA.getArray();
		double[][] L		= ML.getArray();
		
		double delta_prev	= 0;								// Previous diagonal element of E
		double delta		= 0;								// Current delta
		boolean phaseone	= true;								// We start in phase one
		
		int i, j, k;
		
		j = 0;													// Current iteration
		
		/** Do some sanity checks on A. */
		
		if (n != MA.getColumnDimension()) {
			// TODO: ERROR: Matrix not square
		}
		
		/** Determine the maximum magnitude of the diagonal elements of A. */
		
		double gamma		= 0;
		
		for (i = 0; i < n; i++) {
			if (Math.abs(A[i][i]) > gamma) {
				gamma = Math.abs(A[i][i]);
			}
		}

		/** PHASE ONE, matrix A potentially positive definite */
		
		while (j < n && phaseone) {
			
			/** Find the pivot element for current submatrix and the minimum
			 *  of the diagonal elements. 
			 */
			
			int pivot		= j;									// Pivot index
			double pivot_v	= Double.NEGATIVE_INFINITY;				// Pivot value
			
			double min		= Double.POSITIVE_INFINITY; 
			
			for (i = j; i < n; i++) {
				
				if (A[i][i] > pivot_v) {
					pivot_v	= A[i][i];
					pivot	= i;
				}
				
				if (A[i][i] < min) {
					min = A[i][i];
				}
			}
			
			/** Check for end of phase one, continue if
			 *  - pivot numerically > 0
			 *  - no large negative eigenvalue expected in the next submatrix 
			 */
			
			if (pivot_v < tau_mod * gamma || min < -mu * pivot_v) {
				phaseone = false;
				break;
			}
			
			/** Excecute the pivoting. */
			
			if (pivot != j) {

				swapRowsAndColumns(A, 0, pivot-j, true,  j);
				swapRowsAndColumns(L, j, pivot  , false, 0);

				pivotTransformQueue.add(new PivotTransform(pivot, j));				
			}
			
			/** Do a lookahead to check if the diagonal elements of the next
			 *  submatrix become too negative.
			 */
			
			min = Double.POSITIVE_INFINITY;
			double value;
			
			for (i = j+1; i < n; i++) {
			
				value = A[i][i] - A[i][j] * A[i][j] / A[j][j];
				if (value < min) {
					min = value;
				}
			}
			
			if (min < -mu * gamma) {
				phaseone = false;
				break;
			}
			
			/** Perform a iteration of standard cholesky factorization. */

			E_Diagonal[j] = 0;									// Current delta is 0
			
			L[j][j] = Math.sqrt(A[j][j]);						// Calculate the j-th diagonal element of L
			for (i = j+1; i < n; i++) {							// Calculate the rest of jth-column (below the diagonal) of L 
				L[i][j] = A[i][j] / L[j][j];
				for (k = j+1; k <= i; k++) {					// Update the i-th row of the next submatrix (in A)
					A[i][k] = A[i][k] - L[i][j] * L[k][j]; 
				}
			}
			
			j++;
		}
		
		/** PHASE TWO, matrix A NOT positive definite. */
		
		if (!phaseone && j == n-1) {
			
			/** Special case: last diagonal element negative. */
			
			delta			= -A[j][j] + Math.max(tau * -A[j][j] / (1.0 - tau), tau_mod * gamma);
			A[j][j] 		+= delta;
			L[j][j] 		= Math.sqrt(A[j][j]);
			E_Diagonal[j]	= delta;
			
		}
		
		if (!phaseone && j < n-1) {
			
			k = j;												// k + 1 == number of iteration performed in phase one
			
			/** Calculate lower Gerschgorin bounds of A_k. */
			
			double[] g = new double[n - k];
			
			for (i = k; i < n; i++) {
				
				double sum_l = 0, sum_r = 0;
				int l;
				
				for (l = k; l < i; l++) {						// Left side of the diagonal element
					sum_l += Math.abs(A[i][l]);
				}
				for (l = i+1; l < n; l++) {						// Right side of the diagonal element (using symmetry)
					sum_r += Math.abs(A[l][i]);
				}
				g[i-k] = A[i][i] - sum_l - sum_r;
			}
			
			/** The main iteration of PHASE TWO. */
			
			for (j = k; j < n-2; j++) {
				
				/** Pivot on maximum lower Gerschgorin bound. */
				
				int gmax		= j;
				double gmax_v	= Double.NEGATIVE_INFINITY;
				
				for (i = j; i < n; i++) {
					if (g[i-k] > gmax_v) {
						gmax_v	= g[i-k];
						gmax	= i;
					}
				}
				
				/** Excecute the pivoting. */
				
				if (gmax != j) {

					swapRowsAndColumns(A, 0, gmax-j, true,  j);
					swapRowsAndColumns(L, j, gmax,   false, 0);

					pivotTransformQueue.add(new PivotTransform(gmax, j));					
				}
				
				/** Calculate the delta to be added to the diagnoal element (= E_jj) */
				
				double normj = 0;
				for (i = j+1; i < n; i++) {
					normj += Math.abs(A[i][j]);
				}
				delta = Math.max(0, Math.max(-A[j][j] + Math.max(normj, tau_mod * gamma), delta_prev));
				if (delta > 0) {
					A[j][j] += delta;
					delta_prev = delta;
				}
			
				E_Diagonal[j] = delta;
				
				/** Update Gerschgorin lower bounds, not too sure this is
				 *  correct, but hey, no risk, no fun. */
				/* Shevek notes that this uses floating point
				 * equality and therefore the risk is higher than
				 * might have been assumed. */
				if (Tools.isNotEqual(A[j][j], normj)) {
					double t = 1.0 - normj / A[j][j];
					for (i = j+1; i < n; i++) {
						g[i-k] += t * Math.abs(A[i][j]);
					}
				}
				
				/** Perform a iteration of standard cholesky factorization. */
				
				L[j][j] = Math.sqrt(A[j][j]);						// Calculate the j-th diagonal element of L
				for (i = j+1; i < n; i++) {							// Calculate the rest of jth-column (below the diagonal) of L 
					L[i][j] = A[i][j] / L[j][j];
					for (int l = j+1; l <= i; l++) {					// Update the i-th row of the next submatrix (in A)
						A[i][l] = A[i][l] - L[i][j] * L[l][j]; 
					}
				}
				
			}
			
			/** Process the final 2 x 2 submatrix. */
			
			double[][] S	= new double[2][2];
			
			S[0][0] = A[n-2][n-2];
			S[1][1] = A[n-1][n-1];
			S[1][0] = S[0][1] = A[n-1][n-2];
			
			Matrix MS 		= new Matrix(S);
			double[] evs	= MS.eig().getRealEigenvalues();
			
			double lambda_lo = evs[0];
			double lambda_hi = evs[1];
			
			delta = Math.max(0, Math.max(-lambda_lo + Math.max(tau * (lambda_hi - lambda_lo) / (1.0 - tau), tau_mod * gamma), delta_prev));
			
			if (delta > 0) {
				A[n-2][n-2] += delta;
				A[n-1][n-1] += delta;
				delta_prev = delta;
			}
			
			L[n-2][n-2] = Math.sqrt(A[n-2][n-2]);
			L[n-1][n-2] = A[n-1][n-2] / L[n-2][n-2];
			L[n-1][n-1] = Math.sqrt(A[n-1][n-1] - L[n-1][n-2] * L[n-1][n-2]);
			
			E_Diagonal[n-2] = E_Diagonal[n-1] = delta;
		}
		
		this.L = ML;
		
		/** Because delta_i+1 >= delta_i, delta_prev == ||E||_inf */
		this.ENorm = delta_prev;
	}
	
	/**
	 * Build the Pivot-Transform-Reverse matrix PTR.
	 * 
	 * We start with a n x n identity matrix and for each recorded pivot
	 * transformation swap the according rows, reversing the sequence and
	 * starting with the last pivot transformation. 
	 */
	
	private void buildPTR() {
		
		double[]	temp_row; // = new double[n];
		double[][]	PTRA;
		
		int k;
		PivotTransform pt;
		
		PTR		= Matrix.identity(n, n);
		PTRA	= PTR.getArray();
		
		k 		= pivotTransformQueue.size();
		
		while (k-- > 0) {
			pt = pivotTransformQueue.removeLast();
			
			temp_row		= PTRA[pt.pos1];
			PTRA[pt.pos1]	= PTRA[pt.pos2];
			PTRA[pt.pos2]	= temp_row;
		}
	}
	
	/**
	 * Return the lower triangle matrix factor of the cholesky decomposition.
	 */
	
	public Matrix getL() {
		return L;
	}

	/**
	 * Return the matrix PTR with PTR * (L * L') * PTR' = A + E.
	 */
	
	public Matrix getPTR() {
		if (PTR == null) {
			buildPTR();
		}
		return PTR;
	}
		
	/**
	 * Return the diagonal of error-matrix E with the pivoting still applied.
	 */
	public double[] getE_Diagonal() {
		return E_Diagonal;
	}
	
	/**
	 * Return the error-matrix E with pivoting reversed.
	 */
	
	public Matrix getE() {
		
		if (E == null) {
			
			/* Create n x 1 vector with the diagonal elements of E */
			Matrix diagVector = new Matrix(E_Diagonal.clone(), n);
			
			/* Use PTR matrix to reverse the pivot transformations on this vector */
			if (PTR == null) buildPTR();
			diagVector = PTR.times(diagVector);
			
			E = new Matrix(n, n, 0.0);
			
			/* Because E is a diagonal matrix we can now easily build it from the
			 * pivot adjusted vector */
			for (int i = 0; i < n; i++) {
				E.set(i, i, diagVector.get(i, 0));
			}
		}
		
		return this.E;
	}
	
	/**
	 * Return the infinity norm of matrix E.
	 */
	
	public double getENorm() {
		return this.ENorm;
	}
	
	/**
	 * Return the determinant of L.
	 */
	
	public double getDetL() {
		
		if (detL == 0) {
			
			double det = 1;
			for (int i = 0; i < L.getRowDimension(); i++) {
				det *= L.get(i, i);
			}
			
			detL = det;
		}
		
		return detL;
	}
	
	/**
	 * Return the determinant of A.
	 */
	
	public double getDetA() {
		return (getDetL() * getDetL());
	}
}