/*
* RapidMiner
*
* Copyright (C) 2001-2014 by RapidMiner and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapidminer.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.learner.functions.kernel.rvm.util;
import java.util.LinkedList;
import com.rapidminer.tools.Tools;
import Jama.Matrix;
/**
* A modified cholesky factorization.
*
* Given a n x n matrix A this decomposition produces a matrix L, such that
* L * L' = A + E, with E being a minimal diagonal matrix making the sum of
* both matrices positive definite (and regular). In contrast to the standard
* cholesky decomposition (as implemented in e.g. Jama) the matrix A doesn't
* have to be regular and positive definite as often happens due to numerical
* instabilities. The determination of the diagonal elements of E is based
* on Gerschgorin's circle theorem minimizing ||E||_inf.
*
* The a-priori upper bound of ||E||_inf is linear in n;
*
* REFERENCES:
*
* Robert B. Schnabel and Elizabeth Eskow. 1990. "A New Modified Cholesky
* Factorization," SIAM Journal of Scientific Statistical Computating,
* 11, 6: 1136-58.
*
* Robert B. Schnabel and Elizabeth Eskow. 1999. "A revised modified Cholesky
* factorization algorithm," SIAM J. Optim., 9, 4: 1135--1148 (electronic)
*
* Jeff Gill and Gary King. 1998. "`Hessian not Invertable.' Help!"
* manuscript in progress, Harvard University.
*
* @author Piotr Kasprzak
*
*/
public class SECholeskyDecomposition {
/**
* The L-factor (lower triangle matrix) of matrix factorization
* calculated in decompose().
*
* NOTE: Due to pivoting the rows and columns DO NOT MATCH with the
* original matrix A.
*/
private Matrix L = null;
/**
* The matrix used to reverse the pivot-transformations used in decompose()
* to construct L (PTR = Pivot Transform Reverse):
*
* PTR * (L * L') * PTR' = A + E.
*
* It's constructed from the reverse sequence of row/column swaps recorded
* in decompose().
*/
private Matrix PTR = null;
/**
* Data structures needed for the recording of the pivot transformations.
*/
private static class PivotTransform {
int pos1;
int pos2;
PivotTransform(int pos1, int pos2) {
this.pos1 = pos1;
this.pos2 = pos2;
}
}
private LinkedList<PivotTransform> pivotTransformQueue = new LinkedList<PivotTransform>();
private Matrix E = null; // The error-matrix E (a diagonal matrix) (with pivoting reversed)
private double[] E_Diagonal = null; // The diagonal elements of E (with pivoting still applied)
private double ENorm = 0; // Infinity norm (= maximum of diagonals) of E
private double detL = 0; // The determinant of L
private int n = 0; // Size of A
/**
* Constructors.
*
* @param A
*/
public SECholeskyDecomposition(double[][] A) {
this.decompose(new Matrix(A));
}
public SECholeskyDecomposition(Matrix A) {
this.decompose(A);
}
/**
* Swap rows i<->j and columns i<->j.
*
* @param A
* @param i
* @param j
*/
private void swapRowsAndColumns(double[][] A, int i, int j, boolean isSymmetric, int offset) {
double tmp;
int k;
i += offset;
j += offset;
if (!isSymmetric) {
/** Matrix is n x n standard (the easy case). */
double[] tr;
tr = A[i]; // Swap rows of A
A[i] = A[j];
A[j] = tr;
for (k = offset; k < n; k++) { // Swap columns of A
tmp = A[k][i];
A[k][i] = A[k][j];
A[k][j] = tmp;
}
} else {
/** Matrix is symmetric, only the lower triangle is used */
int t;
double[] ti = new double[n];
double[] tj = new double[n];
if (i > j) { // Make i < j
t = i;
i = j;
j = t;
}
for (k = offset; k < i; k++) { // Save i-th row / column
ti[k] = A[i][k];
}
for (k = i; k < n; k++) {
ti[k] = A[k][i];
}
tmp = ti[i]; // simulate row / column swap
ti[i] = ti[j];
ti[j] = tmp;
for (k = offset; k < j; k++) { // safe j-th row / column
tj[k] = A[j][k];
}
for (k = j; k < n; k++) {
tj[k] = A[k][j];
}
tmp = tj[i]; // simulate row / column swap
tj[i] = tj[j];
tj[j] = tmp;
for (k = offset; k < i; k++) { // Set i-th row (complete), j-th row (first part)
A[i][k] = tj[k];
A[j][k] = ti[k];
}
for (k = i; k < j; k++) { // Set i-th column (first part), j-th row (second & final part)
A[k][i] = tj[k];
A[j][k] = ti[k];
}
for (k = j; k < n; k++) { // Set i-th column (second & final part), j-th column (complete)
A[k][i] = tj[k];
A[k][j] = ti[k];
}
}
}
/**
* Do the hard work.
*
* @param MA_orig
*/
private void decompose(Matrix MA_orig) {
double mach_eps = 2.23e-16; // Machine precision
double tau = Math.pow(mach_eps, 1.0 / 3.0); // Cubic root of macheps
double tau_mod = Math.pow(mach_eps, 2.0 / 3.0);
double mu = 0.1; // Part of the relaxation introduced in revised SE90 that allows
// small negative diagonal elements in next submatrix.
// \mu \in [0,1]
n = MA_orig.getRowDimension();
E_Diagonal = new double[n];
Matrix MA = (Matrix) MA_orig.clone(); // Don't override the original Matrix
Matrix ML = new Matrix(n, n, 0); // L matrix factor
double[][] A = MA.getArray();
double[][] L = ML.getArray();
double delta_prev = 0; // Previous diagonal element of E
double delta = 0; // Current delta
boolean phaseone = true; // We start in phase one
int i, j, k;
j = 0; // Current iteration
/** Do some sanity checks on A. */
if (n != MA.getColumnDimension()) {
// TODO: ERROR: Matrix not square
}
/** Determine the maximum magnitude of the diagonal elements of A. */
double gamma = 0;
for (i = 0; i < n; i++) {
if (Math.abs(A[i][i]) > gamma) {
gamma = Math.abs(A[i][i]);
}
}
/** PHASE ONE, matrix A potentially positive definite */
while (j < n && phaseone) {
/** Find the pivot element for current submatrix and the minimum
* of the diagonal elements.
*/
int pivot = j; // Pivot index
double pivot_v = Double.NEGATIVE_INFINITY; // Pivot value
double min = Double.POSITIVE_INFINITY;
for (i = j; i < n; i++) {
if (A[i][i] > pivot_v) {
pivot_v = A[i][i];
pivot = i;
}
if (A[i][i] < min) {
min = A[i][i];
}
}
/** Check for end of phase one, continue if
* - pivot numerically > 0
* - no large negative eigenvalue expected in the next submatrix
*/
if (pivot_v < tau_mod * gamma || min < -mu * pivot_v) {
phaseone = false;
break;
}
/** Excecute the pivoting. */
if (pivot != j) {
swapRowsAndColumns(A, 0, pivot-j, true, j);
swapRowsAndColumns(L, j, pivot , false, 0);
pivotTransformQueue.add(new PivotTransform(pivot, j));
}
/** Do a lookahead to check if the diagonal elements of the next
* submatrix become too negative.
*/
min = Double.POSITIVE_INFINITY;
double value;
for (i = j+1; i < n; i++) {
value = A[i][i] - A[i][j] * A[i][j] / A[j][j];
if (value < min) {
min = value;
}
}
if (min < -mu * gamma) {
phaseone = false;
break;
}
/** Perform a iteration of standard cholesky factorization. */
E_Diagonal[j] = 0; // Current delta is 0
L[j][j] = Math.sqrt(A[j][j]); // Calculate the j-th diagonal element of L
for (i = j+1; i < n; i++) { // Calculate the rest of jth-column (below the diagonal) of L
L[i][j] = A[i][j] / L[j][j];
for (k = j+1; k <= i; k++) { // Update the i-th row of the next submatrix (in A)
A[i][k] = A[i][k] - L[i][j] * L[k][j];
}
}
j++;
}
/** PHASE TWO, matrix A NOT positive definite. */
if (!phaseone && j == n-1) {
/** Special case: last diagonal element negative. */
delta = -A[j][j] + Math.max(tau * -A[j][j] / (1.0 - tau), tau_mod * gamma);
A[j][j] += delta;
L[j][j] = Math.sqrt(A[j][j]);
E_Diagonal[j] = delta;
}
if (!phaseone && j < n-1) {
k = j; // k + 1 == number of iteration performed in phase one
/** Calculate lower Gerschgorin bounds of A_k. */
double[] g = new double[n - k];
for (i = k; i < n; i++) {
double sum_l = 0, sum_r = 0;
int l;
for (l = k; l < i; l++) { // Left side of the diagonal element
sum_l += Math.abs(A[i][l]);
}
for (l = i+1; l < n; l++) { // Right side of the diagonal element (using symmetry)
sum_r += Math.abs(A[l][i]);
}
g[i-k] = A[i][i] - sum_l - sum_r;
}
/** The main iteration of PHASE TWO. */
for (j = k; j < n-2; j++) {
/** Pivot on maximum lower Gerschgorin bound. */
int gmax = j;
double gmax_v = Double.NEGATIVE_INFINITY;
for (i = j; i < n; i++) {
if (g[i-k] > gmax_v) {
gmax_v = g[i-k];
gmax = i;
}
}
/** Excecute the pivoting. */
if (gmax != j) {
swapRowsAndColumns(A, 0, gmax-j, true, j);
swapRowsAndColumns(L, j, gmax, false, 0);
pivotTransformQueue.add(new PivotTransform(gmax, j));
}
/** Calculate the delta to be added to the diagnoal element (= E_jj) */
double normj = 0;
for (i = j+1; i < n; i++) {
normj += Math.abs(A[i][j]);
}
delta = Math.max(0, Math.max(-A[j][j] + Math.max(normj, tau_mod * gamma), delta_prev));
if (delta > 0) {
A[j][j] += delta;
delta_prev = delta;
}
E_Diagonal[j] = delta;
/** Update Gerschgorin lower bounds, not too sure this is
* correct, but hey, no risk, no fun. */
/* Shevek notes that this uses floating point
* equality and therefore the risk is higher than
* might have been assumed. */
if (Tools.isNotEqual(A[j][j], normj)) {
double t = 1.0 - normj / A[j][j];
for (i = j+1; i < n; i++) {
g[i-k] += t * Math.abs(A[i][j]);
}
}
/** Perform a iteration of standard cholesky factorization. */
L[j][j] = Math.sqrt(A[j][j]); // Calculate the j-th diagonal element of L
for (i = j+1; i < n; i++) { // Calculate the rest of jth-column (below the diagonal) of L
L[i][j] = A[i][j] / L[j][j];
for (int l = j+1; l <= i; l++) { // Update the i-th row of the next submatrix (in A)
A[i][l] = A[i][l] - L[i][j] * L[l][j];
}
}
}
/** Process the final 2 x 2 submatrix. */
double[][] S = new double[2][2];
S[0][0] = A[n-2][n-2];
S[1][1] = A[n-1][n-1];
S[1][0] = S[0][1] = A[n-1][n-2];
Matrix MS = new Matrix(S);
double[] evs = MS.eig().getRealEigenvalues();
double lambda_lo = evs[0];
double lambda_hi = evs[1];
delta = Math.max(0, Math.max(-lambda_lo + Math.max(tau * (lambda_hi - lambda_lo) / (1.0 - tau), tau_mod * gamma), delta_prev));
if (delta > 0) {
A[n-2][n-2] += delta;
A[n-1][n-1] += delta;
delta_prev = delta;
}
L[n-2][n-2] = Math.sqrt(A[n-2][n-2]);
L[n-1][n-2] = A[n-1][n-2] / L[n-2][n-2];
L[n-1][n-1] = Math.sqrt(A[n-1][n-1] - L[n-1][n-2] * L[n-1][n-2]);
E_Diagonal[n-2] = E_Diagonal[n-1] = delta;
}
this.L = ML;
/** Because delta_i+1 >= delta_i, delta_prev == ||E||_inf */
this.ENorm = delta_prev;
}
/**
* Build the Pivot-Transform-Reverse matrix PTR.
*
* We start with a n x n identity matrix and for each recorded pivot
* transformation swap the according rows, reversing the sequence and
* starting with the last pivot transformation.
*/
private void buildPTR() {
double[] temp_row; // = new double[n];
double[][] PTRA;
int k;
PivotTransform pt;
PTR = Matrix.identity(n, n);
PTRA = PTR.getArray();
k = pivotTransformQueue.size();
while (k-- > 0) {
pt = pivotTransformQueue.removeLast();
temp_row = PTRA[pt.pos1];
PTRA[pt.pos1] = PTRA[pt.pos2];
PTRA[pt.pos2] = temp_row;
}
}
/**
* Return the lower triangle matrix factor of the cholesky decomposition.
*/
public Matrix getL() {
return L;
}
/**
* Return the matrix PTR with PTR * (L * L') * PTR' = A + E.
*/
public Matrix getPTR() {
if (PTR == null) {
buildPTR();
}
return PTR;
}
/**
* Return the diagonal of error-matrix E with the pivoting still applied.
*/
public double[] getE_Diagonal() {
return E_Diagonal;
}
/**
* Return the error-matrix E with pivoting reversed.
*/
public Matrix getE() {
if (E == null) {
/* Create n x 1 vector with the diagonal elements of E */
Matrix diagVector = new Matrix(E_Diagonal.clone(), n);
/* Use PTR matrix to reverse the pivot transformations on this vector */
if (PTR == null) buildPTR();
diagVector = PTR.times(diagVector);
E = new Matrix(n, n, 0.0);
/* Because E is a diagonal matrix we can now easily build it from the
* pivot adjusted vector */
for (int i = 0; i < n; i++) {
E.set(i, i, diagVector.get(i, 0));
}
}
return this.E;
}
/**
* Return the infinity norm of matrix E.
*/
public double getENorm() {
return this.ENorm;
}
/**
* Return the determinant of L.
*/
public double getDetL() {
if (detL == 0) {
double det = 1;
for (int i = 0; i < L.getRowDimension(); i++) {
det *= L.get(i, i);
}
detL = det;
}
return detL;
}
/**
* Return the determinant of A.
*/
public double getDetA() {
return (getDetL() * getDetL());
}
}