package edu.stanford.rsl.conrad.optimization; import edu.stanford.rsl.conrad.data.numeric.NumericGrid; import edu.stanford.rsl.conrad.numerics.SimpleMatrix; import edu.stanford.rsl.conrad.numerics.SimpleMatrix.InversionType; import edu.stanford.rsl.conrad.numerics.SimpleOperators; import edu.stanford.rsl.conrad.numerics.SimpleVector; import edu.stanford.rsl.conrad.utils.CONRAD; /** * This class implements the Levenberg-Marquardt algorithm for the optimization of functions. * Derivatives with respect to each parameter have to be defined in the OptimizableFunction. * @author Mathias Unberath */ public class LMA { private boolean PRINT = true; /** * Flag to decide whether or not numerical derivatives shall be computed. */ private boolean NUMERICAL_DERIVATIVE = false; /** * Step-size for the computation of numerical derivatives. */ private double step = 1e-7; /** * The function used to transform the data points when approximating the values. */ private OptimizableFunction func; /** * The measured function values that will be approximated. * values = f(data, parameters) */ private NumericGrid values; /** * The data that will be transformed to approximate the values. */ private NumericGrid data; /** Dimension of the Grid. */ private int[] dim; /** * Weights for each data point. * Objective function: chi2 = sum[ ( values - f(data,parameters) )^2 * weights ] */ private NumericGrid weights; /** * Number of parameters of the transformation function. */ private int nParam; /** * The parameters of the transforming function. */ private double[] parameters; /** * Parameters after update. */ private double[] parametersIncremented; /** Value of objective function chi2 = sum[ ( values - f(data,parameters) )^2 * weights ] */ private double chi2; /** Value of objective function chi2 after the update step. */ private double chi2Incremented; /** Iteration counter */ private int nIter; /** Jacobian matrix w.r.t the parameters. */ private SimpleMatrix alpha; private SimpleVector beta; /** Update steps for parameters. */ private SimpleVector dParam; /** Damping factor. */ private double lambda = 0.0001; /** Update factor for damping parameter lambda, lambda is multiplied or divided depending on * whether or not the update was successful. */ private double lambdaFactor = 10; /** Minimal error update, end condition */ private double deltaChi2Min = 1e-10; /** Maximum number of iterations. */ private int maxIter = 100; //========================================================================================== // METHODS //========================================================================================== public LMA(NumericGrid values, NumericGrid data, OptimizableFunction func, boolean NUMERICAL_DERIVATIVE){ this.NUMERICAL_DERIVATIVE = NUMERICAL_DERIVATIVE; this.values = values; this.data = data; this.func = func; this.dim = values.getSize(); assert(dim == data.getSize()) : new IllegalArgumentException("Sizes of data and value grids must match!"); this.nParam = func.getNumberOfParameters(); this.parameters = func.getInitialParameters(); this.parametersIncremented = new double[nParam]; this.weights = func.getWeights(); this.alpha = new SimpleMatrix(nParam, nParam); this.beta = new SimpleVector(nParam); this.dParam = new SimpleVector(nParam); } /** * Sets the step size for the calculation of the numerical derivative. * @param step The step size to be used. */ public void setDerivativeStepSize(double step){ this.step = step; } /** * Returns the parameters after optimization. * @return The final parameters */ public double[] getFinalParameters(){ return this.parameters; } /** * Set the parameters and perform optimization. If the parameters are set to be smaller or equal to 0, the default values will be used. * @param lambda The step width. * @param deltaChi2Min The stop condition for the update. * @param maxIter The stop condition for number of iterations. */ public void run(double lambda, double deltaChi2Min, int maxIter){ System.out.println(); System.out.println("Starting Levenberg-Marquardt optimization.\n"); if(lambda <= 0){ System.out.println("Using default value for lambda: " + this.lambda); }else{ this.lambda = lambda; System.out.println("Lambda is: " + this.lambda); } if(deltaChi2Min <= 0){ System.out.println("Using default value for minimum update step: " + this.deltaChi2Min); }else{ this.deltaChi2Min = deltaChi2Min; System.out.println("Minimum update step is: " + this.deltaChi2Min); } if(maxIter <= 0){ System.out.println("Using default value for maximum number of iterations: " + this.maxIter); }else{ this.maxIter = maxIter; System.out.println("Maximum number of iterations is: " + this.maxIter); } try { runInternal(); } catch (Exception e) { e.printStackTrace(); } } /** * Performs the optimization. Is called by the public run method that allows for the setting of different optimization parameters. * @throws Exception */ private void runInternal() throws Exception{ nIter = 0; // check if input parameters are valid chi2 = getChi2(); if(Double.isNaN(chi2)){ throw new IllegalArgumentException("Initial function parameters seem to be invalid."); } // perform optimization while( !stop() ){ chi2 = getChi2(); updateAlpha(); updateBeta(); if(PRINT){ String param =""; for(int ipar = 0; ipar < nParam; ipar ++){ param += Double.valueOf(parameters[ipar]) + " "; } System.out.println(nIter + " : " + chi2 + " : " + param); } try{ getIncrements(); chi2Incremented = getChi2Incremented(); // if update was worse than initial guess, increase damping, e.g. smaller step if( (chi2Incremented >= chi2) || (Double.isNaN(chi2Incremented)) ){ lambda *= lambdaFactor; }else{ // if update was successful decrease damping, e.g. bigger step lambda /= lambdaFactor; updateParameters(); } }catch(Exception e){ // only throw exception if it was the last step // if intermediate step, try increased damping if(nIter == maxIter ){ e.printStackTrace(); throw e; }else{ lambda *= lambdaFactor; } } nIter++; } printResults(); } /** * Calculates all elements of the beta vector and sets them at the corresponding position. */ private void updateBeta(){ for(int i = 0; i < nParam; i++){ this.beta.setElementValue(i, getBetaElement(i)); } } /** * Calculates an entry of the beta vector. This vector is the right-hand-side of the Levenberg-Marquardt function. * Used to be calculated in the OptimizableFunction. This approach is still possible if changing the code to call the function's method, * however we do not recommend this. * General procedure: pseudo code * * for all points * sum += weight * function.derive(atPoint, param, wrtComponent) * ( values(atPoint) - function.evaluate(atPoint, param) ) * end * * @param row Index of derivative parameter * @return The beta element for this parameter */ private double getBetaElement(int row){ int nVal = this.values.getNumberOfElements(); double[] dF = new double[nVal]; for(int i = 0; i < nVal; i++){ int[] idx = linearToArrayIndex(i); if(NUMERICAL_DERIVATIVE){ dF[i] = getNumericalDerivativeAtPoint(data, idx, parameters, row); }else{ dF[i] = func.getDerivativeAtPoint(data, idx, parameters, row); } } double val = 0; for(int i = 0; i < nVal; i++){ int[] idx = linearToArrayIndex(i); double diff = values.getValue(idx) - func.evaluateAtPoint(data, idx, parameters); val += weights.getValue(idx) * dF[i] * diff; } return val; } /** * Calculates all elements of the alpha matrix and sets them at the corresponding positions. */ private void updateAlpha(){ for(int i = 0; i < nParam; i++){ for(int j = 0; j < nParam; j++){ //System.out.println("Calculating alpha: " + i + " , " + j); alpha.setElementValue(i, j, getAlphaValue(i, j)); } } } /** * Calculates the entry at position (row, col) in the Jacobian Matrix. * Used to be calculated in the OptimizableFunction. This approach is still possible if changing the code here to call the function's method, * however we do not encourage this. * General procedure: pseudo code * * for all points * sum += weight * function.derive(atPoint, param, wrtComponent1) * function.derive(atPoint, param, wrtComponent2) * end * if diagonal: sum *= (1 + lambda) * * @param row The index of first derivative parameter * @param col The index of second derivative parameter * @return The Jacobian entry */ private double getAlphaValue(int row, int col){ double val = getJacobianElement(data, parameters, row, col, weights); return val; } /** * Calculates the Jacobian element at index (row,col) in the manner specified in the calling method. * @param data * @param paramters * @param row * @param col * @param weights * @return The Jacobian element */ private double getJacobianElement(NumericGrid data, double[] paramters, int row, int col, NumericGrid weights){ int nVal = this.values.getNumberOfElements(); double[] dRow = new double[nVal]; double[] dCol = new double[nVal]; if(NUMERICAL_DERIVATIVE){ for(int i = 0; i < nVal; i++){ int[] idx = linearToArrayIndex(i); dRow[i] = getNumericalDerivativeAtPoint(data, idx, parameters, row); } for(int i = 0; i < nVal; i++){ int[] idx = linearToArrayIndex(i); dCol[i] = getNumericalDerivativeAtPoint(data, idx, parameters, col); } }else{ for(int i = 0; i < nVal; i++){ int[] idx = linearToArrayIndex(i); dRow[i] = func.getDerivativeAtPoint(data, idx, parameters, row); } for(int i = 0; i < nVal; i++){ int[] idx = linearToArrayIndex(i); dCol[i] = func.getDerivativeAtPoint(data, idx, parameters, col); } } double val = 0; for(int i = 0; i < nVal; i++){ int[] idx = linearToArrayIndex(i); val += weights.getValue(idx) * dRow[i] * dCol[i]; } if(row == col){ val *= (1 + lambda); } return val; } /** * Calculates the numerical derivative using finite differencing. We use a a symmetric difference of this.step in forward and backward direction. * @param data Data array containing the function values to be evaluated * @param idx The index of the point to be evaluated * @param param The current parameter estimate * @param dP The parameter w.r.t. which the derivative shall be computed * @return The numerical derivative at this location */ private double getNumericalDerivativeAtPoint(NumericGrid data, int[] idx, double[] param, int dP){ double[] lower = param.clone(); lower[dP] -= this.step; double[] upper = param.clone(); upper[dP] += this.step; return ( (func.evaluateAtPoint(data, idx, upper) - func.evaluateAtPoint(data, idx, lower)) / (2 * step)); } /** * Method to compute a multi-dimensional array index from a linear index. Implemented from: * http://math.stackexchange.com/questions/19765/calculating-coordinates-from-a-flattened-3d-array-when-you-know-the-size-index * @param lin The linear index. * @return The multi-dimensional index. */ private int[] linearToArrayIndex(int lin){ int[] idx = new int[this.dim.length]; for(int i = this.dim.length-1; i >= 0; i--){ int val = 1; for(int j = 0; j < i; j++){ val *= this.dim[j]; } idx[i] = lin / val; lin -= idx[i]*val; } return idx; } /** * Calculates the incremented parameters using the alpha matrix. Make sure to check if inverse is well conditioned. * @throws Exception */ private void getIncrements() throws Exception{ this.alpha = alpha.inverse(InversionType.INVERT_SVD); if(alpha.isSingular(CONRAD.DOUBLE_EPSILON)){ throw new Exception("Matrix is singular."); } dParam = SimpleOperators.multiply(alpha, beta); for(int i = 0; i < nParam; i++){ this.parametersIncremented[i] = this.parameters[i] + dParam.getElement(i); } } /** * This method overwrites the parameters with the incremented parameters, if accepted. */ private void updateParameters(){ for(int i = 0; i < nParam; i++){ this.parameters[i] = this.parametersIncremented[i]; } } /** * Prints the results after optimization to the console window. */ private void printResults(){ System.out.println("Optimization ended after : " + nIter + " iterations."); System.out.println("_____________________________________________________"); System.out.println("Goodness of fit: " + chi2 / data.getNumberOfElements()); } /** * Evaluates the convergence criterion against the forced end condition. * @return Whether or not to continue. */ private boolean stop(){ double delta = Math.abs(chi2 - chi2Incremented); return ( (delta < deltaChi2Min) || (nIter > maxIter) ); } /** * Returns the chi2 value for the current parameters. * @return Current chi2 */ private double getChi2(){ return getChi2(this.parameters); } /** * Returns the chi2 for the incremented parameters. * @return Chi2 after applying parameter increments */ private double getChi2Incremented(){ return getChi2(this.parametersIncremented); } /** * Calculates chi2 for a set of parameters using the function. * @param param The parameters to evaluate the function. * @return chi2 */ private double getChi2(double[] param){ int nVal = this.values.getNumberOfElements(); double val = 0; for(int i = 0; i < nVal; i++){ int[] idx = linearToArrayIndex(i); val += weights.getValue(idx) * Math.pow(values.getValue(idx) - func.evaluateAtPoint(data, idx, param), 2); } return val; } } /* * Copyright (C) 2010-2014 Mathias Unberath * CONRAD is developed as an Open Source project under the GNU General Public License (GPL). */