/*
* ConjugateGradientSearch.java
*
* Copyright (c) 2002-2015 Alexei Drummond, Andrew Rambaut and Marc Suchard
*
* This file is part of BEAST.
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership and licensing.
*
* BEAST is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* BEAST is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with BEAST; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301 USA
*/
package dr.math;
/**
 * Minimization of a real-valued function of
 * several variables using the nonlinear
 * conjugate gradient method. Several variants of the direction
 * update are available (Fletcher-Reeves, Polak-Ribiere,
 * Beale-Sorenson/Hestenes-Stiefel) and bounds are respected.
 * Gradients are computed numerically if they are not supplied by the
 * user. The line search is based entirely on derivative
 * evaluation, similar to the strategy used in macopt (MacKay).
*
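 * <p>
 * A minimal usage sketch (the quadratic test function, bounds and
 * tolerances below are purely illustrative assumptions; it is also
 * assumed that MultivariateFunction declares evaluate, getNumArguments
 * and per-argument getLowerBound/getUpperBound, as its use by this
 * class and the bounded line search suggests):
 * <pre>
 * MultivariateFunction f = new MultivariateFunction() {
 *     public double evaluate(double[] argument) {
 *         // illustrative quadratic with minimum at (1, 0)
 *         return (argument[0] - 1.0) * (argument[0] - 1.0) + 10.0 * argument[1] * argument[1];
 *     }
 *     public int getNumArguments() { return 2; }
 *     public double getLowerBound(int n) { return -10.0; }
 *     public double getUpperBound(int n) { return 10.0; }
 * };
 *
 * double[] x = {5.0, 5.0};  // starting guess, overwritten with the minimum found
 * ConjugateGradientSearch search =
 *         new ConjugateGradientSearch(ConjugateGradientSearch.POLAK_RIBIERE_UPDATE);
 * search.prin = 1;                    // print starting and final values
 * search.optimize(f, x, 1e-8, 1e-8);  // tolfx, tolx convergence tolerances
 * </pre>
 *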
* @version $Id: ConjugateGradientSearch.java,v 1.3 2005/05/24 20:26:00 rambaut Exp $
*
* @author Korbinian Strimmer
*/
public class ConjugateGradientSearch extends MultivariateMinimum
{
//
// Public stuff
//
public final static int FLETCHER_REEVES_UPDATE = 0;
public final static int POLAK_RIBIERE_UPDATE = 1;
public final static int BEALE_SORENSON_HESTENES_STIEFEL_UPDATE = 2;
	// Variables that control aspects of the inner workings of the
	// minimization algorithm. Setting them is optional; they
	// all default to the reasonable values given below.
/**
* controls the printed output from the routine
* (0 -> no output, 1 -> print only starting and final values,
	 * 2 -> detailed map of the minimization process);
	 * the default value is 0
*/
public int prin = 0;
/**
	 * defaultStep is a step-length parameter and should be set equal
	 * to the expected distance from the solution (in a line search).
	 * Exceptionally small or large values of defaultStep lead to
	 * slower convergence in the first few iterations (the step length
	 * itself is adapted during the search); the default value is 1.0
*/
public double defaultStep = 1.0;
/**
* conjugateGradientStyle determines the method for the
	 * conjugate gradient direction update
	 * (0 -> Fletcher-Reeves, 1 -> Polak-Ribiere,
* 2 -> Beale-Sorenson, Hestenes-Stiefel), the default is 2.
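	 * For example, new ConjugateGradientSearch(POLAK_RIBIERE_UPDATE)
	 * selects the Polak-Ribiere update.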
*/
public int conjugateGradientStyle = BEALE_SORENSON_HESTENES_STIEFEL_UPDATE;
public ConjugateGradientSearch() {
}
public ConjugateGradientSearch(int conGradStyle) {
this.conjugateGradientStyle = conGradStyle;
}
// implementation of abstract method
public void optimize(MultivariateFunction f, double[] x, double tolfx, double tolx)
{
		optimize(f, x, tolfx, tolx, null);
}
public void optimize(MultivariateFunction f, double[] x, double tolfx, double tolx, MinimiserMonitor monitor)
{
xvec = x;
numArgs = f.getNumArguments();
boolean numericalGradient;
if (f instanceof MFWithGradient)
{
numericalGradient = false;
fgrad = (MFWithGradient) f;
}
else
{
numericalGradient = true;
fgrad = null;
}
// line function
LineFunction lf = new LineFunction(f);
// xvec contains current guess for minimum
lf.checkPoint(xvec);
double[] xold = new double[numArgs];
copy(xold, xvec);
// function value and gradient at current guess
numFun = 0;
double fx;
numGrad = 0;
gvec = new double[numArgs];
if (numericalGradient)
{
fx = f.evaluate(xvec);
numFun++;
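			// numerical gradient: the 2*numArgs added below assumes
			// one central difference (two evaluations) per argument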
NumericalDerivative.gradient(f, xvec, gvec);
numFun += 2*numArgs;
}
else
{
fx = fgrad.evaluate(xvec, gvec);
numFun++;
numGrad++;
}
double[] gold = new double[numArgs];
copy(gold, gvec);
// init stop condition
stopCondition(fx, xvec, tolfx, tolx, true);
// currently active variables
boolean[] active = new boolean[numArgs];
double numActive = lf.checkVariables(xvec, gvec, active);
// if no variables are active return
if (numActive == 0)
{
return;
}
// initial search direction (steepest descent)
sdir = new double[numArgs];
steepestDescentDirection(sdir, gvec, active);
lf.update(xvec, sdir);
// slope at start point in initial direction
double slope = gradientProjection(sdir, gvec);
if (prin > 0)
{
System.out.println("--- starting minimization ---");
System.out.println("... current parameter settings ...");
System.out.println("... tolx ... " + tolx);
System.out.println("... tolfx ... " + tolfx);
System.out.println("... maxFun ... " + maxFun);
System.out.println();
printVec("... start vector ...", xvec);
System.out.println();
printVec("... start direction ...", sdir);
}
int numLin = 0;
lastStep = defaultStep;
while(true)
{
// determine an appropriate step length
double step = findStep(lf, fx, slope, numericalGradient);
lastStep = step;
numLin++;
// update xvec
lf.getPoint(step, xvec);
lf.checkPoint(xvec);
// function value at current guess
if (numericalGradient)
{
fx = f.evaluate(xvec);
numFun++;
}
else
{
// compute gradient as well
fx = fgrad.evaluate(xvec, gvec);
numFun++;
numGrad++;
}
			// test for convergence
if (stopCondition(fx, xvec, tolfx, tolx, false)
|| (maxFun > 0 && numFun > maxFun))
{
break;
}
// Compute numerical gradient
if (numericalGradient)
{
NumericalDerivative.gradient(f, xvec, gvec);
numFun += 2*numArgs;
}
numActive = lf.checkVariables(xvec, gvec, active);
// if all variables are inactive return
if (numActive == 0)
{
break;
}
// determine new search direction
conjugateGradientDirection(sdir, gvec, gold, active);
lf.checkDirection(xvec, sdir);
// compute slope in new direction
slope = gradientProjection(sdir, gvec);
if (slope >= 0)
{
//reset to steepest descent direction
steepestDescentDirection(sdir, gvec, active);
// compute slope in new direction
slope = gradientProjection(sdir, gvec);
// reset to default step length
lastStep = defaultStep;
}
// other updates
lf.update(xvec, sdir);
copy(xold, xvec);
copy(gold, gvec);
if (prin > 1)
{
System.out.println();
System.out.println("Function value: " +
f.evaluate(xvec));
System.out.println();
printVec("... new vector ...", xvec);
System.out.println();
printVec("... new direction ...", sdir);
System.out.println("... numFun ... " + numFun);
if (!numericalGradient)
{
System.out.println("... numGrad ... " + numGrad);
}
System.out.println("... numLin ... " + numLin);
System.out.println();
}
			if (monitor != null) {
				monitor.newMinimum(f.evaluate(xvec), xvec);
			}
}
if (prin > 0)
{
System.out.println();
printVec("... final vector ...", xvec);
System.out.println("... numFun ... " + numFun);
System.out.println("... numLin ... " + numLin);
System.out.println();
System.out.println("--- end of minimization ---");
}
}
//
// Private stuff
//
private int numArgs, numGrad;
private double lastStep;
private double[] xvec, gvec, sdir;
private MFWithGradient fgrad;
private double findStep(LineFunction lf, double f0, double s0, boolean numericalGradient)
{
// f0 function value at step = 0
// s0 slope at step = 0
double step;
double maxStep = lf.getUpperBound();
if (maxStep <= 0 || s0 == 0)
{
return 0.0;
}
//step = Math.abs(lf.findMinimum());
// growing/shrinking factors for bracketing
double g1 = 2.0;
double g2 = 1.25;
double g3 = 0.5;
// x1 and x2 try to bracket the minimum
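		// (the slope s1 at x1 stays <= 0; x2 is pushed out until its slope s2
		// becomes positive or the upper bound is reached, so a sign change in
		// the directional derivative brackets a minimum along the line)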
double x1 = 0;
double s1 = s0;
double x2 = lastStep*g2;
if(x2 > maxStep)
{
x2 = maxStep*g3;
}
double s2 = computeDerivative(lf, x2, numericalGradient);
// we need to go further to bracket minimum
boolean boundReached = false;
while (s2 <= 0 && !boundReached)
{
x1 = x2;
s1 = s2;
x2 = x2*g1;
if (x2 > maxStep)
{
x2 = maxStep;
boundReached = true;
}
s2 = computeDerivative(lf, x2, numericalGradient);
}
// determine step length by quadratic interpolation
// for minimum in interval [x1,x2]
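		// (quadratic interpolation of the function along the line is the same
		// as linear interpolation of its slope between (x1,s1) and (x2,s2);
		// solving for the zero of that line gives (x1*s2 - x2*s1)/(s2 - s1))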
if (s2 <= 0)
{
// true local minimum could NOT be bracketed
// instead we have a local minimum on a boundary
step = x2;
}
else
{
// minimum is bracketed
step = (x1*s2-x2*s1)/(s2-s1);
			// note that both the numerator and the denominator are
			// positive here (s1 <= 0 < s2), so the step is positive
}
// just to be safe - should not be necessary
if (step >= maxStep)
{
step = maxStep;
}
if (step < 0)
{
step = 0;
}
return step;
}
private double computeDerivative(LineFunction lf, double lambda, boolean numericalGradient)
{
if (numericalGradient)
{
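			// a symmetric (central) finite difference is assumed here,
			// hence two extra function evaluations are counted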
numFun += 2;
return NumericalDerivative.firstDerivative(lf, lambda);
}
else
{
/* lf.getPoint(lambda, xvec);
lf.checkPoint(xvec);
fgrad.computeGradient(xvec, gvec);
numGrad++;
return gradientProjection(sdir, gvec);
*/
// the following code prevents overstepping
// and is due to Jesse Stone <jessestone@yahoo.com>
double[] xtmp = new double[numArgs];
copy(xtmp, xvec);
lf.getPoint(lambda, xtmp);
lf.checkPoint(xtmp);
fgrad.computeGradient(xtmp, gvec);
numGrad++;
return gradientProjection(sdir, gvec);
}
}
/*
private void testStep(double f0, double s0, double f1, double s1, double step)
{
// f0 function value at x=0
// s0 slope at x=0
// f1 function value at x=step
		// s1 slope at x=step
		double mue = 0.0001;
		double eta = 0.9;
		// sufficient decrease in function value
if (f1 <= mue*s0*step + f0)
{
System.out.println("<<< Sufficient decrease in function value");
}
else
{
System.out.println("<<< NO sufficient decrease in function value");
}
// sufficient decrease in slope
if (Math.abs(s1) <= eta*Math.abs(s0))
{
System.out.println("<<< Sufficient decrease in slope");
}
else
{
System.out.println("<<< NO sufficient decrease in slope");
}
}
*/
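	// Direction update: the new direction is d = -g + beta*d_old, where beta
	// depends on conjugateGradientStyle (sums run over active variables only):
	//   Fletcher-Reeves:                      beta = g.g / gold.gold
	//   Polak-Ribiere:                        beta = g.(g - gold) / gold.gold
	//   Beale-Sorenson / Hestenes-Stiefel:    beta = g.(g - gold) / d_old.(g - gold)
	// A negative beta is reset to zero (restart), following Gilbert and Nocedal.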
private void conjugateGradientDirection(double[] sdir, double[] gvec, double[] gold,
boolean[] active)
{
double gg = 0;
double dgg = 0;
for (int i = 0; i < numArgs; i++)
{
if (active[i])
{
switch (conjugateGradientStyle)
{
case 0:
// Fletcher-Reeves
dgg += gvec[i]*gvec[i];
gg += gold[i]*gold[i];
break;
case 1:
// Polak-Ribiere
dgg += gvec[i]*(gvec[i]-gold[i]);
gg += gold[i]*gold[i];
break;
case 2:
// Beale-Sorenson
// Hestenes-Stiefel
dgg += gvec[i] * (gvec[i] - gold[i]);
gg += sdir[i] * (gvec[i] - gold[i]);
break;
}
}
}
double beta = dgg/gg;
if (beta < 0 || gg == 0)
{
// better convergence (Gilbert and Nocedal)
beta = 0;
}
for (int i = 0; i < numArgs; i++)
{
if (active[i])
{
sdir[i] = -gvec[i] + beta*sdir[i];
}
else
{
sdir[i] = 0;
}
}
}
private void steepestDescentDirection(double[] sdir, double[] gvec, boolean[] active)
{
for (int i = 0; i < numArgs; i++)
{
if (active[i])
{
sdir[i] = -gvec[i];
}
else
{
sdir[i] = 0;
}
}
}
private double gradientProjection(double[] sdir, double[] gvec)
{
		// directional derivative: dot product of gradient and search direction
		double s = 0;
		int n = gvec.length;
		for (int i = 0; i < n; i++)
{
s += gvec[i]*sdir[i];
}
return s;
}
private void printVec(String s, double[] x)
{
System.out.println(s);
for (int i=0; i < x.length; i++)
{
System.out.print(x[i] + " ");
}
System.out.println();
}
}