/* * ConjugateDirectionSearch.java * * Copyright (c) 2002-2015 Alexei Drummond, Andrew Rambaut and Marc Suchard * * This file is part of BEAST. * See the NOTICE file distributed with this work for additional * information regarding copyright ownership and licensing. * * BEAST is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * BEAST is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with BEAST; if not, write to the * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, * Boston, MA 02110-1301 USA */ package dr.math; /** * methods for minimization of a real-valued function of * several variables without using derivatives * - algorithm is based on Brent's modification of a conjugate direction * search method proposed by Powell (praxis) * Richard P. Brent. 1973. Algorithms for finding zeros and extrema * * @author Korbinian Strimmer */ public class ConjugateDirectionSearch extends MultivariateMinimum { // // Public stuff // /** * constructor */ public ConjugateDirectionSearch() { } // Variables that control aspects of the inner workings of the // minimization algorithm. Setting them is optional, they // are all set to some reasonable default values given below. /** * controls the printed output from the routine * (0 -> no output, 1 -> print only starting and final values, * 2 -> detailed map of the minimization process, * 3 -> print also eigenvalues and vectors of the * search directions), the default value is 0 */ public int prin = 0; /** * step is a steplength parameter and should be set equal * to the expected distance from the solution. * exceptionally small or large values of step lead to * slower convergence on the first few iterations * the default value for step is 1.0 */ public double step = 1.0; /** * scbd is a scaling parameter. 1.0 is the default and * indicates no scaling. if the scales for the different * parameters are very different, scbd should be set to * a value of about 10.0. */ public double scbd = 1.0; /** * illc should be set to true * if the problem is known to * be ill-conditioned. the default is false. this * variable is automatically set, when the problem * is found to to be ill-conditioned during iterations. */ public boolean illc = false; // implementation of abstract method public void optimize(MultivariateFunction f, double[] xvector, double tolfx, double tolx) { optimize(f,xvector,tolfx,tolx,null); } public void optimize(MultivariateFunction f, double[] xvector, double tolfx, double tolx, MinimiserMonitor monitor) { t = tolx; fun = f; x = xvector; checkBounds(x); h = step; dim = fun.getNumArguments(); d = new double[dim]; y = new double[dim]; z = new double[dim]; q0 = new double[dim]; q1 = new double[dim]; v = new double[dim][dim]; tflin = new double[dim]; small = MachineAccuracy.EPSILON*MachineAccuracy.EPSILON; vsmall = small*small; large = 1.0/small; vlarge = 1.0/vsmall; ldfac = (illc ? 0.1 : 0.01); nl = kt = 0; numFun = 1; fx = fun.evaluate(x); stopCondition(fx, x, tolfx, tolx, true); qf1 = fx; t2 = small + Math.abs(t); t = t2; dmin = small; if (h < 100.0*t) h = 100.0*t; ldt = h; for (i = 0; i < dim; i++) { for (j = 0; j < dim; j++) { v[i][j] = (i == j ? 1.0 : 0.0); } } d[0] = 0.0; qd0 = 0.0; for (i = 0; i < dim; i++) q1[i] = x[i]; if (prin > 1) { System.out.println("\n------------- enter function praxis -----------\n"); System.out.println("... current parameter settings ..."); System.out.println("... scaling ... " + scbd); System.out.println("... tolx ... " + t); System.out.println("... tolfx ... " + tolfx); System.out.println("... maxstep ... " + h); System.out.println("... illc ... " + illc); System.out.println("... maxFun ... " + maxFun); } if (prin > 0) System.out.println(); while(true) { sf = d[0]; s = d[0] = 0.0; /* minimize along first direction */ min1 = d[0]; min2 = s; min(0, 2, fx, false); d[0] = min1; s = min2; if (s <= 0.0) for (i = 0; i < dim; i++) { v[i][0] = -v[i][0]; } if ((sf <= (0.9 * d[0])) || ((0.9 * sf) >= d[0])) for (i=1; i < dim; i++) d[i] = 0.0; boolean gotoFret = false; for (k=1; k < dim; k++) { for (i=0; i< dim; i++) { y[i] = x[i]; } sf = fx; illc = illc || (kt > 0); boolean gotoNext; do { kl = k; df = 0.0; if (illc) { /* random step to get off resolution valley */ for (i=0; i < dim; i++) { z[i] = (0.1 * ldt + t2 * Math.pow(10.0,(double)kt)) * (MathUtils.nextDouble() - 0.5); s = z[i]; for (j=0; j < dim; j++) { x[j] += s * v[j][i]; } } checkBounds(x); fx = fun.evaluate(x); numFun++; } /* minimize along non-conjugate directions */ for (k2=k; k2 < dim; k2++) { sl = fx; s = 0.0; min1 = d[k2]; min2 = s; min(k2, 2, fx, false); d[k2] = min1; s = min2; if (illc) { double szk = s + z[k2]; s = d[k2] * szk*szk; } else s = sl - fx; if (df < s) { df = s; kl = k2; } } if (!illc && (df < Math.abs(100.0 * MachineAccuracy.EPSILON * fx))) { illc = true; gotoNext = true; } else gotoNext = false; } while (gotoNext); if ((k == 1) && (prin > 1)) vecprint("\n... New Direction ...", d); /* minimize along conjugate directions */ for (k2=0; k2<=k-1; k2++) { s = 0.0; min1 = d[k2]; min2 = s; min(k2, 2, fx, false); d[k2] = min1; s = min2; } f1 = fx; fx = sf; lds = 0.0; for (i=0; i<dim; i++) { sl = x[i]; x[i] = y[i]; y[i] = sl - y[i]; sl = y[i]; lds = lds + sl*sl; } checkBounds(x); lds = Math.sqrt(lds); if (lds > small) { for (i=kl-1; i>=k; i--) { for (j=0; j < dim; j++) v[j][i+1] = v[j][i]; d[i+1] = d[i]; } d[k] = 0.0; for (i=0; i < dim; i++) v[i][k] = y[i] / lds; min1 = d[k]; min2 = lds; min(k, 4, f1, true); d[k] = min1; lds = min2; if (lds <= 0.0) { lds = -lds; for (i=0; i< dim; i++) v[i][k] = -v[i][k]; } } ldt = ldfac * ldt; if (ldt < lds) ldt = lds; if (prin > 1) print(); if(monitor!=null) { monitor.newMinimum(fx,x); } if(stopCondition(fx, x, tolfx, tolx, false)) { kt++; } else { kt = 0; } if (kt > 1) { gotoFret = true; break; } } if (gotoFret) break; /* try quadratic extrapolation in case */ /* we are stuck in a curved valley */ quadr(); dn = 0.0; for (i=0; i < dim; i++) { d[i] = 1.0 / Math.sqrt(d[i]); if (dn < d[i]) dn = d[i]; } if (prin > 2) matprint("\n... New Matrix of Directions ...",v); for (j=0; j < dim; j++) { s = d[j] / dn; for (i=0; i < dim; i++) v[i][j] *= s; } if (scbd > 1.0) { /* scale axis to reduce condition number */ s = vlarge; for (i=0; i < dim; i++) { sl = 0.0; for (j=0; j < dim; j++) sl += v[i][j]*v[i][j]; z[i] = Math.sqrt(sl); if (z[i] < MachineAccuracy.SQRT_SQRT_EPSILON) z[i] = MachineAccuracy.SQRT_SQRT_EPSILON; if (s > z[i]) s = z[i]; } for (i=0; i < dim; i++) { sl = s / z[i]; z[i] = 1.0 / sl; if (z[i] > scbd) { sl = 1.0 / scbd; z[i] = scbd; } } } for (i=1; i < dim; i++) for (j=0; j<=i-1; j++) { s = v[i][j]; v[i][j] = v[j][i]; v[j][i] = s; } minfit(dim, MachineAccuracy.EPSILON, vsmall, v, d); if (scbd > 1.0) { for (i=0; i < dim; i++) { s = z[i]; for (j=0; j < dim; j++) v[i][j] *= s; } for (i=0; i < dim; i++) { s = 0.0; for (j=0; j < dim; j++) s += v[j][i]*v[j][i]; s = Math.sqrt(s); d[i] *= s; s = 1.0 / s; for (j=0; j < dim; j++) v[j][i] *= s; } } for (i=0; i < dim; i++) { if ((dn * d[i]) > large) d[i] = vsmall; else if ((dn * d[i]) < small) d[i] = vlarge; else d[i] = Math.pow(dn * d[i],-2.0); } sort(); /* the new eigenvalues and eigenvectors */ dmin = d[dim-1]; if (dmin < small) dmin = small; illc = (MachineAccuracy.SQRT_EPSILON * d[0]) > dmin; if ((prin > 2) && (scbd > 1.0)) vecprint("\n... Scale Factors ...",z); if (prin > 2) vecprint("\n... Eigenvalues of A ...",d); if (prin > 2) matprint("\n... Eigenvectors of A ...",v); if ((maxFun > 0) && (nl > maxFun)) { if (prin > 0) System.out.println("\n... maximum number of function calls reached ..."); break; } } if (prin > 0) { vecprint("\n... Final solution is ...", x); System.out.println("\n... Function value reduced to " + fx + " ..."); System.out.println("... after " + numFun + " function calls."); } //return (fx); } // // Private stuff // // some global variables private int i, j, k, k2, nl, kl, kt; private double s, sl, dn, dmin, fx, f1, lds, ldt, sf, df, qf1, qd0, qd1, qa, qb, qc, small, vsmall, large, vlarge, ldfac, t2; // need to be initialised private double[] d; private double[] y; private double[] z; private double[] q0; private double[] q1; private double[][] v; private double[] tflin; private int dim; private double[] x; private MultivariateFunction fun; // these will be set by praxis to the global control parameters private double h, t; // sort d and v in descending order private void sort() { int k, i, j; double s; for (i=0; i < dim-1; i++) { k = i; s = d[i]; for (j=i+1; j < dim; j++) { if (d[j] > s) { k = j; s = d[j]; } } if (k > i) { d[k] = d[i]; d[i] = s; for (j=0; j < dim; j++) { s = v[j][i]; v[j][i] = v[j][k]; v[j][k] = s; } } } } private void vecprint(String s, double[] x) { System.out.println(s); for (int i=0; i < x.length; i++) System.out.print(x[i] + " "); System.out.println(); } private void print() /* print a line of traces */ { System.out.println(); System.out.println("... function value reduced to ... " + fx); System.out.println("... after " + numFun + " function calls ..."); System.out.println("... including " + nl + " linear searches ..."); vecprint("... current values of x ...", x); } private void matprint(String s, double[][] v) { System.out.println(s); for (int k=0; k<v.length; k++) { for (int i=0; i<v.length; i++) { System.out.print(v[k][i] + " "); } System.out.println(); } } private double flin(double l, int j) { if (j != -1) { /* linear search */ for (int i = 0; i < dim; i++) tflin[i] = x[i] + l*v[i][j]; } else { /* search along parabolic space curve */ qa = l*(l-qd1)/(qd0*(qd0+qd1)); qb = (l+qd0)*(qd1-l)/(qd0*qd1); qc = l*(l+qd0)/(qd1*(qd0+qd1)); for (int i = 0; i < dim; i++) { tflin[i] = qa*q0[i]+qb*x[i]+qc*q1[i]; } } checkBounds(tflin); numFun++; return fun.evaluate(tflin); } private void checkBounds(double[] p) { for (int i = 0; i < dim; i++) { if (p[i] < fun.getLowerBound(i)) { p[i] = fun.getLowerBound(i); } if (p[i] > fun.getUpperBound(i)) { p[i] = fun.getUpperBound(i); } } } private double min1; private double min2; private void min(int j, int nits, double f1, boolean fk) { int k; double x2, xm, f0, f2, fm, d1, t2, s, sf1, sx1; sf1 = f1; sx1 = min2; k = 0; xm = 0.0; fm = f0 = fx; boolean dz = (min1 < MachineAccuracy.EPSILON); /* find step size */ s = 0; for (int i=0; i < dim; i++) { s += x[i]*x[i]; } s = Math.sqrt(s); if (dz) { t2 = MachineAccuracy.SQRT_SQRT_EPSILON* Math.sqrt(Math.abs(fx)/dmin + s*ldt) + MachineAccuracy.SQRT_EPSILON*ldt; } else { t2 = MachineAccuracy.SQRT_SQRT_EPSILON* Math.sqrt(Math.abs(fx)/(min1) + s*ldt) + MachineAccuracy.SQRT_EPSILON*ldt; } s = s*MachineAccuracy.SQRT_SQRT_EPSILON + t; if (dz && t2 > s) t2 = s; if (t2 < small) t2 = small; if (t2 > 0.01*h) t2 = 0.01 * h; if (fk && f1 <= fm) { xm = min2; fm = f1; } if (!fk || Math.abs(min2) < t2) { min2 = (min2 > 0 ? t2 : -t2); f1 = flin(min2, j); } if (f1 <= fm) { xm = min2; fm = f1; } boolean gotoNext; do { if (dz) { x2 = (f0 < f1 ? -(min2) : 2*(min2)); f2 = flin(x2, j); if (f2 <= fm) { xm = x2; fm = f2; } min1 = (x2*(f1-f0) - (min2)*(f2-f0))/((min2)*x2*((min2)-x2)); } d1 = (f1-f0)/(min2) - min2*min1; dz = true; if (min1 <= small) { x2 = (d1 < 0 ? h : -h); } else { x2 = - 0.5*d1/(min1); } if (Math.abs(x2) > h) x2 = (x2 > 0 ? h : -h); f2 = flin(x2, j); gotoNext = false; while ((k < nits) && (f2 > f0)) { k++; if ((f0 < f1) && (min2*x2 > 0.0)) { gotoNext = true; break; } x2 *= 0.5; f2 = flin(x2, j); } } while (gotoNext); nl++; if (f2 > fm) x2 = xm; else fm = f2; if (Math.abs(x2*(x2-min2)) > small) { min1 = (x2*(f1-f0) - min2*(fm-f0))/(min2*x2*(min2-x2)); } else { if (k > 0) min1 = 0; } if (min1 <= small) min1 = small; min2 = x2; fx = fm; if (sf1 < fx) { fx = sf1; min2 = sx1; } if (j != -1) { for (i=0; i < dim; i++) { x[i] += (min2)*v[i][j]; } checkBounds(x); } } // Look for a minimum along the curve q0, q1, q2 private void quadr() { int i; double l, s; s = fx; fx = qf1; qf1 = s; qd1 = 0.0; for (i=0; i < dim; i++) { s = x[i]; l = q1[i]; x[i] = l; q1[i] = s; qd1 = qd1 + (s-l)*(s-l); } s = 0.0; qd1 = Math.sqrt(qd1); l = qd1; if (qd0>0.0 && qd1>0.0 &&nl>=3*dim*dim) { min1 = s; min2 = l; min(-1, 2, qf1, true); s = min1; l = min2; qa = l*(l-qd1)/(qd0*(qd0+qd1)); qb = (l+qd0)*(qd1-l)/(qd0*qd1); qc = l*(l+qd0)/(qd1*(qd0+qd1)); } else { fx = qf1; qa = qb = 0.0; qc = 1.0; } qd0 = qd1; for (i=0; i<dim; i++) { s = q0[i]; q0[i] = x[i]; x[i] = qa*s + qb*x[i] + qc*q1[i]; } checkBounds(x); } // Singular value decomposition private void minfit(int n, double eps, double tol, double[][] ab, double[] q) { int l=0, kt, l2, i, j, k; double c, f, g, h, s, x, y, z; double[] e = new double[dim]; /* householder's reduction to bidiagonal form */ x = g = 0.0; for (i=0; i<n; i++) { e[i] = g; s = 0.0; l = i+1; for (j=i; j<n; j++) s += ab[j][i] * ab[j][i]; if (s < tol) { g = 0.0; } else { f = ab[i][i]; if (f < 0.0) g = Math.sqrt(s); else g = -Math.sqrt(s); h = f*g - s; ab[i][i] = f - g; for (j=l; j<n; j++) { f = 0.0; for (k=i; k<n; k++) f += ab[k][i] * ab[k][j]; f /= h; for (k=i; k<n; k++) ab[k][j] += f * ab[k][i]; } } q[i] = g; s = 0.0; if (i < n) for (j=l; j<n; j++) s += ab[i][j] * ab[i][j]; if (s < tol) { g = 0.0; } else { f = ab[i][i+1]; if (f < 0.0) g = Math.sqrt(s); else g = - Math.sqrt(s); h = f*g - s; ab[i][i+1] = f - g; for (j=l; j<n; j++) e[j] = ab[i][j]/h; for (j=l; j<n; j++) { s = 0; for (k=l; k<n; k++) s += ab[j][k]*ab[i][k]; for (k=l; k<n; k++) ab[j][k] += s * e[k]; } } y = Math.abs(q[i]) + Math.abs(e[i]); if (y > x) x = y; } /* accumulation of right hand transformations */ for (i=n-1; i >= 0; i--) { if (g != 0.0) { h = ab[i][i+1]*g; for (j=l; j<n; j++) ab[j][i] = ab[i][j] / h; for (j=l; j<n; j++) { s = 0.0; for (k=l; k<n; k++) s += ab[i][k] * ab[k][j]; for (k=l; k<n; k++) ab[k][j] += s * ab[k][i]; } } for (j=l; j<n; j++) ab[i][j] = ab[j][i] = 0.0; ab[i][i] = 1.0; g = e[i]; l = i; } /* diagonalization to bidiagonal form */ eps *= x; boolean converged = false; for (k=n-1; k>= 0; k--) { kt = 0; do { kt++; boolean skipNext = false; for (l2=k; l2>=0; l2--) { l = l2; if (Math.abs(e[l]) <= eps) { skipNext = true; break; } if (Math.abs(q[l-1]) <= eps) break; } if (skipNext == false) { c = 0.0; s = 1.0; for (i=l; i<=k; i++) { f = s * e[i]; e[i] *= c; if (Math.abs(f) <= eps) break; g = q[i]; if (Math.abs(f) < Math.abs(g)) { double fg = f/g; h = Math.abs(g)*Math.sqrt(1.0+fg*fg); } else { double gf = g/f; h = (f!=0.0 ? Math.abs(f)*Math.sqrt(1.0+gf*gf) : 0.0); } q[i] = h; if (h == 0.0) { h = 1.0; g = 1.0; } c = g/h; s = -f/h; } } z = q[k]; if (l == k) { converged = true; break; } /* shift from bottom 2x2 minor */ x = q[l]; y = q[k-l]; g = e[k-1]; h = e[k]; f = ((y-z)*(y+z) + (g-h)*(g+h)) / (2.0*h*y); g = Math.sqrt(f*f+1.0); if (f <= 0.0) f = ((x-z)*(x+z) + h*(y/(f-g)-h))/x; else f = ((x-z)*(x+z) + h*(y/(f+g)-h))/x; /* next qr transformation */ s = c = 1.0; for (i=l+1; i<=k; i++) { g = e[i]; y = q[i]; h = s*g; g *= c; if (Math.abs(f) < Math.abs(h)) { double fh = f/h; z = Math.abs(h) * Math.sqrt(1.0 + fh*fh); } else { double hf = h/f; z = (f!=0.0 ? Math.abs(f)*Math.sqrt(1.0+hf*hf) : 0.0); } e[i-1] = z; if (z == 0.0) f = z = 1.0; c = f/z; s = h/z; f = x*c + g*s; g = - x*s + g*c; h = y*s; y *= c; for (j=0; j<n; j++) { x = ab[j][i-1]; z = ab[j][i]; ab[j][i-1] = x*c + z*s; ab[j][i] = - x*s + z*c; } if (Math.abs(f) < Math.abs(h)) { double fh = f/h; z = Math.abs(h) * Math.sqrt(1.0 + fh*fh); } else { double hf = h/f; z = (f!=0.0 ? Math.abs(f)*Math.sqrt(1.0+hf*hf) : 0.0); } q[i-1] = z; if (z == 0.0) z = f = 1.0; c = f/z; s = h/z; f = c*g + s*y; x = - s*g + c*y; } e[l] = 0.0; e[k] = f; q[k] = x; } while (kt <= 30); if (!converged) { e[k] = 0.0; System.out.println("\n+++ qr failed\n"); System.exit(1); } if (z < 0.0) { q[k] = - z; for (j=0; j<n; j++) ab[j][k] = - ab[j][k]; } } } }