/*
* Skyline.java
*
* Copyright (c) 2002-2015 Alexei Drummond, Andrew Rambaut and Marc Suchard
*
* This file is part of BEAST.
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership and licensing.
*
* BEAST is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* BEAST is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with BEAST; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301 USA
*/
package dr.evolution.coalescent;
import dr.evolution.tree.Tree;
import dr.evolution.util.Units;
import dr.math.Binomial;
/**
* Skyline plot derived from a strictly bifurcating tree
* or a coalescent interval.
*
* This class provides the "classic" skyline plot method by
* Pybus, Rambaut and Harvey 2000. Genetics 155:1429-1437, as well
* as the "generalized" skyline plot method described in
* Strimmer and Pybus. 2001. MBE submitted.
*
* @version $Id: Skyline.java,v 1.15 2005/05/24 20:25:56 rambaut Exp $
*
* @author Korbinian Strimmer
*/
public class Skyline implements Units {
//
// Public stuff
//
Skyline() {}
/**
* Construct skyline plot from tree
*
* @param epsilon smoothing parameter (if set < 0 then epsilon will be optimized)
*/
public Skyline(Tree tree, double epsilon)
{
this(new TreeIntervals(tree), 1.0, epsilon);
}
/**
* Construct skyline plot from tree
*
* @param epsilon smoothing parameter (if set < 0 then epsilon will be optimized)
*/
public Skyline(Tree tree, double mutationRate, double epsilon)
{
this(new TreeIntervals(tree), mutationRate, epsilon);
}
/**
* Construct skyline plot from given coalescent intervals
*
* @param epsilon smoothing parameter (if set < 0 then epsilon will be optimized)
*/
public Skyline(IntervalList intervals, double mutationRate, double epsilon)
{
init(intervals, mutationRate, epsilon);
}
void init(IntervalList intervals, double mutationRate, double epsilon) {
if (!intervals.isBinaryCoalescent()) {
throw new IllegalArgumentException("All intervals must contain only a single coalescent");
}
mu = mutationRate;
size = intervals.getIntervalCount();
this.intervals = intervals;
// population size in each coalescent interval
populationSize = new double[size];
// cumulative interval sizes
cis = new double[size];
maxTime = 0.0;
for (int i = 0; i < size; i++) {
cis[i] = maxTime;
maxTime += intervals.getInterval(i) / mu;
}
if (epsilon == 0.0) {
/* init with classic skyline plot */
computeClassic();
} else if (epsilon > 0.0) {
/* init with generalized skyline plot */
computeGeneralized(epsilon);
} else {
// find optimal generalized skyline plot
optimize();
}
}
public String toString()
{
StringBuffer buffer = new StringBuffer("Skyline Plot ");
if (eps == 0.0) {
buffer.append("(classic): ");
} else {
buffer.append("(generalized): ");
buffer.append("epsilon = " + eps + ", ");
}
buffer.append("log L = " + getLogLikelihood());
if (params > size-2) {
buffer.append("\tlog L(AICC) not available");
} else {
buffer.append("\tlog L(AICC) = " + getAICC());
}
return buffer.toString();
}
/**
* Compute classic skyline plot
*/
public void computeClassic()
{
int i = 0;
int k = 0;
do {
double popSize = 0.0;
boolean done = false;
k = i;
do {
double w = intervals.getInterval(i) / mu;
int n = intervals.getLineageCount(i);
if (n < 0) n = 0;
done = intervals.getIntervalType(i) == IntervalType.COALESCENT;
popSize += w * Binomial.choose2(n);
i++;
} while (i < size && !done);
for (int j = k; j < i; j++) {
populationSize[j] = popSize;
}
} while (i < size);
params = k;
eps = 0.0;
}
/**
* Compute generalized skyline plot
*/
public void computeGeneralized(double epsilon)
{
params = 0;
double cw = 0; //cumulative w
for (int i = 0; i < size; i++) {
int n = intervals.getLineageCount(i);
if (n < 0) n = 0;
double w = intervals.getInterval(i) / mu;
int start = i;
int k = 1;
while ((w < epsilon && i < size-1) ||
intervals.getIntervalType(i) != IntervalType.COALESCENT) {
i++;
k++;
w += intervals.getInterval(i) / mu;
//System.out.println(ci.getInterval(i));
}
//System.out.println("w=" + w + " k=" + k + " i=" + i);
// if remainder is smaller than epsilon
// continue pooling until the end
if (maxTime - cw - w < epsilon) {
for (int j = i+1; j < size; j++)
{
i++;
k++;
w += intervals.getInterval(i) / mu;
}
}
double m = w * Binomial.choose2(n) / k;
// assign the same pop.size to all sub intervals
for (int j = start; j < start+k; j++) {
populationSize[j] = m;
}
params++;
cw += w;
}
eps = epsilon;
}
/**
* Optimize generalized skyline plot
*/
public void optimize()
{
// this is the naive way of doing this ...
double besteps = getMaxTime();
computeGeneralized(besteps);
double bestaicc = getAICC();
int GRID = 1000;
double delta = besteps/GRID;
double MINEPS = 1e-6;
// Why MINEPS?
// Because most "clock-like" trees are not properly
// clock-like for a variety of reasons, i.e. the heights
// of the tips are not exactly zero.
eps = eps - delta;
while(eps > MINEPS)
{
computeGeneralized(eps);
double aicc = getAICC();
if (aicc > bestaicc && params < size-1)
{
besteps = eps;
bestaicc = aicc;
}
eps = eps - delta;
}
computeGeneralized(besteps);
}
/**
* Compute log-likelihood
*/
public double getLogLikelihood()
{
double logL = 0.0;
for (int i = 0; i < size; i++) {
double w = intervals.getInterval(i);
double m = populationSize[i];
double n = intervals.getIntervalCount();
double nc2 = n*(n-1.0)/2.0;
if (intervals.getIntervalType(i) == IntervalType.COALESCENT) {
logL += Math.log(nc2/m) - w*nc2/m;
} else {
logL -= w*nc2/m;
}
}
return logL;
}
/**
* Compute AICC-corrected log-likelihood
*/
public double getAICC()
{
double logL = getLogLikelihood();
return AICC(logL, params, size);
}
/**
* Second-order Akaike (AICC) correction (Hurvich and Tsai 1989)
*
* @param l log-likelihood
* @param k number of inferred parameters
* @param n sample size
*
* @return l - k - (k(k+1))/(n - k - 1)
*/
public static double AICC(double l, int k, int n)
{
if (k > n-2) throw new IllegalArgumentException("k must be smaller than n-1");
return l - k - (double) (k*(k+1.0))/ (double) (n - k - 1.0) ;
}
/**
* Find interval corresponding to a specific time
*/
public double findInterval(double time)
{
if (time < 0) throw new IllegalArgumentException("Negative values for time are not allowed");
for (int i = 0; i < size-1; i++)
{
if (time >= cis[i] && time < cis[i+1]) return i;
}
return size-1;
}
/**
* Returns the largest value of time defined in this plot
* (= maximum value for epsilon)
*/
public double getMaxTime()
{
return maxTime;
}
/**
* Returns the largest estimate of population size.
*/
public double getMaxPopulationSize() {
double max = 0.0;
for (int i = 0; i < size; i++) {
if (populationSize[i] > max) {
max = populationSize[i];
}
}
return max;
}
/**
* Returns the coalescent intervals in this skyline plot.
*/
public IntervalList getIntervals() {
return intervals;
}
/**
* Returns the number of intervals in this skyline plot.
*/
public int getSize() {
return size;
}
/**
* Returns the number of composite intervals (=number of parameters).
*/
public int getParameterCount() {
return params;
}
/**
* Returns epsilon
*/
public double getEpsilon() {
return eps;
}
/**
* Returns the population size in interval i.
*/
public double getPopulationSize(int i) {
return populationSize[i];
}
/**
* Return the units
*/
public final Type getUnits() {
return intervals.getUnits();
}
/**
* Sets the units
*/
public final void setUnits(Type units) {
throw new IllegalArgumentException("Can't set skyline's units");
}
// private
private IntervalList intervals;
private int size;
private double maxTime;
private double eps;
private double mu;
private int params;
/** cummulative interval sizes */
private double[] cis;
/** estimated population size in a coalescent interval */
private double[] populationSize;
}