/*
* AlloppMulLabTree.java
*
* Copyright (c) 2002-2017 Alexei Drummond, Andrew Rambaut and Marc Suchard
*
* This file is part of BEAST.
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership and licensing.
*
* BEAST is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* BEAST is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with BEAST; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301 USA
*/
package dr.evomodel.alloppnet.speciation;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Formatter;
import java.util.Locale;
import java.util.Stack;
import dr.evolution.tree.*;
import jebl.util.FixedBitSet;
import dr.evolution.util.Taxon;
import dr.inference.model.Parameter;
import dr.evomodel.alloppnet.util.AlloppMisc;
/**
* An AlloppMulLabTree represents the species network as single
* binary tree with tips that can be multiply labelled with species.
*
* @author Graham Jones
* Date: 13/09/2011
*/
/*
* An AlloppMulLabTree represents the species network as single
* binary tree with tips that can be multiply labelled with species.
*
* classes LegLink and FootLinks are for gathering and organising
* the links between trees of different ploidy, so that the
* rootward-pointing legs can become tipward-pointing branches.
*
* SpSqUnion is used for sorting the nodes in an AlloppMulLabTree. It is
* used by Comparator SPUNION_ORDER, and hence indirectly by
* fillinpopvals().
*
* class BranchPopulationAndLineages records the information needed
* to calculate the probability of coalescences in a single branch of the
* AlloppMulLabTree.
*
*/
/* mlnodes[], rootn implement the tree; nextn is for building it
*
* apsp references the (species, indivs, sequences) structure.
*
* popvals references the population parameters. fillinpopvals() assigns
* them to branches.
*
* simptree is so that AlloppSpeciesNetworkModel, which contains a AlloppMulLabTree,
* can implement the Tree interface.
*/
public class AlloppMulLabTree {
private MulLabNode[] mlnodes;
private int rootn;
private int nextn;
private AlloppSpeciesBindings apsp;
private Parameter tippopvals;
private Parameter rootpopvals;
private double [] hybpopvals;
private int nofhybpopvals;
public SimpleTree simptree;
/*
* parent, child[] join the nodes into a binary tree.
*
* height is time into past
*
* popsize is population at start of branch, and for tips, also at end.
*
* union is a set of species for a single choice of sequence copy from
* each individual of the species. There is one bit for each Taxon like
* "c0" or "c1"
*/
private class MulLabNode extends AlloppNode.Abstract implements AlloppNode, NodeRef {
private int nodeNumber;
private int anc;
private int lft;
private int rgt;
private double height;
private boolean tetraroot; // true iff this node is root of a teraploid subtree
private boolean intetratree; // true iff this is in a tetratree
private boolean tetraancestor; // true iff this is a node with no diploid tip descendants
private int ttreeindex;
private double hybridheight;
private FixedBitSet union;
private ArrayList<Double> coalheights;
private int nlineages;
private Taxon taxon;
private int tippopindex;
private int hybpopindex;
private int rootpopindex;
// dud constuctor
MulLabNode(int nn) {
nodeNumber = nn;
anc = -1;
lft = -1;
rgt = -1;
height = -1.0;
tetraroot = false;
intetratree = false;
tetraancestor = false;
ttreeindex = -1;
hybridheight = -1.0;
coalheights = new ArrayList<Double>();
taxon = new Taxon("");
tippopindex = -1;
hybpopindex = -1;
rootpopindex = -1;
}
public double tippop() {
return tippopvals.getParameterValue(tippopindex);
}
public double hybpop() {
return hybpopvals[hybpopindex];
}
public double rootpop() {
return rootpopvals.getParameterValue(rootpopindex);
}
@Override
public String asText(int indentlen) {
StringBuilder s = new StringBuilder();
Formatter formatter = new Formatter(s, Locale.US);
if (lft < 0) {
formatter.format("%s ", taxon.getId());
} else {
formatter.format("%s ", "+");
}
while (s.length() < 20-indentlen) {
formatter.format("%s", " ");
}
formatter.format("%s ", AlloppMisc.nonnegIn8Chars(height));
formatter.format("%60s ", AlloppMisc.FixedBitSetasText(union));
double tippop = (tippopindex >= 0) ? tippop() : -1.0;
formatter.format("%s %s ", AlloppMisc.nonnegIntIn2Chars(tippopindex), AlloppMisc.nonnegIn8Chars(tippop));
double hybpop = (hybpopindex >= 0) ? hybpop() : -1.0;
formatter.format("%s %s ", AlloppMisc.nonnegIntIn2Chars(hybpopindex), AlloppMisc.nonnegIn8Chars(hybpop));
double rootpop = (rootpopindex >= 0) ? rootpop() : -1.0;
formatter.format("%s %s ", AlloppMisc.nonnegIntIn2Chars(rootpopindex), AlloppMisc.nonnegIn8Chars(rootpop));
formatter.format("%s ", tetraroot ? "tetroot" : " ");
formatter.format("%s ", AlloppMisc.nonnegIn8Chars(hybridheight));
formatter.format("%3d ", nlineages);
for (int c = 0; c < coalheights.size(); c++) {
formatter.format(AlloppMisc.nonnegIn8Chars(coalheights.get(c)) + ",");
}
return s.toString();
}
@Override
public int nofChildren() {
return ((lft < 0) ? 0 : 2);
}
@Override
public AlloppNode getChild(int ch) {
return ch==0 ? mlnodes[lft] : mlnodes[rgt];
}
@Override
public AlloppNode getAnc() {
return mlnodes[anc];
}
@Override
public double getHeight() {
return height;
}
@Override
public FixedBitSet getUnion() {
return union;
}
@Override
public void setChild(int ch, AlloppNode newchild) {
int newch = ((MulLabNode)newchild).nodeNumber;
if (ch == 0) {
lft = newch;
} else {
rgt = newch;
}
}
@Override
public void setAnc(AlloppNode anc) {
this.anc = ((MulLabNode)anc).nodeNumber;
}
@Override
public Taxon getTaxon() {
return taxon;
}
@Override
public void setTaxon(String name) {
taxon = new Taxon(name);
}
@Override
public void setHeight(double height) {
this.height = height;
}
@Override
public void setUnion(FixedBitSet union) {
this.union = union;
}
@Override
public void addChildren(AlloppNode c0, AlloppNode c1) {
lft = ((MulLabNode)c0).nodeNumber;
mlnodes[lft].anc = nodeNumber;
rgt = ((MulLabNode)c1).nodeNumber;
mlnodes[rgt].anc = nodeNumber;
}
@Override
public int getNumber() {
return nodeNumber;
}
@Override
public void setNumber(int nn) {
nodeNumber = nn;
}
}
private class SpSqUnion {
public FixedBitSet spsqunion;
public FixedBitSet spunion;
public SpSqUnion(FixedBitSet spsqunion) {
this.spsqunion = spsqunion;
spunion = apsp.spsqunion2spunion(spsqunion);
}
}
private class PopulationAndLineages {
public double t[];
public double tippop;
public double rootpop;
public int tipnlin;
public PopulationAndLineages(double[] t2, double tippop, double rootpop,
int tipnlin) {
this.t = t2;
this.tippop = tippop;
this.rootpop = rootpop;
this.tipnlin = tipnlin;
}
public double populationAt(double x) {
final double begt = t[0];
final double endt = t[t.length - 1];
return ((endt-x)*tippop + (x-begt)*rootpop) / (endt-begt);
}
}
/*
* This constructor makes a single multiply labelled tree from the diploid
* history and set of tetraploid AlloppLeggedTrees which is passed to it. It is called directly
* by testing code.
*/
AlloppMulLabTree(AlloppDiploidHistory adhist, ArrayList<AlloppLeggedTree> tettrees,
AlloppSpeciesBindings apsp, Parameter tippopvals, Parameter rootpopvals, double [] hybpopvals) {
this.apsp = apsp;
this.tippopvals = tippopvals;
this.rootpopvals = rootpopvals;
this.hybpopvals = hybpopvals;
nofhybpopvals = tettrees.size();
// Count tips for the tree to be made
int ntips = 0;
ntips += adhist.getDiploidTipCount();
for (AlloppLeggedTree ttree : tettrees) {
ntips += 2 * ttree.getExternalNodeCount();
}
// Make array of dud nodes
mlnodes = new MulLabNode[2 * ntips - 1];
for (int i = 0; i < mlnodes.length; i++) {
mlnodes[i] = new MulLabNode(i);
mlnodes[i].tetraancestor = true;
}
// get dip hist and tetratrees into array; unions are filled at tips
nextn = 0;
nextn = subtree2MulLabNodes(adhist, adhist.getRootIndex(), tettrees, apsp);
rootn = nextn - 1;
// fill unions from tips
mlnodes[rootn].fillinUnionsInSubtree(apsp.numberOfSpSeqs());
// The tetraroot flags were initialised false, set to final values by subtree2MulLabNodes()
// The tetraancestor flags were initialised true, diploid tips later set false by subtree2MulLabNodes()
// The intetratree flags were initialised false, tetraploid tips later set true by allopptree2MulLabNodes()
// Now fill in tetraancestor and intetratree flags from tips.
fillinTetraFlagsInSubtree(mlnodes[rootn]);
makesimpletree();
}
// constructor for testing conversion of diploid history plus tetraploid trees to MUL-tree
public AlloppMulLabTree(AlloppDiploidHistory adhist, ArrayList<AlloppLeggedTree> tettrees, AlloppSpeciesBindings apsp,
Parameter testtippopvalues, Parameter testrootpopvalues, double [] testhybpopvalues, int testcase) {
this(adhist, tettrees, apsp, testtippopvalues, testrootpopvalues, testhybpopvalues);
assert testcase >= 1;
assert testcase <= 5;
fillinpopvals();
}
// constructor for testing likelihood calculations.
// Makes a particular multree with nlineages, coalheights so test can call
// geneTreeInMULTreeLogLikelihood()
public AlloppMulLabTree(AlloppSpeciesBindings apsp) {
this.apsp = apsp;
nofhybpopvals = 1;
tippopvals = new Parameter.Default(3, .003);
rootpopvals = new Parameter.Default(4, .001);
hybpopvals = new double[1];
hybpopvals[0] = .001;
fillmlnodesforlhoodtest1();
fillinTetraFlagsInSubtree(mlnodes[rootn]);
mlnodes[rootn].fillinUnionsInSubtree(4);
}
public double testGeneTreeInMULTreeLogLikelihood() {
return geneTreeInMULTreeLogLikelihood();
}
private void fillmlnodesforlhoodtest1() {
mlnodes = new MulLabNode[7];
for (int i = 0; i < mlnodes.length; i++) {
mlnodes[i] = new MulLabNode(i);
}
// 4 tips, 2 dips, 1 tet
mlnodes[0].taxon = new Taxon("a");
mlnodes[1].taxon = new Taxon("b");
mlnodes[2].taxon = new Taxon("z0");
mlnodes[3].taxon = new Taxon("z1");
mlnodes[2].tetraancestor = true;
mlnodes[3].tetraancestor = true;
mlnodes[2].intetratree = true;
mlnodes[3].intetratree = true;
mlnodes[2].tetraroot = true;
mlnodes[3].tetraroot = true;
mlnodes[0].union = new FixedBitSet(4); mlnodes[0].union.set(0);
mlnodes[1].union = new FixedBitSet(4); mlnodes[1].union.set(1);
mlnodes[2].union = new FixedBitSet(4); mlnodes[2].union.set(2);
mlnodes[3].union = new FixedBitSet(4); mlnodes[3].union.set(3);
// toplogy and times
mlnodes[4].addChildren(mlnodes[0], mlnodes[2]);
mlnodes[5].addChildren(mlnodes[1], mlnodes[3]);
mlnodes[6].addChildren(mlnodes[4], mlnodes[5]);
rootn = 6;
mlnodes[0].height = 0.0;
mlnodes[1].height = 0.0;
mlnodes[2].height = 0.0;
mlnodes[3].height = 0.0;
mlnodes[4].height = 0.01;
mlnodes[5].height = 0.02;
mlnodes[6].height = 0.03;
mlnodes[2].hybridheight = 0.005;
mlnodes[3].hybridheight = 0.005;
// nlineages and coalheights
mlnodes[0].nlineages = 1;
mlnodes[1].nlineages = 2;
mlnodes[2].nlineages = 1;
mlnodes[3].nlineages = 1;
mlnodes[4].nlineages = 2;
mlnodes[4].coalheights.add(0.015);
mlnodes[5].nlineages = 3;
mlnodes[5].coalheights.add(0.025);
mlnodes[6].nlineages = 3;
mlnodes[6].coalheights.add(0.035);
mlnodes[6].coalheights.add(0.045);
}
// For testing.
boolean mullabtreeOK() {
int nroots = 0;
for (int i = 0; i < mlnodes.length; i++) {
if (mlnodes[i].anc < 0) {
nroots++;
}
}
if (nroots != 1) {
return false;
}
for (int i = 0; i < mlnodes.length; i++) {
int nparents = 0;
for (int j = 0; j < mlnodes.length; j++) {
if (mlnodes[j].lft == i) { nparents++; }
if (mlnodes[j].rgt == i) { nparents++; }
}
if (mlnodes[i].anc < 0 && nparents != 0) {
return false;
}
if (mlnodes[i].anc >= 0 && nparents != 1) {
return false;
}
}
for (int i = 0; i < mlnodes.length; i++) {
if (mlnodes[i].getNumber() != i) {
return false;
}
}
for (int i = 0; i < mlnodes.length; i++) {
if (mlnodes[i].lft >= 0) {
if (mlnodes[i].rgt < 0) {
return false;
}
int lft = mlnodes[i].lft;
int rgt = mlnodes[i].rgt;
if (mlnodes[lft].anc != i) {
return false;
}
if (mlnodes[rgt].anc != i) {
return false;
}
if (mlnodes[i].height <= mlnodes[lft].height) {
return false;
}
if (mlnodes[i].height <= mlnodes[rgt].height) {
return false;
}
} else {
if (mlnodes[i].height != 0) {
return false;
}
}
}
if (mlnodes[rootn].anc >= 0) {
return false;
}
return true;
}
String mullabTreeAsNewick() {
String s = TreeUtils.uniqueNewick(simptree, simptree.getRoot());
return s;
}
String asText() {
String header = "MUL-tree height union [] tippop [] hybpop [] rootpop tetroot hybhgt nlin coalheights" + System.getProperty("line.separator");
String s = "";
Stack<Integer> x = new Stack<Integer>();
return header + AlloppNode.Abstract.subtreeAsText(mlnodes[rootn], s, x, 0, "");
}
void clearCoalescences() {
clearSubtreeCoalescences(mlnodes[rootn]);
}
void recordLineageCounts() {
recordSubtreeLineageCounts(mlnodes[rootn]);
}
boolean coalescenceIsCompatible(double height, FixedBitSet union) {
MulLabNode node = (MulLabNode) mlnodes[rootn].nodeOfUnionInSubtree(union);
return (node.height <= height);
}
void recordCoalescence(double height, FixedBitSet union) {
MulLabNode node = (MulLabNode) mlnodes[rootn].nodeOfUnionInSubtree(union);
assert (node.height <= height);
while (node.anc >= 0 && mlnodes[node.anc].height <= height) {
node = mlnodes[node.anc];
}
node.coalheights.add(height);
}
void sortCoalescences() {
for (MulLabNode node : mlnodes) {
Collections.sort(node.coalheights);
}
}
double geneTreeInMULTreeLogLikelihood() {
fillinpopvals();
//System.out.println(asText());
return geneTreeInMULSubtreeLogLikelihood(mlnodes[rootn]);
}
/*
*
* ***************************************************
* Private
*/
private void fillinTetraFlagsInSubtree(AlloppNode node) {
if (node.nofChildren() == 2) {
MulLabNode mnode = (MulLabNode)node;
MulLabNode ch0 = (MulLabNode)node.getChild(0);
MulLabNode ch1 = (MulLabNode)node.getChild(1);
fillinTetraFlagsInSubtree(node.getChild(0));
fillinTetraFlagsInSubtree(node.getChild(1));
mnode.tetraancestor = (ch0.tetraancestor && ch1.tetraancestor);
mnode.intetratree = (ch0.intetratree && !ch0.tetraroot && ch1.intetratree && !ch1.tetraroot);
}
}
private int subtree2MulLabNodes(AlloppDiploidHistory adhist, int dhni, ArrayList<AlloppLeggedTree> tettrees, AlloppSpeciesBindings apsp) {
if (adhist.getLftFromIndex(dhni) < 0) {
int tt = adhist.getNodeTettree(dhni);
if (tt < 0) {
mlnodes[nextn].setTaxon(adhist.getTaxonFromIndex(dhni).getId());
mlnodes[nextn].setUnion(apsp.taxonseqToTipUnion(mlnodes[nextn].taxon, 0));
mlnodes[nextn].setHeight(adhist.getHeightFromIndex(dhni));
mlnodes[nextn].tetraancestor = false;
nextn++;
} else {
AlloppNode troot = (AlloppNode)tettrees.get(tt).getSlidableRoot();
int seq = (adhist.getNodeLeg(dhni) == AlloppDiploidHistory.LegLorR.left) ? 0 : 1;
nextn = allopptree2MulLabNodes(apsp, troot, seq);
mlnodes[nextn-1].hybridheight = adhist.getHeightFromIndex(dhni);
mlnodes[nextn-1].tetraroot = true;
mlnodes[nextn-1].ttreeindex = tt;
}
} else {
nextn = subtree2MulLabNodes(adhist, adhist.getLftFromIndex(dhni), tettrees, apsp);
int c0 = nextn - 1;
nextn = subtree2MulLabNodes(adhist, adhist.getRgtFromIndex(dhni), tettrees, apsp);
int c1 = nextn - 1;
mlnodes[nextn].addChildren(mlnodes[c0], mlnodes[c1]);
mlnodes[nextn].setHeight(adhist.getHeightFromIndex(dhni));
nextn++;
}
return nextn;
}
private int allopptree2MulLabNodes(AlloppSpeciesBindings apsp,
AlloppNode snode, int seq) {
if (snode.nofChildren() == 0) {
mlnodes[nextn].setTaxon(snode.getTaxon().getId() + seq);
mlnodes[nextn].setUnion(apsp.taxonseqToTipUnion(snode.getTaxon(), seq));
mlnodes[nextn].intetratree = true;
} else {
nextn = allopptree2MulLabNodes(apsp, snode.getChild(0), seq);
int c0 = nextn - 1;
nextn = allopptree2MulLabNodes(apsp, snode.getChild(1), seq);
int c1 = nextn - 1;
mlnodes[nextn].addChildren(mlnodes[c0], mlnodes[c1]);
}
mlnodes[nextn].setHeight(snode.getHeight());
nextn++;
return nextn;
}
private void makesimpletree() {
SimpleNode[] snodes = new SimpleNode[mlnodes.length];
for (int n = 0; n < mlnodes.length; n++) {
snodes[n] = new SimpleNode();
}
makesimplesubtree(snodes, 0, mlnodes[rootn]);
simptree = new SimpleTree(snodes[mlnodes.length-1]);
}
private int makesimplesubtree(SimpleNode[] snodes, int nextsn, MulLabNode mnode) {
if (mnode.lft < 0) {
snodes[nextsn].setTaxon(new Taxon(mnode.taxon.getId()));
} else {
nextsn = makesimplesubtree(snodes, nextsn, mlnodes[mnode.lft]);
int subtree0 = nextsn-1;
nextsn = makesimplesubtree(snodes, nextsn, mlnodes[mnode.rgt]);
int subtree1 = nextsn-1;
snodes[nextsn].addChild(snodes[subtree0]);
snodes[nextsn].addChild(snodes[subtree1]);
}
snodes[nextsn].setHeight(mnode.height);
String tti;
if (mnode.ttreeindex < 0) {
tti = "X";
} else {
tti = "T" + mnode.ttreeindex;
}
snodes[nextsn].setAttribute("tti", tti);
if (mnode.hybridheight >= 0.0) {
snodes[nextsn].setAttribute("hybhgt", mnode.hybridheight);
}
return nextsn+1;
}
private void clearSubtreeCoalescences(MulLabNode node) {
if (node.lft >= 0) {
clearSubtreeCoalescences(mlnodes[node.lft]);
clearSubtreeCoalescences(mlnodes[node.rgt]);
}
node.coalheights.clear();
}
private void recordSubtreeLineageCounts(MulLabNode node) {
if (node.lft < 0) {
node.nlineages = apsp.nLineages(apsp.spseqindex2sp(union2spseqindex(node.union)));
} else {
node.nlineages = 0;
recordSubtreeLineageCounts(mlnodes[node.lft]);
recordSubtreeLineageCounts(mlnodes[node.rgt]);
node.nlineages += mlnodes[node.lft].nlineages - mlnodes[node.lft].coalheights.size();
node.nlineages += mlnodes[node.rgt].nlineages - mlnodes[node.rgt].coalheights.size();
}
}
/*
* This copies population values in the Parameter popvalues
* to nodes in the AlloppMulLabTree. The population values are
* per-species-clade (per-branch in network), but of course more than
* one node in AlloppMulLabTree may correspond to the same species.
*
* The other complications are that tips are different from internal
* nodes, and that nodes which roots of tetratrees or just below,
* as well as the root are special cases.
*
* It collects unions (which represent sets whose elements
* identify a species and a sequence) from the nodes and then
* sorts them primarily using identities of the species, so
* that sets of node with same species clade are grouped together. The sort
* also puts the node sets corresponding to tips first in the array and sorts
* nodes within node sets in a well-defined way.
* This mainly does what is required, since nodes with the same
* species clade are treated the same.
*
* fillinpopvalsforspunion() deals with a set of nodes
* with same species clade.
*/
private void fillinpopvals() {
ArrayList<SpSqUnion> unionarraylist = new ArrayList<SpSqUnion>();
for (int n = 0; n < mlnodes.length; n++) {
unionarraylist.add(new SpSqUnion(mlnodes[n].union));
}
Collections.sort(unionarraylist, SPUNION_ORDER);
SpSqUnion[] unionarray = new SpSqUnion[unionarraylist.size()];
unionarray = unionarraylist.toArray(unionarray);
PopValIndices pvis = new PopValIndices(0,0,0);
// set all pop indices to dud values
for (int n = 0; n < mlnodes.length; n++) {
mlnodes[n].tippopindex = -1;
mlnodes[n].rootpopindex = -1;
mlnodes[n].hybpopindex = -1;
}
int noftippopvals = tippopvals.getDimension();
int nofrootpopvals = rootpopvals.getDimension();
for (int n0 = 0; n0 < unionarray.length; ) {
int n1 = n0+1;
while (n1 < unionarray.length && unionarray[n1].spunion.equals(unionarray[n0].spunion)) {
n1++;
}
assert pvis.tipp <= noftippopvals;
if (!(pvis.rootp <= nofrootpopvals)) {
System.out.println("BUG in fillinpopvals()");
}
assert pvis.rootp <= nofrootpopvals;
assert pvis.hybp <= nofhybpopvals;
pvis = fillinpopvalsforspunion(unionarray, n0, n1, pvis);
n0 = n1;
}
if (pvis.tipp != noftippopvals || pvis.rootp != nofrootpopvals || pvis.hybp != nofhybpopvals) {
System.out.println("BUG in fillinpopvals()");
}
assert pvis.tipp == noftippopvals;
assert pvis.rootp == nofrootpopvals;
assert pvis.hybp == nofhybpopvals;
}
private class PopValIndices {
public int tipp;
public int rootp;
public int hybp;
PopValIndices(int tipp, int rootp, int hybp) {
this.tipp = tipp;
this.rootp = rootp;
this.hybp = hybp;
}
}
private PopValIndices fillinpopvalsforspunion(SpSqUnion[] unionarray, int n0, int n1, PopValIndices pvis) {
int n = n1-n0;
MulLabNode nodeset[] = new MulLabNode[n];
// get set of nodes with same species clade
for (int i = n0; i < n1; i++) {
nodeset[i-n0] = (MulLabNode) mlnodes[rootn].nodeOfUnionInSubtree(unionarray[i].spsqunion);
}
// Hybridization pops. Nodes in pairs, shared pop.
if (nodeset[0].tetraroot) {
assert n >= 2;
assert nodeset[1].tetraroot;
for (int i = 0; i < 2; i++) {
nodeset[i].hybpopindex = pvis.hybp;
}
pvis.hybp++;
}
// Tip pops. Either dip or tet; in latter case shared pop.
int ntips = 0;
for (int i = 0; i < n; i++) {
if (nodeset[i].lft < 0) {
nodeset[i].tippopindex = pvis.tipp;
ntips++;
}
}
assert ntips <= 2;
if (ntips > 0) {
pvis.tipp++;
}
// Root pops within tetra trees. Nodes in pairs.
if (nodeset[0].intetratree && !nodeset[0].tetraroot) {
if (n != 2) {
System.out.println("BUG in fillinpopvalsforspunion() 2");
}
assert n == 2;
assert (nodeset[1].intetratree && !nodeset[1].tetraroot);
for (int i = 0; i < 2; i++) {
nodeset[i].rootpopindex = pvis.rootp;
}
pvis.rootp++;
} else {
// Root pops NOT within tetra trees. Here the nodeset[] structure is not useful.
// Here, pop is shared with sibling if either is ancestor to tetraploids only
for (int i = 0; i < n; i++) {
MulLabNode node = nodeset[i];
if (node.anc >= 0) {
MulLabNode sibling = siblingOfNode(node);
if (node.tetraancestor || sibling.tetraancestor) {
if (sibling.rootpopindex >= 0) {
node.rootpopindex = sibling.rootpopindex;
} else {
node.rootpopindex = pvis.rootp;
pvis.rootp++;
}
} else {
node.rootpopindex = pvis.rootp;
pvis.rootp++;
}
}
}
}
return pvis;
}
private MulLabNode siblingOfNode(MulLabNode node) {
assert node.anc >= 0;
MulLabNode sibling;
if (mlnodes[ mlnodes[node.anc].lft ] == node) {
sibling = mlnodes[ mlnodes[node.anc].rgt ];
} else {
assert mlnodes[ mlnodes[node.anc].rgt ] == node;
sibling = mlnodes[ mlnodes[node.anc].lft ];
}
return sibling;
}
/*
* Visits each node in MULtree and accumulates LogLikelihood
*/
private double geneTreeInMULSubtreeLogLikelihood(MulLabNode node) {
double loglike = 0.0;
if (node.lft >= 0) {
loglike += geneTreeInMULSubtreeLogLikelihood(mlnodes[node.lft]);
loglike += geneTreeInMULSubtreeLogLikelihood(mlnodes[node.rgt]);
}
loglike += branchLLInMULtree(node);
return loglike;
}
/*
* Does likelihood calculation for a single node in the case
* of two diploids.
*/
private double branchLLInMULtree(MulLabNode node) {
double loglike = 0.0;
double tippop = 0.0;
if (node.lft < 0) {
tippop = node.tippop();
} else {
tippop = mlnodes[node.lft].rootpop() + mlnodes[node.rgt].rootpop();
}
PopulationAndLineages pal;
double t[];
if (node.tetraroot) {
// since hybridization
int nsince = 0;
for ( ; nsince < node.coalheights.size() &&
node.coalheights.get(nsince) < node.hybridheight; nsince++) {}
t = new double[nsince + 2];
t[0] = node.height;
for (int i = 0; i < nsince; i++) {
t[i+1] = node.coalheights.get(i);
}
t[t.length-1] = node.hybridheight;
pal = new PopulationAndLineages(t, tippop, node.hybpop(), node.nlineages);
loglike += limbLogLike(pal);
if (Double.isNaN(loglike)) {
System.out.println("BUG in branchLLInMULtree");
}
// before hybridization
int nbefore = node.coalheights.size() - nsince;
t = new double[nbefore + 2];
t[0] = node.hybridheight;
for (int i = 0; i < nbefore; i++) {
t[i+1] = node.coalheights.get(nsince+i);
}
t[t.length-1] = mlnodes[node.anc].height;
pal = new PopulationAndLineages(t, node.rootpop(), node.rootpop(), node.nlineages - nsince);
loglike += limbLogLike(pal);
} else if (node.anc < 0) {
t = new double[node.coalheights.size() + 2];
t[0] = node.height;
t[t.length-1] = apsp.maxGeneTreeHeight();
for (int i = 0; i < node.coalheights.size(); i++) {
t[i+1] = node.coalheights.get(i);
}
pal = new PopulationAndLineages(t, tippop, tippop, node.nlineages);
loglike += limbLogLike(pal);
} else {
t = new double[node.coalheights.size() + 2];
t[0] = node.height;
t[t.length-1] = mlnodes[node.anc].height;
for (int i = 0; i < node.coalheights.size(); i++) {
t[i+1] = node.coalheights.get(i);
}
pal = new PopulationAndLineages(t, tippop, node.rootpop(), node.nlineages);
loglike += limbLogLike(pal);
}
if (Double.isNaN(loglike)) {
System.out.println("BUG in branchLLInMULtree");
}
return loglike;
}
/*
* limbLogLike calculates the log-likelihood for
* the coalescences at t[1],t[2],...t[k] within a limb
* from t[0] to t[k+1]. ('limb' means a branch or part of one.)
*/
private double limbLogLike(PopulationAndLineages pal) {
double loglike = 0.0;
int k = pal.t.length - 2;
for (int i = 1; i <= k; i++) {
loglike -= Math.log(pal.populationAt(pal.t[i]));
}
for (int i = 0; i <= k; i++) {
final double y = (pal.tipnlin-i) * (pal.tipnlin-i-1) / 2 ;
final double z = limbLinPopIntegral(pal, pal.t[i], pal.t[i+1]);
loglike -= y * z;
}
if (Double.isNaN(loglike)) {
System.out.println("BUG in limbLogLike");
}
return loglike;
}
// integral from t0 to t1 of (endt-begt)/((endt-x)begPop + (x-begt)endPop)
// with respect to x
private double limbLinPopIntegral(PopulationAndLineages b, double t0, double t1) {
final double begt = b.t[0];
final double endt = b.t[b.t.length-1];
if (b.rootpop < 1E-20) {
System.out.println("Underflow in limbLinPopIntegral()");
}
final double d = b.rootpop - b.tippop;
final double c = endt * b.tippop - begt * b.rootpop;
final double x = Math.abs(d / b.tippop);
if (x > 0.001) {
return ((endt - begt) / d) * Math.log((c + d * t1) / (c + d * t0));
} else {
double y = d * (t1 - t0) / (c + d * t0);
double ys = (1.0 - y/2.0 + y*y * (1.0/3.0 - y/4.0 + y*y * (1.0/5.0 - y/6.0)));
return ((endt - begt) * (t1 - t0) / (c + d * t0)) * ys;
}
}
private static int union2spseqindex(FixedBitSet union) {
assert union.cardinality() == 1;
return union.nextOnBit(0);
}
/*
* This is for ordering the unions in the nodes of the AlloppMulLabTree.
* Those unions are of (species, sequence) pairs.
*
* The comparator sorts the unions of (species, sequence) pairs (SpSqUnions)
* so that all unions containing the same set of species (ignoring sequence)
* are grouped together. Call the sets of SpSqUnions for the same species a
* `group'. There can be 1,2 or 3 SpSqUnions in a group.
*
* The groups are sorted in order of increasing number of species (clade size).
* All groups for a single species (a tip in the network) come first, then
* those groups for two species, and so on to the root for all species.
* For groups that have equal numbers of species, a lexicographical
* ordering using species indices is used.
*
* Within each group, species and sequence information is used to sort the 1 to 3
* SpSqUnions. The size of the `clade' of (species, sequence) pairs is used
* first in the comparison, which ensures that the three nodes with the same species
* - corresponding to two roots of tetratrees in the AlloppMulLabTree plus a leg-join -
* are ordered so that the two roots come first.
*
*/
static final Comparator<SpSqUnion> SPUNION_ORDER = new Comparator<SpSqUnion>() {
public int compare(SpSqUnion a, SpSqUnion b) {
int ac = a.spunion.cardinality();
int bc = b.spunion.cardinality();
if (ac != bc) {
return ac-bc;
} else {
int an = a.spunion.nextOnBit(0);
int bn = b.spunion.nextOnBit(0);
while (an >= 0 || bn >= 0) {
if (an != bn) {
return an-bn;
}
an = a.spunion.nextOnBit(an+1);
bn = b.spunion.nextOnBit(bn+1);
}
// spunions compare equal; do spsqunions
ac = a.spsqunion.cardinality();
bc = b.spsqunion.cardinality();
if (ac != bc) {
return ac-bc;
} else {
an = a.spsqunion.nextOnBit(0);
bn = b.spsqunion.nextOnBit(0);
while (an >= 0 || bn >= 0) {
if (an != bn) {
return an-bn;
}
an = a.spsqunion.nextOnBit(an+1);
bn = b.spsqunion.nextOnBit(bn+1);
}
return 0;
}
}
}
};
}