/*
* Convergence.java
*
* Copyright (c) 2002-2015 Alexei Drummond, Andrew Rambaut and Marc Suchard
*
* This file is part of BEAST.
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership and licensing.
*
* BEAST is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* BEAST is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with BEAST; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301 USA
*/
/**
*
*/
package dr.evomodel.tree;
import dr.evolution.tree.NodeRef;
import dr.evolution.tree.Tree;
import dr.evolution.util.Taxon;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.*;
/**
* @author shhn001
*/
public class Convergence {
double distance = Double.MAX_VALUE;
public final static String LOG_CONVERGENCE = "LogConvergence";
public final static String TREE_FILE_NAME = "treeFilename";
public final static String REFERENCE_FILE_NAME = "referenceFilename";
public final static String CHECK_EVERY = "checkEvery";
private Tree tree = null;
private long trees = 0;
private HashMap<BitSet, Double> referenceCladeFrequencies = null;
private HashMap<BitSet, Double> cladeOccurences = null;
private HashMap<String, Integer> taxonMap;
private String[] taxonArray;
private String referenceTreeFileName;
private double logEvery;
/**
*
*/
public Convergence(Tree tree, int logEvery, String referenceTreeFileName) {
this.logEvery = logEvery;
this.referenceTreeFileName = referenceTreeFileName;
this.tree = tree;
taxonMap = new HashMap<String, Integer>();
cladeOccurences = new HashMap<BitSet, Double>();
startLogging();
}
private void startLogging() {
parseReferenceFile(referenceTreeFileName);
taxonArray = new String[taxonMap.size()];
Set<String> taxon = taxonMap.keySet();
for (String s : taxon) {
taxonArray[taxonMap.get(s)] = s;
}
}
private void parseReferenceFile(String log) {
FileReader fr;
BufferedReader br;
String line;
referenceCladeFrequencies = new HashMap<BitSet, Double>();
try {
fr = new FileReader(log);
br = new BufferedReader(fr);
line = br.readLine();
String[] tokens = line.split("\\s++");
long total = Long.parseLong(tokens[0]);
while ((line = br.readLine()) != null) {
String[] taxa = getTaxa(line);
BitSet split = new BitSet();
for (String t : taxa) {
if (!taxonMap.containsKey(t)) {
taxonMap.put(t, taxonMap.size());
}
split.set(taxonMap.get(t));
}
tokens = line.split("\\s++");
double frequency = (100.0 * Long.parseLong(tokens[0])) / (double) total;
referenceCladeFrequencies.put(split,
frequency);
}
br.close();
fr.close();
} catch (FileNotFoundException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
private String[] getTaxa(String line) {
line = line.substring(line.indexOf('(') + 1, line.indexOf(')'));
String[] taxa = line.split(",");
return taxa;
}
public double log(long state) {
if (logEvery <= 0 || ((state % logEvery) == 0)) {
addTree(tree);
distance = getMaxCladeDistance();
}
return distance;
}
private void addTree(Tree tree) {
trees++;
List<BitSet> splits = getSplits(tree);
for (BitSet s : splits) {
if (!cladeOccurences.containsKey(s)) {
cladeOccurences.put(s, 1.0);
} else {
cladeOccurences.put(s, cladeOccurences.get(s) + 1);
}
}
}
private List<BitSet> getSplits(Tree tree) {
List<BitSet> splits = new ArrayList<BitSet>();
NodeRef root = tree.getRoot();
fillSplits(splits, root, tree);
return splits;
}
private BitSet fillSplits(List<BitSet> splits, NodeRef root, Tree tree) {
BitSet split = new BitSet();
if (!tree.isExternal(root)) {
split.or(fillSplits(splits, tree.getChild(root, 0), tree));
split.or(fillSplits(splits, tree.getChild(root, 1), tree));
splits.add(split);
} else {
Taxon taxon = tree.getNodeTaxon(root);
String name = taxon.getId();
split.set(taxonMap.get(name));
}
return split;
}
private double getMaxCladeDistance() {
return getMaxDeviation();
}
private double getMaxDeviation() {
double[] deviation = getDeviations();
double max = 0;
for (double m : deviation) {
if (m > max) {
max = m;
}
}
return max;
}
private double[] getDeviations() {
double[] deviations = new double[Math.max(referenceCladeFrequencies
.size(), cladeOccurences.size())];
Set<BitSet> keys = null;
if (referenceCladeFrequencies.size() < cladeOccurences.size()) {
keys = cladeOccurences.keySet();
} else {
keys = referenceCladeFrequencies.keySet();
}
Iterator<BitSet> it = keys.iterator();
for (int i = 0; i < deviations.length; i++) {
BitSet k = it.next();
double r = 0.0;
if (referenceCladeFrequencies.containsKey(k)) {
r = referenceCladeFrequencies.get(k);
}
double o = 0.0;
if (cladeOccurences.containsKey(k)) {
o = cladeOccurences.get(k);
}
deviations[i] = Math.abs(r - ((o * 100.0) / (double) trees));
}
return deviations;
}
}