/*
* SpeciesDelimitationAnalyser.java
*
* Copyright (c) 2002-2015 Alexei Drummond, Andrew Rambaut and Marc Suchard
*
* This file is part of BEAST.
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership and licensing.
*
* BEAST is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* BEAST is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with BEAST; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301 USA
*/
package dr.app.tools;
import dr.app.beast.BeastVersion;
import dr.app.util.Arguments;
import dr.evolution.io.Importer;
import dr.evolution.io.NexusImporter;
import dr.evolution.tree.NodeRef;
import dr.evolution.tree.Tree;
import dr.evolution.util.TaxonList;
import dr.evomodel.alloppnet.speciation.BirthDeathCollapseModel;
import dr.util.Version;
import java.io.*;
import java.util.*;
/**
* @author Graham Jones
* Date: 01/09/2013
*/
public class SpeciesDelimitationAnalyser {
private final static Version version = new BeastVersion();
private ArrayList<Cluster> clusters;
private TaxonList taxonList;
private int burnin;
double collapseheight;
private double similaritycutoff;
private String inputFileName;
private String outputFileName;
private class Cluster {
private int count;
private int [] partition;
private double totalsimilarity;
private boolean deleted;
public Cluster(TaxonList taxonList, Tree tree, double collapseheight) {
int nshort = 0;
count = 1;
partition = new int[taxonList.getTaxonCount()];
totalsimilarity = 1.0;
deleted = false;
int nnodes = tree.getNodeCount();
for (int n = 0; n < nnodes; n++) {
NodeRef nr = tree.getNode(n);
boolean collapse;
if (tree.isRoot(nr)) {
collapse = BirthDeathCollapseModel.belowCollapseHeight(tree.getNodeHeight(nr), collapseheight);
} else {
NodeRef anc = tree.getParent(nr);
collapse = (BirthDeathCollapseModel.belowCollapseHeight(tree.getNodeHeight(nr), collapseheight)
&& !BirthDeathCollapseModel.belowCollapseHeight(tree.getNodeHeight(anc), collapseheight));
}
if (collapse) {
nshort++;
nodeToClade(taxonList, tree, nr, nshort);
}
}
}
double distance(Cluster sd) {
double d = 0.0;
for (int i = 0; i < partition.length; i++) {
for (int j = 0; j < i; j ++) {
boolean pair = partition[i] == partition[j];
boolean sdpair = sd.partition[i] == sd.partition[j];
if (pair != sdpair) {
d += 1.0;
}
}
}
return d / (partition.length * (partition.length-1.0) / 2.0);
}
double similarity(Cluster sd) {
return 1.0 - distance(sd);
}
private void nodeToClade(TaxonList taxonlist, Tree tree, NodeRef nr, int idx) {
if (tree.isExternal(nr)) {
String id = tree.getNodeTaxon(nr).getId();
int i = taxonIdToInt(taxonlist, id);
partition[i] = idx;
} else {
assert 2 == tree.getChildCount(nr);
nodeToClade(taxonlist, tree, tree.getChild(nr, 0), idx);
nodeToClade(taxonlist, tree, tree.getChild(nr, 1), idx);
}
}
int taxonIdToInt(TaxonList taxonList, String id) {
int j = -1;
for (int i = 0; i < taxonList.getTaxonCount(); i++) {
String tid = taxonList.getTaxonId(i);
if (id.compareTo(tid) == 0) {
assert j < 0;
j = i;
}
}
assert j >= 0;
return j;
}
}
SpeciesDelimitationAnalyser(int burnin, double collapseheight, double similaritycutoff, String inputFileName, String outputFileName) {
this.burnin = burnin;
this.collapseheight = collapseheight;
this.similaritycutoff = similaritycutoff;
this.inputFileName = inputFileName;
this.outputFileName = outputFileName;
}
void readtrees() throws IOException {
clusters = new ArrayList<Cluster>(0);
int totalTrees;
FileReader fileReader = new FileReader(inputFileName);
NexusImporter importer = new NexusImporter(fileReader);
System.out.println("Reading trees...");
try {
taxonList = importer.parseTaxaBlock();
totalTrees = 0;
while (importer.hasTree()) {
Tree tree = importer.importNextTree();
if (totalTrees >= burnin) {
Cluster sd = new Cluster(taxonList, tree, collapseheight);
clusters.add(sd);
}
if (totalTrees > 0 && (totalTrees % 100 == 0)) {
System.out.print("*");
System.out.flush();
}
if (totalTrees > 0 && (totalTrees % 5000 == 0)) {
System.out.println(" " + totalTrees);
System.out.flush();
}
totalTrees++;
}
System.out.println("");
} catch (Importer.ImportException e) {
System.err.println("Error Parsing Input Tree: " + e.getMessage());
return;
}
fileReader.close();
if (totalTrees < 1) {
System.err.println("No trees");
return;
}
System.out.println("Total trees read: " + totalTrees);
}
void countclusterings() {
if (burnin > 0) {
System.out.println("Ignoring first " + burnin + " trees.");
}
System.out.println("Counting clusterings... ");
for (int i = 0; i < clusters.size(); i++) {
if (i > 0 && (i % 100 == 0)) {
System.out.print("*");
System.out.flush();
}
if (i > 0 && (i % 5000 == 0)) {
System.out.println(" " + i);
System.out.flush();
}
for (int j = i+1; j < clusters.size(); j++) {
if (clusters.get(j).count > 0) {
double simij = clusters.get(i).similarity(clusters.get(j));
if (simij == 1.0) {
clusters.get(i).count += 1;
clusters.get(j).count = 0;
clusters.get(j).deleted = true;
}
if (simij >= similaritycutoff || simij == 1.0) {
clusters.get(i).totalsimilarity += simij;
}
}
}
}
Collections.sort(clusters, CLUSTER_COMPARATOR);
}
void writeresults() throws IOException {
FileWriter fileWriter = new FileWriter(outputFileName);
fileWriter.write("count fraction similarity nclusters ");
for (int i = 0; i < taxonList.getTaxonCount(); i++) {
String tstr = taxonList.getTaxonId(i);
while (tstr.length() < 4) tstr += " ";
fileWriter.write(tstr + " ");
}
fileWriter.write("\n");
for (int i = 0; i < clusters.size(); i++) {
Cluster sd = clusters.get(i);
if (sd.count > 0) {
String countstr = "" + sd.count;
while (countstr.length() < 10) countstr += " ";
String fracstr = "" + (double)sd.count / (double) clusters.size();
while (fracstr.length() < 22) fracstr += " ";
String simstr = "" + sd.totalsimilarity;
while (simstr.length() < 22) simstr += " ";
int clustercount = 0;
for (int j = 0; j < sd.partition.length; j ++) {
if (sd.partition[j] > clustercount) {
clustercount = sd.partition[j];
}
}
String ccstr = "" + clustercount;
while (ccstr.length() < 10) ccstr += " ";
fileWriter.write(countstr + " " + fracstr + " " + simstr + " " + ccstr + " ");
for (int j = 0; j < sd.partition.length; j ++) {
String pjstr = "" + sd.partition[j];
while (pjstr.length() < 4) { pjstr += " "; }
fileWriter.write(pjstr + " ");
}
fileWriter.write("\n");
}
}
fileWriter.write("\n");
fileWriter.close();
}
public static void printTitle() {
System.out.println();
centreLine("SpeciesDelimitationAnalyser " + version.getVersionString() + ", " + version.getDateString(), 60);
centreLine("Finds clusterings of individuals into clusters", 60);
centreLine("(ie possible species) from MCMC tree samples", 60);
centreLine("by", 60);
centreLine("Graham Jones", 60);
centreLine("www.indriid.com", 60);
System.out.println();
System.out.println();
}
public static void centreLine(String line, int pageWidth) {
int n = pageWidth - line.length();
int n1 = n / 2;
for (int i = 0; i < n1; i++) {
System.out.print(" ");
}
System.out.println(line);
}
public static void printUsage(Arguments arguments) {
arguments.printUsage("SpeciesDelimitationAnalyser", "<input-file-name> <output-file-name>");
System.out.println();
System.out.println(" Example: SpeciesDelimitationAnalyser treesamples.txt out.txt");
System.out.println();
}
static final Comparator<Cluster> CLUSTER_COMPARATOR = new Comparator<Cluster>() {
public int compare(Cluster a, Cluster b) {
if (a.deleted != b.deleted) {
return a.deleted ? 1 : -1;
}
if (b.totalsimilarity != a.totalsimilarity) {
return (b.totalsimilarity > a.totalsimilarity) ? 1 : -1;
}
return 0;
}
};
public static void main(String[] args) throws java.io.IOException {
Locale.setDefault(Locale.US);
printTitle();
Arguments arguments = new Arguments(
new Arguments.Option[]{
new Arguments.IntegerOption("burnin", "the number of states to be considered as 'burn-in' [default = none]"),
new Arguments.RealOption("collapseheight", "the height below which nodes get collapsed [default = .001]"),
new Arguments.RealOption("simcutoff", "the value above which two clusters are regarded as similar enough to support one another's credibility [default = .9]"),
new Arguments.Option("help", "option to print this message")
});
try {
arguments.parseArguments(args);
} catch (Arguments.ArgumentException ae) {
System.out.println(ae);
printUsage(arguments);
System.exit(1);
}
if (arguments.hasOption("help")) {
printUsage(arguments);
System.exit(0);
}
int burnin = -1;
if (arguments.hasOption("burnin")) {
burnin = arguments.getIntegerOption("burnin");
}
System.out.println();
double collapseheight = 0.001;
if (arguments.hasOption("collapseheight")) {
collapseheight = arguments.getRealOption("collapseheight");
}
double similaritycutoff = 0.9;
if (arguments.hasOption("simcutoff")) {
similaritycutoff = arguments.getRealOption("simcutoff");
}
System.out.println("burnin " + burnin + " collapseheight " + collapseheight + " simcutoff "+ similaritycutoff);
String[] args2 = arguments.getLeftoverArguments();
if (args2.length > 2) {
System.err.println("Unknown option: " + args2[2]);
System.err.println();
printUsage(arguments);
System.exit(1);
}
if (args2.length < 2) {
System.err.println("Input filename and outputfilename required");
System.err.println();
printUsage(arguments);
System.exit(1);
}
String inputFileName = args2[0];
String outputFileName = args2[1];
SpeciesDelimitationAnalyser spDA =
new SpeciesDelimitationAnalyser(burnin, collapseheight, similaritycutoff, inputFileName, outputFileName);
spDA.readtrees();
spDA.countclusterings();
spDA.writeresults();
System.exit(0);
}
}