/* * TreeMetrics.java * * Copyright (c) 2002-2015 Alexei Drummond, Andrew Rambaut and Marc Suchard * * This file is part of BEAST. * See the NOTICE file distributed with this work for additional * information regarding copyright ownership and licensing. * * BEAST is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * BEAST is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with BEAST; if not, write to the * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, * Boston, MA 02110-1301 USA */ package dr.evolution.tree; import dr.evolution.io.NewickImporter; import dr.evolution.util.TaxonList; import java.io.FileReader; /** * various utility functions on trees. * * @version $Id: TreeMetrics.java,v 1.7 2005/05/24 20:25:57 rambaut Exp $ * * @author Alexei Drummond * @author Korbinian Strimmer */ public class TreeMetrics { /** * computes Robinson-Foulds (1981) distance between two trees * * @param t1 tree 1 * @param t2 tree 2 * * Definition: Assuming that t1 is the reference tree, let fn be the * false negatives, i.e. the number of edges in t1 missing in t2, * and fp the number of false positives, i.e. the number of edges * in t2 missing in t1. The RF distance is then (fn + fp)/2 */ public static double getRobinsonFouldsDistance(Tree t1, Tree t2) { SplitSystem s1 = SplitUtils.getSplits(t1); return getRobinsonFouldsDistance(s1, t2); } /** * computes Robinson-Foulds (1981) distance between two trees * * @param s1 tree 1 (as represented by a SplitSystem) * @param t2 tree 2 */ public static double getRobinsonFouldsDistance(SplitSystem s1, Tree t2) { TaxonList taxonList = s1.getTaxonList(); SplitSystem s2 = SplitUtils.getSplits(taxonList, t2); if (s1.getLabelCount() != s2.getLabelCount()) throw new IllegalArgumentException("Number of labels must be the same!"); int ns1 = s1.getSplitCount(); int ns2 = s2.getSplitCount(); // number of splits in t1 missing in t2 int fn = 0; for (int i = 0; i < ns1; i++) { if (!s2.hasSplit(s1.getSplit(i))) fn++; } // number of splits in t2 missing in t1 int fp = 0; for (int i = 0; i < ns2; i++) { if (!s1.hasSplit(s2.getSplit(i))) fp++; } return 0.5*((double) fp + (double) fn); } /** * computes Robinson-Foulds (1981) distance between two trees * rescaled to a number between 0 and 1 * * @param t1 tree 1 * @param t2 tree 2 */ public static double getRobinsonFouldsRescaledDistance(Tree t1, Tree t2) { SplitSystem s1 = SplitUtils.getSplits(t1); return getRobinsonFouldsRescaledDistance(s1, t2); } /** * computes Robinson-Foulds (1981) distance between two trees * rescaled to a number between 0 and 1 * * @param s1 tree 1 (as represented by a SplitSystem) * @param t2 tree 2 */ public static double getRobinsonFouldsRescaledDistance(SplitSystem s1, Tree t2) { return getRobinsonFouldsDistance(s1, t2)/(double) s1.getSplitCount(); } /** * Analyze trace * @param trees an array of trees * @param update the step size between instances of tree */ private static void analyze(Tree[] trees, int update) { TaxonList masterList = trees[0]; System.out.println("Calculating splits..."); SplitSystem[] splits = new SplitSystem[trees.length]; for (int i =0; i < splits.length; i++) { splits[i] = SplitUtils.getSplits(masterList, trees[i]); } int maxOffset = MAX_OFFSET; int samples = trees.length; if ((samples/3) < maxOffset) { maxOffset = (samples/3); } double[] meanDistances = new double[maxOffset]; System.out.println("Calculating mean distance per lag..."); for (int i=0; i < maxOffset; i++) { meanDistances[i] = 0; for (int j = 0; j < samples-i; j++) { double distance = getRobinsonFouldsRescaledDistance(splits[i], trees[j]); meanDistances[i] += distance; } meanDistances[i] /= ((double) samples-i); System.out.println(meanDistances[i]); } } public static final void main(String[] args) throws Exception { FileReader reader = new FileReader(args[0]); NewickImporter importer = new NewickImporter(reader); Tree[] trees = importer.importTrees(null); System.out.println("Imported " + trees.length + " trees..."); analyze(trees, 1000); } private static int MAX_OFFSET = 1000; }