/* * LogFileTraces.java * * Copyright (c) 2002-2015 Alexei Drummond, Andrew Rambaut and Marc Suchard * * This file is part of BEAST. * See the NOTICE file distributed with this work for additional * information regarding copyright ownership and licensing. * * BEAST is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * BEAST is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with BEAST; if not, write to the * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, * Boston, MA 02110-1301 USA */ package dr.inference.trace; import java.io.*; import java.util.ArrayList; import java.util.List; import java.util.StringTokenizer; import java.util.TreeMap; /** * A class that stores a set of traces from a single chain * * @author Andrew Rambaut * @author Alexei Drummond * @version $Id: LogFileTraces.java,v 1.4 2006/11/30 17:39:29 rambaut Exp $ */ public class LogFileTraces extends AbstractTraceList { public LogFileTraces(String name, File file) { this.name = name; this.file = file; } /** * @return the name of this traceset */ public String getName() { return name; } public File getFile() { return file; } /** * @return the last state in the chain */ public long getMaxState() { return lastState; } public boolean isIncomplete() { return false; } /** * @return the number of states excluding the burnin */ public int getStateCount() { // This is done as two integer divisions to ensure the same rounding for // the burnin... return (int) (((lastState - firstState) / stepSize) - (getBurnIn() / stepSize) + 1); } /** * @return the number of states in the burnin */ public int getBurninStateCount() { return (int) (getBurnIn() / stepSize); } /** * @return the size of the step between states */ public long getStepSize() { return stepSize; } public long getBurnIn() { return burnIn; } /** * @return the number of traces in this traceset */ public int getTraceCount() { return traces.size(); } /** * @return the index of the trace with the given name */ public int getTraceIndex(String name) { for (int i = 0; i < traces.size(); i++) { Trace trace = getTrace(i); if (name.equals(trace.getName())) { return i; } } return -1; } /** * @return the name of the trace with the given index */ public String getTraceName(int index) { return getTrace(index).getName(); } /** * @param index requested trace index * @return the trace for a given index */ public Trace getTrace(int index) { return traces.get(index); } public void setBurnIn(long burnin2) { this.burnIn = (int) burnin2; for (Trace trace : traces) { trace.setTraceStatistics(null); } } public double getStateValue(int trace, int index) { return (Double) getTrace(trace).getValue(index + (int) (getBurnIn() / stepSize)); } /** * Read several consecutive values of one state into a destination array * * @param nState State index number * @param destination array to store result * @param offset first trace index */ public void getStateValues(int nState, double[] destination, int offset) { final int index1 = nState + (int) (getBurnIn() / stepSize); for (int k = 0; k < destination.length; ++k) { destination[k] = (Double) getTrace(k + offset).getValue(index1); } } /** * Use the flag boolean[] filtered in FilteredTraceList * to determine whether to remove filtered values, * when filtered != null * @param index the index of trace * @param fromIndex low endpoint (inclusive) of the subList. * @param toIndex high endpoint (exclusive) of the subList. * @return */ public List getValues(int index, int fromIndex, int toIndex) { List values = null; try { Trace trace = getTrace(index); values = trace.getValues(fromIndex, toIndex, super.filtered); } catch (IndexOutOfBoundsException e) { System.err.println("getValues error: trace index = " + index); } return values; } public List getValues(int index) { return this.getValues(index, getBurninStateCount(), getTrace(index).getValueCount()); } public List getBurninValues(int index) { return this.getValues(index, 0, getBurninStateCount()); } /** * Use the {@link #loadTraces(File) loadTraces} method, * where <code>File</code> is defined from the constructor. * * @throws TraceException * @throws IOException */ public void loadTraces() throws TraceException, IOException { loadTraces(file); } /** * Read through <code>File</code> created from a log file, * fill in <code>traces</code> list, and set <code>TraceType</code>. * * @param file <code>File</code> * @throws TraceException * @throws IOException */ public void loadTraces(File file) throws TraceException, IOException { final Reader reader = new FileReader(file); loadTraces(reader); reader.close(); } /** * Read through <code>InputStream</code> created from a log file, * fill in <code>traces</code> list, and set <code>TraceType</code>. * * @param in <code>InputStream</code> * @throws TraceException * @throws IOException */ public void loadTraces(InputStream in) throws TraceException, IOException { final Reader reader = new InputStreamReader(in); loadTraces(reader); reader.close(); } /** * Read through either <code>FileReader</code> or <code>InputStreamReader</code> * created from a log file, * fill in <code>traces</code> list, and set <code>TraceType</code>. * * @param r The input for <code>TrimLineReader</code>. * Use either <code>FileReader</code> or <code>InputStreamReader</code> * @throws TraceException * @throws java.io.IOException */ private void loadTraces(Reader r) throws TraceException, java.io.IOException { final TrimLineReader reader = new LogFileTraces.TrimLineReader(r); // Read through to first token StringTokenizer tokens = reader.tokenizeLine(); if (tokens == null) { throw new TraceException("Trace file is empty."); } // read over empty lines while (!tokens.hasMoreTokens()) { tokens = reader.tokenizeLine(); } // skip the first column which should be the state number String token = tokens.nextToken(); // lines starting with [ are ignored, assuming comments in MrBayes file // lines starting with # are ignored, assuming comments in Migrate or BEAST file while (token.startsWith("[") || token.startsWith("#")) { readTraceType(token, tokens); // using # to define type tokens = reader.tokenizeLine(); // read over empty lines while (!tokens.hasMoreTokens()) { tokens = reader.tokenizeLine(); } // read state token and ignore token = tokens.nextToken(); } // read label tokens String[] labels = new String[tokens.countTokens()]; for (int i = 0; i < labels.length; i++) { labels[i] = tokens.nextToken(); addTraceAndType(labels[i]); } int traceCount = getTraceCount(); long num_samples = 0; String line = reader.readLine(); tokens = reader.getStringTokenizer(line); String lastLine = line; while (tokens != null && tokens.hasMoreTokens()) { String stateString = tokens.nextToken(); long state = 0; try { try { // Changed this to parseDouble because LAMARC uses scientific notation for the state number state = (long) Double.parseDouble(stateString); } catch (NumberFormatException nfe) { throw new TraceException("Unable to parse state number in column 1 (Line " + reader.getLineNumber() + ")"); } if (num_samples < 1) { // MrBayes puts 1 as the first state, BEAST puts 0 // In order to get the same gap between subsequent samples, // we force this to 0. if (state == 1) state = 0; } num_samples += 1; if (!addState(state, num_samples)) { throw new TraceException("State " + state + " is not consistent with previous spacing (Line " + reader.getLineNumber() + ")"); } } catch (NumberFormatException nfe) { throw new TraceException("State " + state + ":Expected real value in column " + reader.getLineNumber()); } for (int i = 0; i < traceCount; i++) { if (tokens.hasMoreTokens()) { String value = tokens.nextToken(); if (state == 0) assignTraceTypeAccordingValue(i, value); try { // values[i] = Double.parseDouble(tokens.nextToken()); addParsedValue(i, value); } catch (NumberFormatException nfe) { throw new TraceException("State " + state + ": Expected correct data type " + "(Double, Integer or String) in column " + (i + 1) + " (Line " + reader.getLineNumber() + ")"); } } else { throw new TraceException("State " + state + ": missing values at line " + reader.getLineNumber()); } } // used to keep the last valid line lastLine = line; // tokens = reader.tokenizeLine(); line = reader.readLine(); tokens = reader.getStringTokenizer(line); } if (num_samples == 0) throw new TraceException("Incorrect file format, no sample is found !"); burnIn = lastState / 10; if (lastState < 0) lastState = firstState; if (stepSize < 0 && lastState > 0) stepSize = lastState; validateTraceType(lastLine); validateUniqueValues(); } public static final int MIN_SAMPLE = 5; // used in StatisticsModel private final int MAX_UNIQUE_VALUE = 200; // change integer type into real, if too many unique values private void validateUniqueValues() throws TraceException { for (int id = 0; id < getTraceCount(); id++) { Trace trace = getTrace(id); if (trace.getTraceType().isInteger()) { int uniqueValue = trace.getUniqueVauleCount(); if (uniqueValue > MAX_UNIQUE_VALUE) { System.out.println("Too many unique values (>" + MAX_UNIQUE_VALUE + ") found in trace " + trace.getName() + " at " + id); changeTraceType(id, TraceType.REAL); } } } } // validate TraceType at the last value of trace, // in case integer is logged for double values in the first (even several) row. // it must use original line, because the data type of values in traces are changed private void validateTraceType(String lastLine) throws TraceException { String[] values = lastLine.split("\\t"); // the 1st is state for (int i=1; i < values.length; i++) { int traceId = i-1; Trace trace = getTrace(traceId); // avoid to assign integer to double incorrectly if (trace.getTraceType().isInteger() && NumberUtils.hasDecimalPoint(values[i])) changeTraceType(traceId, TraceType.REAL); } } /** * add a value for the n'th trace * * @param nTrace trace index * @param value next value */ private void addParsedValue(int nTrace, String value) { String name = getTraceName(nTrace); // System.out.println(thisTrace.getTraceType() + " " + value); if (tracesType.get(name).isNumber()) { Double v = Double.parseDouble(value); getTrace(nTrace).add(v); } else { getTrace(nTrace).add(value); } } /** * Auto assign INTEGER or CATEGORICAL type to traces * according their values in the first line. * Default type is REAL. * * @param nTrace * @param value */ private void assignTraceTypeAccordingValue(int nTrace, String value) throws TraceException { String name = getTraceName(nTrace); TraceType type = TraceType.REAL; if (NumberUtils.isNumber(value)) { // Double or Integer if (! NumberUtils.hasDecimalPoint(value)) { // Integer type = TraceType.INTEGER; // change tracesType map for tracesType.put(name, type); System.out.println("Auto detect " + type + " type for trace " + name + " at " + nTrace); changeTraceType(nTrace, type); } } else { // String type = TraceType.CATEGORICAL; tracesType.put(name, type); System.out.println("Auto detect " + type + " type for trace " + name + " at " + nTrace); changeTraceType(nTrace, type); } } /** * @deprecated should be replaced by * {@link #assignTraceTypeAccordingValue(int, String) assignTraceTypeAccordingValue} method * * @param firstToken * @param tokens */ private void readTraceType(String firstToken, StringTokenizer tokens) { if (tokens.hasMoreTokens()) { String token; //= tokens.nextToken(); if (firstToken.toLowerCase().contains(TraceType.INTEGER.toString())) { while (tokens.hasMoreTokens()) { token = tokens.nextToken(); tracesType.put(token, TraceType.INTEGER); } } else if (firstToken.toLowerCase().contains(TraceType.CATEGORICAL.toString())) { while (tokens.hasMoreTokens()) { token = tokens.nextToken(); tracesType.put(token, TraceType.CATEGORICAL); } } } } //************************************************************************ // private methods //************************************************************************ // These methods are used by the load function, above /** * Add a trace for a statistic of the given name * * @param name trace name */ private void addTraceAndType(String name) { if (tracesType.get(name) == null) { traces.add(createTrace(name, TraceType.REAL)); tracesType.put(name, TraceType.REAL); } else { traces.add(createTrace(name, tracesType.get(name))); } } private Trace createTrace(String name, TraceType traceType) { if (traceType.isNumber()) { return new Trace<Double>(name, traceType); } else { return new Trace<String>(name, TraceType.CATEGORICAL); } } // TODO get rid of generic to make things easy // TODO change to String only, and parse to double, int or string in getValues according to trace type public void changeTraceType(int id, TraceType newType) throws TraceException { if (id >= getTraceCount() || id < 0) throw new TraceException("Invalid trace id : " + id + ", which should 0 < and >= " + getTraceCount()); Trace trace = getTrace(id); TraceType oldType = trace.getTraceType(); if (oldType != newType) { Trace newTrace = createTrace(trace.getName(), newType); if (newType.isDiscrete()) { int uniqueValue = trace.getUniqueVauleCount(); if (uniqueValue > MAX_UNIQUE_VALUE) throw new TraceException("Type change is failed, because too many unique values (>" + MAX_UNIQUE_VALUE + ") are found !"); } if (oldType.isCategorical() || newType.isCategorical()) { try { if (newType.isNumber()) { // oldType.isCategorical() for (int i = 0; i < trace.getValueCount(); i++) { newTrace.add(Double.parseDouble(trace.getValue(i).toString())); // String => Double } } else if (oldType.isContinuous()) { // newType.isCategorical() for (int i = 0; i < trace.getValueCount(); i++) { newTrace.add(trace.getValue(i).toString()); // Double => String } } else if (oldType.isIntegerOrBinary()) { // newType.isCategorical() // treat Integer separately to rm .0 because Trace<Double> for (int i = 0; i < trace.getValueCount(); i++) { String value = trace.getValue(i).toString(); String valueNoDecimal = String.valueOf(value).split("\\.")[0]; newTrace.add(valueNoDecimal); // Integer => String } } } catch (Exception e) { throw new TraceException("Type change is failed, when parsing " + oldType + " to " + newType + " in trace " + trace.getName()); } if (newTrace.getValueCount() != trace.getValueCount()) throw new TraceException("Type change is failed, because values size is different after copy !"); traces.set(id, newTrace); } else { trace.setTraceType(newType); // change between numeric } System.out.println("Change " + oldType + " to " + newType + " type for trace " + trace.getName() + " at " + id); } } /** * Add a state number for these traces. This should be * called before adding values for each trace. The spacing * between stateNumbers should remain constant. * * @param stateNumber the state * @param num_samples the number of samples (rows) * @return false if the state number is inconsistent */ private boolean addState(long stateNumber, long num_samples) { if (firstState < 0) { // it can use num_samples==1 to replace firstState < 0 firstState = stateNumber; } else if (secondState < 0) { secondState = stateNumber; } else if (stepSize < 0) { // delay setting of the stepSize until the step between // the second and third step in case the first step is // 1 (i.e., MrBayes) and the stepsize is 1. stepSize = stateNumber - secondState; } else { long step = stateNumber - lastState; if (step != stepSize) { System.out.println("step: " + step + " != " + stepSize); return false; } } // System.out.println(num_samples + ": stateNumber=" + stateNumber + " lastState=" + lastState + " firstState=" + firstState + " stepSize=" + stepSize); lastState = stateNumber; return true; } protected final File file; protected final String name; private final List<Trace> traces = new ArrayList<Trace>(); public void addTrace(String newTName, int i) { TraceCustomized tc = new TraceCustomized(newTName); tc.addValues(traces.get(i)); // only Double traces.add(tc); tracesType.put(newTName, TraceType.REAL); } /** * store INTEGER or STRING predefined at the top of log file, only used during loading files * @deprecated should be replaced by * {@link #assignTraceTypeAccordingValue(int, String) assignTraceTypeAccordingValue} method */ private TreeMap<String, TraceType> tracesType = new TreeMap<String, TraceType>(); private long burnIn = -1; private long firstState = -1; private long secondState = -1; private long lastState = -1; private long stepSize = -1; public static class TrimLineReader extends BufferedReader { public TrimLineReader(Reader reader) { super(reader); } public String readLine() throws java.io.IOException { lineNumber += 1; String line = super.readLine(); if (line != null) return line.trim(); return null; } public StringTokenizer tokenizeLine() throws java.io.IOException { String line = readLine(); return getStringTokenizer(line); } public StringTokenizer getStringTokenizer(String line) { if (line == null) return null; return new StringTokenizer(line, "\t"); } public int getLineNumber() { return lineNumber; } private int lineNumber = 0; } }