/*
* LogFileTraces.java
*
* Copyright (c) 2002-2015 Alexei Drummond, Andrew Rambaut and Marc Suchard
*
* This file is part of BEAST.
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership and licensing.
*
* BEAST is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* BEAST is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with BEAST; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301 USA
*/
package dr.inference.trace;
import java.io.*;
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;
import java.util.TreeMap;
/**
* A class that stores a set of traces from a single chain
*
* @author Andrew Rambaut
* @author Alexei Drummond
* @version $Id: LogFileTraces.java,v 1.4 2006/11/30 17:39:29 rambaut Exp $
*/
public class LogFileTraces extends AbstractTraceList {
/**
 * Creates a trace list for a log file. The constructor itself only stores
 * its two arguments; call one of the {@code loadTraces} methods to read data.
 *
 * @param name the name returned by {@link #getName()}
 * @param file the log file returned by {@link #getFile()} and read by {@link #loadTraces()}
 */
public LogFileTraces(String name, File file) {
    this.file = file;
    this.name = name;
}
/**
 * @return the name of this traceset, as given to the constructor
 */
public String getName() {
    return this.name;
}
/**
 * @return the log file given to the constructor
 */
public File getFile() {
    return this.file;
}
/**
 * @return the last state in the chain, i.e. the most recent state number
 *         recorded while loading
 */
public long getMaxState() {
    return this.lastState;
}
/**
 * @return always <code>false</code> for this implementation
 */
public boolean isIncomplete() {
return false;
}
/**
 * @return the number of states excluding the burnin
 */
public int getStateCount() {
    // Two separate integer divisions, on purpose: the burnin must be rounded
    // exactly as it is in getBurninStateCount().
    final long sampledSteps = (lastState - firstState) / stepSize;
    final long burninSteps = getBurnIn() / stepSize;
    return (int) (sampledSteps - burninSteps + 1);
}
/**
 * @return the number of states in the burnin, i.e. the burnin divided by
 *         the step size using integer division
 */
public int getBurninStateCount() {
    final long burninSteps = getBurnIn() / stepSize;
    return (int) burninSteps;
}
/**
 * @return the size of the step between states
 */
public long getStepSize() {
    return this.stepSize;
}
/**
 * @return the current burnin, expressed in state numbers (it is divided by
 *         the step size wherever a number of samples is needed)
 */
public long getBurnIn() {
    return this.burnIn;
}
/**
 * @return the number of traces in this traceset
 */
public int getTraceCount() {
    final int count = traces.size();
    return count;
}
/**
 * Looks up a trace by name, scanning the traces in index order.
 *
 * @param name the trace name to look for
 * @return the index of the first trace with the given name, or -1 if there is none
 */
public int getTraceIndex(String name) {
    int found = -1;
    for (int i = 0; found < 0 && i < traces.size(); i++) {
        if (name.equals(getTrace(i).getName())) {
            found = i;
        }
    }
    return found;
}
/**
 * @param index requested trace index
 * @return the name of the trace with the given index
 */
public String getTraceName(int index) {
    final Trace trace = getTrace(index);
    return trace.getName();
}
/**
 * @param index requested trace index
 * @return the trace stored at the given index
 */
public Trace getTrace(int index) {
    return this.traces.get(index);
}
/**
 * Sets the burnin and resets the statistics of every trace to
 * <code>null</code> (presumably so that they are recomputed for the new burnin).
 * <p>
 * The value is stored as a <code>long</code> without narrowing, so burnins
 * larger than <code>Integer.MAX_VALUE</code> are kept intact.
 *
 * @param burnin2 the new burnin, in state numbers
 */
public void setBurnIn(long burnin2) {
    this.burnIn = burnin2;
    for (Trace trace : traces) {
        trace.setTraceStatistics(null);
    }
}
/**
 * Reads one value of one trace, counting states from the end of the burnin.
 * The stored value is cast to <code>Double</code>.
 *
 * @param trace trace index
 * @param index state index, where 0 is the first state after the burnin
 * @return the value of the trace at that state
 */
public double getStateValue(int trace, int index) {
    final int burninOffset = (int) (getBurnIn() / stepSize);
    final Object raw = getTrace(trace).getValue(index + burninOffset);
    return (Double) raw;
}
/**
 * Read several consecutive values of one state into a destination array.
 * Entry <code>k</code> of the array receives the value of trace
 * <code>offset + k</code>; each value is cast to <code>Double</code>.
 *
 * @param nState      State index number, counted from the end of the burnin
 * @param destination array to store result; its length determines how many traces are read
 * @param offset      first trace index
 */
public void getStateValues(int nState, double[] destination, int offset) {
    final int row = nState + (int) (getBurnIn() / stepSize);
    int column = offset;
    for (int k = 0; k < destination.length; k++, column++) {
        destination[k] = (Double) getTrace(column).getValue(row);
    }
}
/**
 * Returns a range of values of one trace. The <code>filtered</code> flags
 * inherited from the superclass are passed on to the trace, which uses them
 * to decide whether filtered values are removed (when they are not null).
 *
 * @param index     the index of trace
 * @param fromIndex low endpoint (inclusive) of the subList.
 * @param toIndex   high endpoint (exclusive) of the subList.
 * @return the requested values, or <code>null</code> if an
 *         <code>IndexOutOfBoundsException</code> occurred (a message is
 *         then printed to <code>System.err</code>)
 */
public List getValues(int index, int fromIndex, int toIndex) {
    try {
        return getTrace(index).getValues(fromIndex, toIndex, super.filtered);
    } catch (IndexOutOfBoundsException e) {
        System.err.println("getValues error: trace index = " + index);
        return null;
    }
}
/**
 * @param index the index of trace
 * @return the values of the trace from the end of the burnin up to its last value
 */
public List getValues(int index) {
    final int from = getBurninStateCount();
    final int to = getTrace(index).getValueCount();
    return this.getValues(index, from, to);
}
/**
 * @param index the index of trace
 * @return the values of the trace that fall inside the burnin
 */
public List getBurninValues(int index) {
    final int burninEnd = getBurninStateCount();
    return this.getValues(index, 0, burninEnd);
}
/**
 * Loads the traces from the <code>File</code> given to the constructor by
 * delegating to {@link #loadTraces(File) loadTraces}.
 *
 * @throws TraceException
 * @throws IOException
 */
public void loadTraces() throws TraceException, IOException {
    loadTraces(this.file);
}
/**
 * Read through <code>File</code> created from a log file,
 * fill in <code>traces</code> list, and set <code>TraceType</code>.
 * The reader opened on the file is closed even when loading fails.
 *
 * @param file <code>File</code>
 * @throws TraceException
 * @throws IOException
 */
public void loadTraces(File file) throws TraceException, IOException {
    final Reader reader = new FileReader(file);
    try {
        loadTraces(reader);
    } finally {
        // close in finally so that a parse error does not leak the file handle
        reader.close();
    }
}
/**
 * Read through <code>InputStream</code> created from a log file,
 * fill in <code>traces</code> list, and set <code>TraceType</code>.
 * The reader wrapped around the stream is closed when loading finishes,
 * whether it succeeds or fails.
 *
 * @param in <code>InputStream</code>
 * @throws TraceException
 * @throws IOException
 */
public void loadTraces(InputStream in) throws TraceException, IOException {
    final Reader reader = new InputStreamReader(in);
    try {
        loadTraces(reader);
    } finally {
        // close in finally so that a parse error does not leave the reader open
        reader.close();
    }
}
/**
 * Read through either <code>FileReader</code> or <code>InputStreamReader</code>
 * created from a log file,
 * fill in <code>traces</code> list, and set <code>TraceType</code>.
 * <p>
 * Steps: skip leading empty lines and comment lines (first token starting
 * with <code>[</code> or <code>#</code>), read the tab-separated column labels
 * (the first column is the state number), then read data rows until the end
 * of input or the first empty line. Afterwards the burnin is set to a tenth
 * of the last state and the trace types are validated.
 *
 * @param r The input for <code>TrimLineReader</code>.
 *          Use either <code>FileReader</code> or <code>InputStreamReader</code>
 * @throws TraceException if the input is empty, ends before a label line or a
 *                        sample is found, or contains an inconsistent row
 * @throws java.io.IOException
 */
private void loadTraces(Reader r) throws TraceException, java.io.IOException {
    final TrimLineReader reader = new LogFileTraces.TrimLineReader(r);

    // Read through to first token
    StringTokenizer tokens = reader.tokenizeLine();
    if (tokens == null) {
        throw new TraceException("Trace file is empty.");
    }

    // read over empty lines; tokenizeLine() returns null at the end of input
    while (!tokens.hasMoreTokens()) {
        tokens = reader.tokenizeLine();
        if (tokens == null) {
            throw new TraceException("Trace file is empty.");
        }
    }

    // skip the first column which should be the state number
    String token = tokens.nextToken();

    // lines starting with [ are ignored, assuming comments in MrBayes file
    // lines starting with # are ignored, assuming comments in Migrate or BEAST file
    while (token.startsWith("[") || token.startsWith("#")) {
        readTraceType(token, tokens); // using # to define type

        // read over empty lines, failing cleanly if the input ends before the label line
        do {
            tokens = reader.tokenizeLine();
            if (tokens == null) {
                throw new TraceException("Incorrect file format, no column labels are found !");
            }
        } while (!tokens.hasMoreTokens());

        // read state token and ignore
        token = tokens.nextToken();
    }

    // read label tokens
    String[] labels = new String[tokens.countTokens()];
    for (int i = 0; i < labels.length; i++) {
        labels[i] = tokens.nextToken();
        addTraceAndType(labels[i]);
    }

    int traceCount = getTraceCount();
    long num_samples = 0;

    String line = reader.readLine();
    tokens = reader.getStringTokenizer(line);
    String lastLine = line;

    while (tokens != null && tokens.hasMoreTokens()) {
        String stateString = tokens.nextToken();
        long state;
        try {
            // Changed this to parseDouble because LAMARC uses scientific notation for the state number
            state = (long) Double.parseDouble(stateString);
        } catch (NumberFormatException nfe) {
            throw new TraceException("Unable to parse state number in column 1 (Line " +
                    reader.getLineNumber() + ")");
        }

        if (num_samples < 1) {
            // MrBayes puts 1 as the first state, BEAST puts 0
            // In order to get the same gap between subsequent samples,
            // we force this to 0.
            if (state == 1) state = 0;
        }
        num_samples += 1;

        if (!addState(state, num_samples)) {
            throw new TraceException("State " + state + " is not consistent with previous spacing (Line " +
                    reader.getLineNumber() + ")");
        }

        for (int i = 0; i < traceCount; i++) {
            if (tokens.hasMoreTokens()) {
                String value = tokens.nextToken();

                // the trace type is auto-detected only from a row whose state is 0
                if (state == 0) assignTraceTypeAccordingValue(i, value);

                try {
                    addParsedValue(i, value);
                } catch (NumberFormatException nfe) {
                    throw new TraceException("State " + state + ": Expected correct data type " +
                            "(Double, Integer or String) in column " + (i + 1) +
                            " (Line " + reader.getLineNumber() + ")");
                }
            } else {
                throw new TraceException("State " + state + ": missing values at line " + reader.getLineNumber());
            }
        }

        // used to keep the last valid line
        lastLine = line;

        line = reader.readLine();
        tokens = reader.getStringTokenizer(line);
    }

    if (num_samples == 0)
        throw new TraceException("Incorrect file format, no sample is found !");

    // default burnin: 10% of the chain
    burnIn = lastState / 10;

    if (lastState < 0)
        lastState = firstState;

    // fewer than three rows were read, so addState() never derived a step size
    if (stepSize < 0 && lastState > 0)
        stepSize = lastState;

    validateTraceType(lastLine);
    validateUniqueValues();
}
public static final int MIN_SAMPLE = 5; // used in StatisticsModel
/**
 * Upper limit on the number of unique values a trace may hold and still be
 * treated as a discrete (integer or categorical) trace.
 */
private static final int MAX_UNIQUE_VALUE = 200;
/**
 * Changes integer traces into real ones when they hold too many unique
 * values (more than <code>MAX_UNIQUE_VALUE</code>).
 *
 * @throws TraceException if the type change fails
 */
private void validateUniqueValues() throws TraceException {
    for (int id = 0; id < getTraceCount(); id++) {
        final Trace trace = getTrace(id);
        if (!trace.getTraceType().isInteger()) {
            continue;
        }
        final int uniqueValue = trace.getUniqueVauleCount();
        if (uniqueValue <= MAX_UNIQUE_VALUE) {
            continue;
        }
        System.out.println("Too many unique values (>" + MAX_UNIQUE_VALUE +
                ") found in trace " + trace.getName() + " at " + id);
        changeTraceType(id, TraceType.REAL);
    }
}
/**
 * Validates the trace types against the last data line, in case integers
 * were logged for double values in the first (even several) rows: an
 * integer trace whose last value has a decimal point is changed to real.
 * <p>
 * It must use the original line, because the data type of values in traces
 * are changed. The line is tokenized in the same way as during loading
 * (tab delimiter, empty columns collapsed), so column <code>k</code> here is
 * the same column <code>k</code> that was stored in trace <code>k</code>.
 * Columns beyond the number of traces are ignored.
 *
 * @param lastLine the last valid (trimmed) data line of the log file
 * @throws TraceException if a type change fails
 */
private void validateTraceType(String lastLine) throws TraceException {
    final StringTokenizer columns = new StringTokenizer(lastLine, "\t");
    // the 1st is state
    if (columns.hasMoreTokens()) {
        columns.nextToken();
    }
    final int traceCount = getTraceCount();
    for (int traceId = 0; traceId < traceCount && columns.hasMoreTokens(); traceId++) {
        final String value = columns.nextToken();
        // avoid to assign integer to double incorrectly
        if (getTrace(traceId).getTraceType().isInteger() && NumberUtils.hasDecimalPoint(value))
            changeTraceType(traceId, TraceType.REAL);
    }
}
/**
 * Add a value for the n'th trace. The value is parsed as a
 * <code>Double</code> when the type registered for the trace name is
 * numeric, and stored as the raw string otherwise.
 *
 * @param nTrace trace index
 * @param value  next value
 * @throws NumberFormatException if a numeric trace is given a value that cannot be parsed
 */
private void addParsedValue(int nTrace, String value) {
    final boolean numeric = tracesType.get(getTraceName(nTrace)).isNumber();
    if (numeric) {
        final Double parsed = Double.valueOf(value);
        getTrace(nTrace).add(parsed);
    } else {
        getTrace(nTrace).add(value);
    }
}
/**
 * Auto assign INTEGER or CATEGORICAL type to a trace according to one of
 * its values (the caller passes a value from a row whose state is 0).
 * A number without decimal point gives INTEGER, a non-number gives
 * CATEGORICAL, and a number with decimal point leaves the trace untouched
 * (default type is REAL).
 *
 * @param nTrace trace index
 * @param value  the value to inspect
 * @throws TraceException if the type change fails
 */
private void assignTraceTypeAccordingValue(int nTrace, String value) throws TraceException {
    final String name = getTraceName(nTrace);

    final TraceType detected;
    if (!NumberUtils.isNumber(value)) {
        detected = TraceType.CATEGORICAL; // String
    } else if (!NumberUtils.hasDecimalPoint(value)) {
        detected = TraceType.INTEGER;
    } else {
        return; // Double: keep the default
    }

    // keep the name -> type map in step with the trace itself
    tracesType.put(name, detected);
    System.out.println("Auto detect " + detected + " type for trace " + name + " at " + nTrace);
    changeTraceType(nTrace, detected);
}
/**
 * Reads a type declaration from a comment line: when the first token
 * contains the name of the INTEGER or CATEGORICAL type (compared in lower
 * case), every remaining token on the line is registered as a trace name of
 * that type. Other comment lines are ignored.
 * <p>
 * The first token is lower-cased with <code>Locale.ROOT</code> so that the
 * match does not depend on the default locale of the JVM.
 *
 * @deprecated should be replaced by
 * {@link #assignTraceTypeAccordingValue(int, String) assignTraceTypeAccordingValue} method
 *
 * @param firstToken the first token of the comment line
 * @param tokens     the remaining tokens of the comment line
 */
private void readTraceType(String firstToken, StringTokenizer tokens) {
    if (!tokens.hasMoreTokens()) {
        return;
    }

    final String keyword = firstToken.toLowerCase(java.util.Locale.ROOT);
    final TraceType declared;
    if (keyword.contains(TraceType.INTEGER.toString())) {
        declared = TraceType.INTEGER;
    } else if (keyword.contains(TraceType.CATEGORICAL.toString())) {
        declared = TraceType.CATEGORICAL;
    } else {
        return;
    }

    while (tokens.hasMoreTokens()) {
        tracesType.put(tokens.nextToken(), declared);
    }
}
//************************************************************************
// private methods
//************************************************************************
// These methods are used by the load function, above
/**
 * Add a trace for a statistic of the given name. The trace gets the type
 * already registered for that name, or REAL (which is then registered) when
 * no type is known yet.
 *
 * @param name trace name
 */
private void addTraceAndType(String name) {
    final boolean typeKnown = tracesType.get(name) != null;
    if (typeKnown) {
        traces.add(createTrace(name, tracesType.get(name)));
        return;
    }
    traces.add(createTrace(name, TraceType.REAL));
    tracesType.put(name, TraceType.REAL);
}
/**
 * Creates an empty trace: a <code>Trace&lt;Double&gt;</code> of the given type
 * when that type is numeric, otherwise a CATEGORICAL
 * <code>Trace&lt;String&gt;</code>.
 *
 * @param name      trace name
 * @param traceType requested type
 * @return the new, empty trace
 */
private Trace createTrace(String name, TraceType traceType) {
    if (!traceType.isNumber()) {
        return new Trace<String>(name, TraceType.CATEGORICAL);
    }
    return new Trace<Double>(name, traceType);
}
// TODO get rid of generic to make things easy
// TODO change to String only, and parse to double, int or string in getValues according to trace type
/**
 * Changes the type of a trace. Nothing happens when the trace already has
 * the requested type. A change between two numeric types only updates the
 * type of the existing trace; a change from or to the categorical type
 * copies all values into a new trace (String to Double, or Double to
 * String), which then replaces the old one at the same index.
 *
 * @param id      index of the trace to change
 * @param newType the requested type
 * @throws TraceException if the index is out of range, if a discrete type is
 *                        requested for a trace with more than
 *                        <code>MAX_UNIQUE_VALUE</code> unique values, or if
 *                        the values cannot be converted
 */
public void changeTraceType(int id, TraceType newType) throws TraceException {
    if (id >= getTraceCount() || id < 0)
        throw new TraceException("Invalid trace id : " + id + ", which should be >= 0 and < " + getTraceCount());
    Trace trace = getTrace(id);
    TraceType oldType = trace.getTraceType();
    if (oldType != newType) {
        Trace newTrace = createTrace(trace.getName(), newType);
        if (newType.isDiscrete()) {
            int uniqueValue = trace.getUniqueVauleCount();
            if (uniqueValue > MAX_UNIQUE_VALUE)
                throw new TraceException("Type change is failed, because too many unique values (>" +
                        MAX_UNIQUE_VALUE + ") are found !");
        }
        if (oldType.isCategorical() || newType.isCategorical()) {
            try {
                if (newType.isNumber()) { // oldType.isCategorical()
                    for (int i = 0; i < trace.getValueCount(); i++) {
                        newTrace.add(Double.parseDouble(trace.getValue(i).toString())); // String => Double
                    }
                } else if (oldType.isContinuous()) { // newType.isCategorical()
                    for (int i = 0; i < trace.getValueCount(); i++) {
                        newTrace.add(trace.getValue(i).toString()); // Double => String
                    }
                } else if (oldType.isIntegerOrBinary()) { // newType.isCategorical()
                    // treat Integer separately to rm .0 because Trace<Double>
                    for (int i = 0; i < trace.getValueCount(); i++) {
                        newTrace.add(wholeNumberText(trace.getValue(i))); // Integer => String
                    }
                }
            } catch (Exception e) {
                throw new TraceException("Type change is failed, when parsing " + oldType +
                        " to " + newType + " in trace " + trace.getName());
            }
            if (newTrace.getValueCount() != trace.getValueCount())
                throw new TraceException("Type change is failed, because values size is different after copy !");
            traces.set(id, newTrace);
        } else {
            trace.setTraceType(newType); // change between numeric
        }
        System.out.println("Change " + oldType + " to " + newType + " type for trace " + trace.getName() + " at " + id);
    }
}

/**
 * Formats a stored value of an integer trace without its fractional part.
 * Finite numbers are truncated numerically, because cutting the text of
 * <code>Double.toString</code> at the first '.' is wrong for values printed
 * in scientific notation (e.g. 1.0E7 would become "1").
 *
 * @param value the stored value; must not be null
 * @return the text of the value without decimals
 */
private static String wholeNumberText(Object value) {
    if (value instanceof Number) {
        final double d = ((Number) value).doubleValue();
        if (!Double.isNaN(d) && !Double.isInfinite(d)) {
            return new java.math.BigDecimal(d).toBigInteger().toString();
        }
    }
    // non-numeric, NaN or infinite: keep the text up to the first '.'
    return value.toString().split("\\.")[0];
}
/**
 * Add a state number for these traces. This should be
 * called before adding values for each trace. The spacing
 * between stateNumbers should remain constant.
 * <p>
 * The first two calls record the first and second state, the third call
 * derives the step size, and every later call is checked against it.
 *
 * @param stateNumber the state
 * @param num_samples the number of samples (rows); not read by this method
 * @return false if the state number is inconsistent
 */
private boolean addState(long stateNumber, long num_samples) {
    final boolean spacingKnown = firstState >= 0 && secondState >= 0 && stepSize >= 0;

    if (spacingKnown) {
        final long step = stateNumber - lastState;
        if (step != stepSize) {
            System.out.println("step: " + step + " != " + stepSize);
            return false;
        }
    } else if (firstState < 0) { // it can use num_samples==1 to replace firstState < 0
        firstState = stateNumber;
    } else if (secondState < 0) {
        secondState = stateNumber;
    } else {
        // delay setting of the stepSize until the step between
        // the second and third step in case the first step is
        // 1 (i.e., MrBayes) and the stepsize is 1.
        stepSize = stateNumber - secondState;
    }

    lastState = stateNumber;
    return true;
}
// the log file given to the constructor; read by loadTraces()
protected final File file;
// the name given to the constructor; returned by getName()
protected final String name;
// the traces, one per label of the log file in the order the labels were read,
// followed by any traces appended through addTrace(String, int)
private final List<Trace> traces = new ArrayList<Trace>();
/**
 * Appends a new customized trace that is filled with the values of an
 * existing trace, and registers it with type REAL.
 *
 * @param newTName name of the new trace
 * @param i        index of the existing trace whose values are used
 */
public void addTrace(String newTName, int i) {
    final Trace source = traces.get(i);
    final TraceCustomized derived = new TraceCustomized(newTName);
    derived.addValues(source); // only Double
    traces.add(derived);
    tracesType.put(newTName, TraceType.REAL);
}
/**
 * store INTEGER or STRING predefined at the top of log file, only used during loading files
 * @deprecated should be replaced by
 * {@link #assignTraceTypeAccordingValue(int, String) assignTraceTypeAccordingValue} method
 */
private TreeMap<String, TraceType> tracesType = new TreeMap<String, TraceType>();
// burnin in state numbers; set to a tenth of the last state when a file is loaded
private long burnIn = -1;
// the state bookkeeping below uses a negative value to mean "not set yet" (see addState)
private long firstState = -1;
private long secondState = -1;
private long lastState = -1;
// gap between consecutive state numbers
private long stepSize = -1;
/**
 * A <code>BufferedReader</code> that trims every line it returns, counts the
 * lines read through {@link #readLine()}, and can split a line into
 * tab-separated tokens.
 */
public static class TrimLineReader extends BufferedReader {

    /** Number of lines returned by {@link #readLine()} so far. */
    private int lineNumber = 0;

    public TrimLineReader(Reader reader) {
        super(reader);
    }

    /**
     * Reads the next line and trims it. The line counter is advanced only
     * when a line was actually read, so it does not overshoot at the end of
     * the input.
     *
     * @return the trimmed line, or <code>null</code> at the end of the input
     * @throws java.io.IOException if the underlying reader fails
     */
    @Override
    public String readLine() throws java.io.IOException {
        final String line = super.readLine();
        if (line == null) {
            return null;
        }
        lineNumber += 1;
        return line.trim();
    }

    /**
     * Reads the next line and tokenizes it.
     *
     * @return the tokens of the next line, or <code>null</code> at the end of the input
     * @throws java.io.IOException if the underlying reader fails
     */
    public StringTokenizer tokenizeLine() throws java.io.IOException {
        return getStringTokenizer(readLine());
    }

    /**
     * @param line a line of text, may be <code>null</code>
     * @return a tokenizer splitting the line at tab characters, or
     *         <code>null</code> if the line is <code>null</code>
     */
    public StringTokenizer getStringTokenizer(String line) {
        return line == null ? null : new StringTokenizer(line, "\t");
    }

    /**
     * @return the number of the line most recently returned by
     *         {@link #readLine()}, counting from 1 (0 before any line is read)
     */
    public int getLineNumber() {
        return lineNumber;
    }
}
}