/*
* EmpiricalCodonModelParser.java
*
* Copyright (c) 2002-2015 Alexei Drummond, Andrew Rambaut and Marc Suchard
*
* This file is part of BEAST.
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership and licensing.
*
* BEAST is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* BEAST is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with BEAST; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301 USA
*/
package dr.oldevomodelxml.substmodel;
import java.util.logging.Logger;
import dr.evolution.datatype.Codons;
import dr.evolution.datatype.DataType;
import dr.evolution.datatype.GeneticCode;
import dr.evomodel.substmodel.*;
import dr.inference.model.Parameter;
import dr.oldevomodel.substmodel.EmpiricalCodonModel;
import dr.oldevomodel.substmodel.EmpiricalCodonRateMatrix;
import dr.oldevomodel.substmodel.FrequencyModel;
import dr.xml.*;
/**
* XML parser for ECM
*
* @author Stefan Zoller
*/
public class EmpiricalCodonModelParser extends AbstractXMLObjectParser {
public static final String EMPIRICAL_CODON_MODEL = "empiricalCodonModel";
public static final String EMPIRICAL_RATE_MATRIX = "empiricalRateMatrix";
public static final String ECM_DATA_DIR = "ecmDataDir";
public static final String ECM_DATA_MATRIX = "ecmRateFile";
public static final String ECM_FREQ_MATRIX = "ecmFreqFile";
public static final String OMEGA = "omega";
public static final String KAPPATSTV = "kappaTsTv";
public static final String MULTI_NT_CHANGE = "multiNtChange";
public String getParserName() { return EMPIRICAL_CODON_MODEL; }
public Object parseXMLObject(XMLObject xo) throws XMLParseException {
Codons codons = Codons.UNIVERSAL;
if (xo.hasAttribute(GeneticCode.GENETIC_CODE)) {
String codeStr = xo.getStringAttribute(GeneticCode.GENETIC_CODE);
if (codeStr.equals(GeneticCode.UNIVERSAL.getName())) {
codons = Codons.UNIVERSAL;
} else if (codeStr.equals(GeneticCode.VERTEBRATE_MT.getName())) {
codons = Codons.VERTEBRATE_MT;
} else if (codeStr.equals(GeneticCode.YEAST.getName())) {
codons = Codons.YEAST;
} else if (codeStr.equals(GeneticCode.MOLD_PROTOZOAN_MT.getName())) {
codons = Codons.MOLD_PROTOZOAN_MT;
} else if (codeStr.equals(GeneticCode.INVERTEBRATE_MT.getName())) {
codons = Codons.INVERTEBRATE_MT;
} else if (codeStr.equals(GeneticCode.CILIATE.getName())) {
codons = Codons.CILIATE;
} else if (codeStr.equals(GeneticCode.ECHINODERM_MT.getName())) {
codons = Codons.ECHINODERM_MT;
} else if (codeStr.equals(GeneticCode.EUPLOTID_NUC.getName())) {
codons = Codons.EUPLOTID_NUC;
} else if (codeStr.equals(GeneticCode.BACTERIAL.getName())) {
codons = Codons.BACTERIAL;
} else if (codeStr.equals(GeneticCode.ALT_YEAST.getName())) {
codons = Codons.ALT_YEAST;
} else if (codeStr.equals(GeneticCode.ASCIDIAN_MT.getName())) {
codons = Codons.ASCIDIAN_MT;
} else if (codeStr.equals(GeneticCode.FLATWORM_MT.getName())) {
codons = Codons.FLATWORM_MT;
} else if (codeStr.equals(GeneticCode.BLEPHARISMA_NUC.getName())) {
codons = Codons.BLEPHARISMA_NUC;
} else if (codeStr.equals(GeneticCode.NO_STOPS.getName())) {
codons = Codons.NO_STOPS;
}
}
Parameter omegaParam = (Parameter)xo.getElementFirstChild(OMEGA);
Parameter kappaParam = null;
Parameter mntParam = null;
if(xo.hasChildNamed(KAPPATSTV)) {
kappaParam = (Parameter)xo.getElementFirstChild(KAPPATSTV);
if(kappaParam.getDimension() != 2 && kappaParam.getDimension() != 9) {
throw new XMLParseException("If you use the kappa parameter, you need to enter exactly\n" +
"two values for ts and tv or nine values\n" +
"according to the Kosiol ECM+F+omega+9k model");
}
} else {
mntParam = (Parameter)xo.getElementFirstChild(MULTI_NT_CHANGE);
}
String dirString = xo.getStringAttribute(ECM_DATA_DIR);
String freqString = xo.getStringAttribute(ECM_FREQ_MATRIX);
String matString = xo.getStringAttribute(ECM_DATA_MATRIX);
EmpiricalCodonRateMatrix rateMat = new EmpiricalCodonRateMatrix(EMPIRICAL_RATE_MATRIX, codons,
dirString, freqString, matString);
// get frequencies from XML, from frequency csv file or estimate from data
FrequencyModel freqModel = null;
if (xo.getChild(FrequencyModel.class) != null) {
freqModel = (FrequencyModel)xo.getChild(FrequencyModel.class);
} else {
freqModel = createNewFreqModel(codons, rateMat);
}
return new EmpiricalCodonModel(codons, omegaParam, kappaParam, mntParam, rateMat, freqModel);
}
// creates new FrequencyModel from XML frequencies
private FrequencyModel createNewFreqModel(DataType codons, EmpiricalCodonRateMatrix type) throws XMLParseException {
double[] freqs = type.getFrequencies();
double sum = 0;
for (int j = 0; j < freqs.length; j++) {
sum += freqs[j];
}
if (Math.abs(sum - 1.0) > 1e-8) {
throw new XMLParseException("Frequencies do not sum to 1 (they sum to " + sum + ")");
}
FrequencyModel fm = new FrequencyModel(codons, freqs);
Logger.getLogger("dr.evomodel").info("Using frequencies from data file");
return fm;
}
//************************************************************************
// AbstractXMLObjectParser implementation
//************************************************************************
public String getParserDescription() {
return "This element represents the empirical model of codon evolution.";
}
public Class getReturnType() { return EmpiricalCodonModel.class; }
public XMLSyntaxRule[] getSyntaxRules() { return rules; }
private XMLSyntaxRule[] rules = new XMLSyntaxRule[] {
new StringAttributeRule(GeneticCode.GENETIC_CODE,
"The genetic code to use",
new String[] {
GeneticCode.UNIVERSAL.getName(),
GeneticCode.VERTEBRATE_MT.getName(),
GeneticCode.YEAST.getName(),
GeneticCode.MOLD_PROTOZOAN_MT.getName(),
GeneticCode.INVERTEBRATE_MT.getName(),
GeneticCode.CILIATE.getName(),
GeneticCode.ECHINODERM_MT.getName(),
GeneticCode.EUPLOTID_NUC.getName(),
GeneticCode.BACTERIAL.getName(),
GeneticCode.ALT_YEAST.getName(),
GeneticCode.ASCIDIAN_MT.getName(),
GeneticCode.FLATWORM_MT.getName(),
GeneticCode.BLEPHARISMA_NUC.getName(),
GeneticCode.NO_STOPS.getName()}, true),
new StringAttributeRule(ECM_DATA_DIR,
"The directory with the ECM data file",
"ecmdata", true),
new StringAttributeRule(ECM_DATA_MATRIX,
"The csv file with the ECM data matrix",
"matrix.csv", true),
new StringAttributeRule(ECM_FREQ_MATRIX,
"The csv file with the ECM frequency matrix",
"freqs.csv", true),
new ElementRule(OMEGA,
new XMLSyntaxRule[] { new ElementRule(Parameter.class) }),
new XORRule(
new ElementRule(KAPPATSTV,
new XMLSyntaxRule[] { new ElementRule(Parameter.class) }),
new ElementRule(MULTI_NT_CHANGE,
new XMLSyntaxRule[] { new ElementRule(Parameter.class) })),
new ElementRule(FrequencyModel.class, true)
};
}