/*
* DataModelImporter.java
*
* Copyright (c) 2002-2015 Alexei Drummond, Andrew Rambaut and Marc Suchard
*
* This file is part of BEAST.
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership and licensing.
*
* BEAST is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* BEAST is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with BEAST; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301 USA
*/
package dr.app.beastgen;
import dr.app.beauti.options.*;
import dr.app.util.Utils;
import dr.evolution.alignment.Alignment;
import dr.evolution.alignment.SimpleAlignment;
import dr.evolution.datatype.Nucleotides;
import dr.evolution.io.FastaImporter;
import dr.evolution.io.Importer;
import dr.evolution.io.Importer.ImportException;
import dr.evolution.io.NewickImporter;
import dr.evolution.io.NexusImporter;
import dr.evolution.io.NexusImporter.MissingBlockException;
import dr.evolution.io.NexusImporter.NexusBlock;
import dr.evolution.sequence.Sequence;
import dr.evolution.tree.Tree;
import dr.evolution.tree.TreeUtils;
import dr.evolution.util.Taxon;
import dr.evolution.util.TaxonList;
import dr.evolution.util.Units;
import org.jdom.JDOMException;
import java.io.*;
import java.util.*;
import java.util.List;
/**
* @author Andrew Rambaut
* @version $Id$
*/
public class DataModelImporter {
private final DateGuesser guesser;
public DataModelImporter() {
this.guesser = null;
}
public DataModelImporter(DateGuesser guesser) {
this.guesser = guesser;
}
public HashMap importFromFile(File file) throws IOException, Importer.ImportException {
HashMap dataModel = new HashMap();
try {
Reader reader = new FileReader(file);
BufferedReader bufferedReader = new BufferedReader(reader);
String line = bufferedReader.readLine();
while (line != null && line.length() == 0) {
line = bufferedReader.readLine();
}
if ((line != null && line.toUpperCase().contains("#NEXUS"))) {
// is a NEXUS file
importNexusFile(file, guesser, dataModel);
} else if ((line != null && line.trim().startsWith("" + FastaImporter.FASTA_FIRST_CHAR))) {
// is a FASTA file
importFastaFile(file, guesser, dataModel);
} else if ((line != null && (line.toUpperCase().contains("<?XML") || line.toUpperCase().contains("<BEAST")))) {
// assume it is a BEAST XML file and see if that works...
importBEASTFile(file, guesser, dataModel);
// } else {
// // assume it is a tab-delimited traits file and see if that works...
// importTraits(file);
} else {
throw new ImportException("Unrecognized format for imported file.");
}
bufferedReader.close();
} catch (IOException e) {
throw new IOException(e.getMessage());
}
return dataModel;
}
// xml
private void importBEASTFile(File file, DateGuesser guesser, Map dataModel) throws IOException, ImportException {
try {
FileReader reader = new FileReader(file);
BeastImporter importer = new BeastImporter(reader);
List<TaxonList> taxonLists = new ArrayList<TaxonList>();
List<Alignment> alignments = new ArrayList<Alignment>();
importer.importBEAST(taxonLists, alignments);
TaxonList taxa = taxonLists.get(0);
int count = 1;
for (Alignment alignment : alignments) {
String name = file.getName();
if (alignment.getId() != null && alignment.getId().length() > 0) {
name = alignment.getId();
} else {
if (alignments.size() > 1) {
name += count;
}
}
setData(dataModel, guesser, name, taxa, taxonLists, alignment, null, null, null);
count++;
}
reader.close();
} catch (JDOMException e) {
throw new ImportException(e.getMessage());
} catch (ImportException e) {
throw new ImportException(e.getMessage());
} catch (IOException e) {
throw new IOException(e.getMessage());
}
}
// nexus
private void importNexusFile(File file, DateGuesser guesser, Map dataModel) throws IOException, ImportException {
TaxonList taxa = null;
SimpleAlignment alignment = null;
List<Tree> trees = new ArrayList<Tree>();
List<NexusApplicationImporter.CharSet> charSets = new ArrayList<NexusApplicationImporter.CharSet>();
try {
FileReader reader = new FileReader(file);
NexusApplicationImporter importer = new NexusApplicationImporter(reader);
boolean done = false;
while (!done) {
try {
NexusBlock block = importer.findNextBlock();
if (block == NexusImporter.TAXA_BLOCK) {
if (taxa != null) {
throw new MissingBlockException("TAXA block already defined");
}
taxa = importer.parseTaxaBlock();
dataModel.put("taxa", createTaxonList(taxa));
} else if (block == NexusImporter.CALIBRATION_BLOCK) {
if (taxa == null) {
throw new MissingBlockException("TAXA or DATA block must be defined before a CALIBRATION block");
}
importer.parseCalibrationBlock(taxa);
} else if (block == NexusImporter.CHARACTERS_BLOCK) {
if (taxa == null) {
throw new MissingBlockException("TAXA block must be defined before a CHARACTERS block");
}
if (alignment != null) {
throw new MissingBlockException("CHARACTERS or DATA block already defined");
}
alignment = (SimpleAlignment) importer.parseCharactersBlock(taxa);
} else if (block == NexusImporter.DATA_BLOCK) {
if (alignment != null) {
throw new MissingBlockException("CHARACTERS or DATA block already defined");
}
// A data block doesn't need a taxon block before it
// but if one exists then it will use it.
alignment = (SimpleAlignment) importer.parseDataBlock(taxa);
if (taxa == null) {
taxa = alignment;
}
} else if (block == NexusImporter.TREES_BLOCK) {
// I guess there is no reason not to allow multiple trees blocks
// if (trees.size() > 0) {
// throw new MissingBlockException("TREES block already defined");
// }
Tree[] treeArray = importer.parseTreesBlock(taxa);
trees.addAll(Arrays.asList(treeArray));
if (taxa == null && trees.size() > 0) {
taxa = trees.get(0);
}
} else if (block == NexusApplicationImporter.ASSUMPTIONS_BLOCK) {
importer.parseAssumptionsBlock(charSets);
} else {
// Ignore the block..
}
} catch (EOFException ex) {
done = true;
}
}
reader.close();
// Allow the user to load taxa only (perhaps from a tree file) so that they can sample from a prior...
if (alignment == null && taxa == null) {
throw new MissingBlockException("TAXON, DATA or CHARACTERS block is missing");
}
} catch (IOException e) {
throw new IOException(e.getMessage());
} catch (ImportException e) {
throw new ImportException(e.getMessage());
// } catch (Exception e) {
// throw new Exception(e.getMessage());
}
setData(dataModel, guesser, file.getName(), taxa, null, alignment, charSets, null, trees);
}
// FASTA
private void importFastaFile(File file, DateGuesser guesser, Map dataModel) throws IOException, ImportException {
try {
FileReader reader = new FileReader(file);
FastaImporter importer = new FastaImporter(reader, Nucleotides.INSTANCE);
Alignment alignment = importer.importAlignment();
reader.close();
setData(dataModel, guesser, file.getName(), alignment, null, alignment, null, null, null);
} catch (ImportException e) {
throw new ImportException(e.getMessage());
} catch (IOException e) {
throw new IOException(e.getMessage());
}
}
public Map importFromTreeFile(String fileName, Map dataModel) throws IOException, Importer.ImportException {
Tree tree = null;
try {
Reader reader = new FileReader(fileName);
BufferedReader bufferedReader = new BufferedReader(reader);
String line = bufferedReader.readLine();
while (line != null && line.length() == 0) {
line = bufferedReader.readLine();
}
reader = new FileReader(fileName);
if ((line != null && line.toUpperCase().contains("#NEXUS"))) {
// is a NEXUS file
NexusImporter importer = new NexusImporter(reader);
tree = importer.importNextTree();
} else {
NewickImporter importer = new NewickImporter(reader);
tree = importer.importNextTree();
}
bufferedReader.close();
} catch (IOException e) {
throw new IOException(e.getMessage());
}
if (tree != null) {
dataModel.put("tree", TreeUtils.newick(tree));
}
return dataModel;
}
private boolean isMissingValue(String value) {
return (value.equals("?") || value.equals("NA") || value.length() == 0);
}
// public void importTraits(final File file, Map dataModel) throws Exception {
// List<TraitData> importedTraits = new ArrayList<TraitData>();
// Taxa taxa = options.taxonList;
//
// DataTable<String[]> dataTable = DataTable.Text.parse(new FileReader(file));
//
// String[] traitNames = dataTable.getColumnLabels();
// String[] taxonNames = dataTable.getRowLabels();
//
// for (int i = 0; i < dataTable.getColumnCount(); i++) {
// boolean warningGiven = false;
//
// String traitName = traitNames[i];
//
// String[] values = dataTable.getColumn(i);
// Class c = null;
// if (!isMissingValue(values[0])) {
// c = Utils.detectType(values[0]);
// }
// for (int j = 1; j < values.length; j++) {
// if (!isMissingValue(values[j])) {
// if (c == null) {
// c = Utils.detectType(values[j]);
// } else {
// Class c1 = Utils.detectType(values[j]);
// if (c == Integer.class && c1 == Double.class) {
// // change the type to double
// c = Double.class;
// }
//
// if (c1 != c &&
// !(c == Double.class && c1 == Integer.class) &&
// !warningGiven ) {
// System.err.println("Not all values of same type for trait" + traitName);
// warningGiven = true;
// }
// }
// }
// }
//
// TraitData.TraitType t = (c == Boolean.class || c == String.class || c == null) ? TraitData.TraitType.DISCRETE :
// (c == Integer.class) ? TraitData.TraitType.INTEGER : TraitData.TraitType.CONTINUOUS;
// TraitData newTrait = new TraitData(options, traitName, file.getName(), t);
//
// importedTraits.add(newTrait);
//
// int j = 0;
// for (final String taxonName : taxonNames) {
//
// final int index = taxa.getTaxonIndex(taxonName);
// Taxon taxon;
// if (index >= 0) {
// taxon = taxa.getTaxon(index);
// } else {
// taxon = new Taxon(taxonName);
// taxa.addTaxon(taxon);
// }
// if (!isMissingValue(values[j])) {
// taxon.setAttribute(traitName, Utils.constructFromString(c, values[j]));
// } else {
// // AR - merge rather than replace existing trait values
// if (taxon.getAttribute(traitName) == null) {
// taxon.setAttribute(traitName, "?");
// }
// }
// j++;
// }
// }
// setData(dataModel, file.getName(), taxa, null, null, null, importedTraits, null);
// }
// for Alignment
private void setData(Map dataModel, DateGuesser guesser, String fileName, TaxonList taxonList, List<TaxonList> taxonLists, Alignment alignment,
List<NexusApplicationImporter.CharSet> charSets,
List<TraitData> traits, List<Tree> trees) throws ImportException, IllegalArgumentException {
String fileNameStem = Utils.trimExtensions(fileName,
new String[]{"NEX", "NEXUS", "FA", "FAS", "FASTA", "TRE", "TREE", "XML", "TXT"});
checkTaxonList(taxonList, guesser);
dataModel.put("taxa", createTaxonList(taxonList));
dataModel.put("taxon_count", Integer.toString(taxonList.getTaxonCount()));
dataModel.put("taxon_count_minus_one", Integer.toString(taxonList.getTaxonCount()-1));
if (taxonLists != null) {
List<Map> tss = new ArrayList<Map>();
for (TaxonList tl : taxonLists) {
Map ts = new HashMap();
ts.put("id", tl.getId());
ts.put("taxa", createTaxonList(taxonList));
tss.add(ts);
}
dataModel.put("taxonSets", tss);
}
dataModel.put("alignment", createAlignment(alignment));
dataModel.put("site_count", Integer.toString(alignment.getSiteCount()));
dataModel.put("filename", fileName);
dataModel.put("filename_stem", fileNameStem);
}
private void checkTaxonList(TaxonList taxonList, DateGuesser guesser) throws ImportException {
// check the taxon names for invalid characters
boolean foundAmp = false;
for (Taxon taxon : taxonList) {
String name = taxon.getId();
if (name.indexOf('&') >= 0) {
foundAmp = true;
}
}
if (foundAmp) {
throw new ImportException("One or more taxon names include an illegal character ('&').\n" +
"These characters will prevent BEAST from reading the resulting XML file.\n\n" +
"Please edit the taxon name(s) before reloading the data file.");
}
if (guesser != null) {
guesser.guessDates(taxonList);
} else {
// make sure they all have dates...
for (int i = 0; i < taxonList.getTaxonCount(); i++) {
if (taxonList.getTaxonAttribute(i, "date") == null) {
Date origin = new Date(0);
dr.evolution.util.Date date = dr.evolution.util.Date.createTimeSinceOrigin(0.0, Units.Type.YEARS, origin);
taxonList.getTaxon(i).setAttribute("date", date);
}
}
}
}
private List<Map> createTaxonList(TaxonList taxa) {
List<Map> tl = new ArrayList<Map>();
for (Taxon taxon : taxa) {
tl.add(createTaxon(taxon));
}
return tl;
}
private Map createTaxon(Taxon taxon) {
Map t = new HashMap();
t.put("id", taxon.getId());
if (taxon.getDate() != null) {
t.put("date", Double.toString(taxon.getDate().getTimeValue()));
}
return t;
}
private Map createAlignment(Alignment alignment) {
Map a = new HashMap();
a.put("id", (alignment.getId() != null ? alignment.getId() : "alignment"));
List<Map> ss = new ArrayList<Map>();
for (int i = 0; i < alignment.getSequenceCount(); i++) {
Sequence sequence = alignment.getSequence(i);
ss.add(createSequence(sequence));
}
a.put("sequences", ss);
return a;
}
private Map createSequence(Sequence sequence) {
Map s = new HashMap();
s.put("taxon", createTaxon(sequence.getTaxon()));
s.put("data", sequence.getSequenceString());
return s;
}
}