/* * AminoAcids.java * * Copyright (c) 2002-2015 Alexei Drummond, Andrew Rambaut and Marc Suchard * * This file is part of BEAST. * See the NOTICE file distributed with this work for additional * information regarding copyright ownership and licensing. * * BEAST is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * BEAST is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with BEAST; if not, write to the * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, * Boston, MA 02110-1301 USA */ package dr.evolution.datatype; /** * implements DataType for amino acids. * * @version $Id: AminoAcids.java,v 1.12 2005/06/22 14:48:19 beth Exp $ * * @author Andrew Rambaut * @author Korbinian Strimmer * @author Alexei Drummond * @author Matthew Goode */ public class AminoAcids extends DataType { /** * Name of data type. For XML and human reading of data type. */ public static final String DESCRIPTION = "amino acid"; /** * This integer is a unique identifier of this data type. */ public static final int TYPE = AMINO_ACIDS; /** * The only instance of the AminoAcids class. */ public static final AminoAcids INSTANCE = new AminoAcids(); /** * This character represents the amino acid equivalent of a stop codon to cater for * situations arising from converting coding DNA to an amino acid sequence. */ public static final char STOP_CHARACTER = '*'; public static final int STOP_STATE = 23; /** * This state represents a amino acid residue of unknown type. */ public static final int UNKNOWN_STATE = 24; /** * This state represents a gap in an amino acid sequence. */ public static final int GAP_STATE = 25; /** * Unique integer identifier for the amino acid data type. */ public static final int AMINOACIDS = 1; /** * A table to translate state numbers (0-25) into one letter codes. */ public static final char[] AMINOACID_CHARS= { 'A','C','D','E','F','G','H','I','K','L','M','N','P','Q','R', 'S','T','V','W','Y','B','Z','X',AminoAcids.STOP_CHARACTER,DataType.UNKNOWN_CHARACTER,DataType.GAP_CHARACTER }; /** * A table to map state numbers (0-25) to their three letter codes. */ private static final String[] AMINOACID_TRIPLETS = { // A C D E F G H I K "Ala", "Cys", "Asp", "Glu", "Phe", "Gly", "His", "Ile", "Lys", // L M N P Q R S T V "Leu", "Met", "Asn", "Pro", "Gln", "Arg", "Ser", "Thr", "Val", // W Y B Z X * ? - "Trp", "Tyr", "Asx", "Glx", " X ", " * ", " ? ", " - " }; /** * This table maps amino acid characters into state codes (0-25). * Amino Acids go ACDEFGHIKLMNPQRSTVWYBZX*?-, * Other letters; j, o, and u are mapped to ? * *, ? and - are mapped to themselves * All other chars are mapped to - */ public static final int[] AMINOACID_STATES = { 25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,25, // 0-15 25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,25, // 16-31 // * - 25,25,25,25,25,25,25,25,25,25,23,25,25,25,25,25, // 32-47 // ? 25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,24, // 48-63 // A B C D E F G H I j K L M N o 25, 0,20, 1, 2, 3, 4, 5, 6, 7,24, 8, 9,10,11,24, // 64-79 // P Q R S T u V W X Y Z 12,13,14,15,16,24,17,18,22,19,21,25,25,25,25,25, // 80-95 // A B C D E F G H I j K L M N o 25, 0,20, 1, 2, 3, 4, 5, 6, 7,24, 8, 9,10,11,24, // 96-111 // P Q R S T u V W X Y Z 12,13,14,15,16,24,17,18,22,19,21,25,25,25,25,25 // 112-127 }; /** * A table to map state numbers (0-25) to their ambiguities. */ private static final String[] AMINOACID_AMBIGUITIES = { // A C D E F G H I K "A", "C", "D", "E", "F", "G", "H", "I", "K", // L M N P Q R S T V "L", "M", "N", "P", "Q", "R", "S", "T", "V", // W Y B Z "W", "Y", "DN", "EQ", // X * ? - "ACDEFGHIKLMNPQRSTVWY", "*", "ACDEFGHIKLMNPQRSTVWY", "ACDEFGHIKLMNPQRSTVWY" }; /** * Private constructor - DEFAULT_INSTANCE provides the only instance. */ protected AminoAcids() { super(); stateCount = 20; ambiguousStateCount = 26; } @Override public char[] getValidChars() { return AMINOACID_CHARS; } /** * Get state corresponding to a character. * * @param c character * * @return state */ public int getState(final char c) { return AMINOACID_STATES[c]; } /** * Get state corresponding to a stop. * * @return state */ public static int getStopState() { return STOP_STATE; } /** * Get state corresponding to an unknown. * * @return state */ public int getUnknownState() { return AminoAcids.UNKNOWN_STATE; } /** * Get state corresponding to a gap. * * @return state */ public int getGapState() { return AminoAcids.GAP_STATE; } /** * Get character corresponding to a given state. * * @param state state * * @return corresponding character */ public char getChar(final int state) { return AminoAcids.AMINOACID_CHARS[state]; } /** * Get triplet string corresponding to a given state. * * @param state state * * @return corresponding triplet string */ public String getTriplet(final int state) { return AminoAcids.AMINOACID_TRIPLETS[state]; } /** * @param state the state to return the state set of. If this state is an ambiguity * code then the array returned will be size greater than 1. * @return an array containing the non-ambiguous states * that this state represents. */ public int[] getStates(final int state) { final String stateString = AminoAcids.AMINOACID_AMBIGUITIES[state]; final int[] states = new int[stateString.length()]; for (int i = 0; i < stateString.length(); i++) { states[i] = getState(stateString.charAt(i)); } return states; } /** * @param state the state to return the state set of. If this state is an ambiguity * code then the state set returned will have more than one true element in it. * @return an array containing the non-ambiguous states that this state represents. */ public boolean[] getStateSet(final int state) { final boolean[] stateSet = new boolean[stateCount]; for (int i = 0; i < stateCount; i++) { stateSet[i] = false; } final int len = AminoAcids.AMINOACID_AMBIGUITIES[state].length(); for (int i = 0; i < len; i++) { stateSet[getState(AMINOACID_AMBIGUITIES[state].charAt(i))] = true; } return stateSet; } /** * description of data type. * * @return string describing the data type */ public String getDescription() { return DESCRIPTION; } /** * type of data type. * * @return integer code for the data type */ public int getType() { return TYPE; } /** * @param c the char that is being tested to see if its a stop char. * @return true if this character is a stop */ public boolean isStopChar(final char c) { return isStopState(getState(c)); } /** * @param state the state that is being tested to see if its a stop state. * @return true if this state is a stop. */ public boolean isStopState(final int state) { return state == getStopState(); } }