package org.seqcode.genome.sequence.seqfunctions; /** * Simply converts the sequence into a 4xL frequency matrix * @author mahony * */ public class BaseFrequencyFunction implements SeqFunction{ //Variables final int scoreDimension = 4; final int scoringOffset = 0; final int scoreWindowSize = 1; final boolean isBetweenNucs = false; final String[] labels = {"A", "C", "G", "T"}; final String description = "Base frequencies"; public double[][] score(String seq) throws SeqFunctionException { if(seq.length()<scoreWindowSize) throw new SeqFunctionException("Sequence too short for BaseFrequencyFunction"); double [][] scores = new double[scoreDimension][seq.length()]; String seqU = seq.toUpperCase(); for(int i=0; i<seqU.length(); i++){ char b = seqU.charAt(i); for(int x=0; x<scoreDimension; x++){ scores[x][i]=0;} switch(b){ case 'A': scores[0][i]=1; break; case 'C': scores[1][i]=1; break; case 'G': scores[2][i]=1; break; case 'T': scores[3][i]=1; break; case 'N': default: scores[0][i]=0.25; scores[1][i]=0.25; scores[2][i]=0.25; scores[3][i]=0.25; break; case 'R' : scores[0][i]=0.5; scores[2][i]=0.5; break; case 'Y' : scores[3][i]=0.5; scores[1][i]=0.5; break; case 'M' : scores[0][i]=0.5; scores[1][i]=0.5; break; case 'K' : scores[2][i]=0.5; scores[3][i]=0.5; break; case 'S' : scores[1][i]=0.5; scores[2][i]=0.5; break; case 'W' : scores[0][i]=0.5; scores[3][i]=0.5; break; } } return scores; } public int scoreDimension() { return scoreDimension; } public int scoringOffset() { return scoringOffset; } public int scoreWindowSize() { return scoreWindowSize; } public boolean isBetweenNucleotides() { return isBetweenNucs; } public String[] dimensionLabels() { return labels; } public String scoreDescription() { return description; } public double getMaxScore(){return 1.0;} public double getMinScore(){return 0.0;} }