package org.seqcode.genome.sequence; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; public class WildcardKmerUtils { /** A hash map the holds all the k-mer mapping functions (should be loaded) */ public static Map<String,Set<String>> wildcardMap = new HashMap<String,Set<String>>(); /** length of the k-mers */ public static int k; @SuppressWarnings("unchecked") public WildcardKmerUtils(int kmerLen) throws IOException { k = kmerLen; if(k ==8 ){ InputStream ins = this.getClass().getResourceAsStream("wildcard_8mer_2mismatch_hg19.txt"); BufferedReader br = new BufferedReader(new InputStreamReader(ins)); String line = null; while((line=br.readLine()) != null){ String[] pieces = line.split(","); Set<String> tmpAdd = new HashSet<String>(); for(int s=1; s<pieces.length; s++){ tmpAdd.add(pieces[s]); if(pieces[s].contains("N")){ // is this a wild-card kmer if(wildcardMap.containsKey(pieces[s])){ wildcardMap.get(pieces[s]).add(pieces[0]); }else{ wildcardMap.put(pieces[s], new HashSet<String>()); wildcardMap.get(pieces[s]).add(pieces[0]); } // now also the rev complement String rev = SequenceUtils.reverseComplement(pieces[s]); if(wildcardMap.containsKey(rev)){ wildcardMap.get(rev).add(pieces[0]); }else{ wildcardMap.put(rev, new HashSet<String>()); wildcardMap.get(rev).add(pieces[0]); } } } wildcardMap.put(pieces[0], tmpAdd); } } } /** * Convert a base to an int value * * @param base * @return */ public int base2int(char base) { int intVal = -1; switch (base) { case 'A': intVal = 0; break; case 'C': intVal = 1; break; case 'G': intVal = 2; break; case 'T': intVal = 3; break; case 'N': intVal = 4; break; default: throw new IllegalArgumentException("Invalid character: " + base); } return intVal; } /** * Return a base for the specified integer * * @param x * @return */ public char int2base(int x) { char base; switch (x) { case 0: base = 'A'; break; case 1: base = 'C'; break; case 2: base = 'G'; break; case 3: base = 'T'; break; case 4: base = 'N'; break; default: throw new IllegalArgumentException("Invalid int: " + x); } return (base); } public int seq2int(String seq) { int intVal = 0; int len = seq.length(); for (int i = 0; i < len; i++) { long currInt = base2int(seq.charAt(i)); if (currInt == -1) { return -1; } intVal = intVal * 5; intVal += currInt; } return intVal; } public String int2seq(long x, int kmerLen) { if (x >= (int)Math.pow(5, kmerLen)) { throw new IllegalArgumentException("Invalid int value, " + x + ", for kmerLen " + kmerLen); } StringBuffer seq = new StringBuffer(kmerLen); for (int i = 0; i < kmerLen; i++) { int baseVal = (int) (x % 5); seq.append(int2base(baseVal)); x = (long)Math.floor(x/5.0); } return seq.reverse().toString(); } public Set<String> map(String kmer){ return wildcardMap.get(kmer); } public static void main(String[] args) throws IOException, ClassNotFoundException{ WildcardKmerUtils wku = new WildcardKmerUtils(8); System.out.println(wku.wildcardMap.get("AAAAAAAA")); } }