package org.seqcode.motifs;
/*
*
*
*/
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
import org.seqcode.data.io.BackgroundModelIO;
import org.seqcode.data.io.FASTALoader;
import org.seqcode.data.motifdb.MarkovBackgroundModel;
import org.seqcode.data.motifdb.WMHit;
import org.seqcode.data.motifdb.WeightMatrix;
import org.seqcode.genome.Genome;
import org.seqcode.genome.Species;
import org.seqcode.genome.location.Gene;
import org.seqcode.genome.location.NamedRegion;
import org.seqcode.genome.location.Point;
import org.seqcode.genome.location.Region;
import org.seqcode.genome.location.ScoredStrandedRegion;
import org.seqcode.genome.location.StrandedPoint;
import org.seqcode.genome.sequence.SequenceGenerator;
import org.seqcode.genome.sequence.SequenceUtils;
import org.seqcode.gsebricks.verbs.location.ChromRegionIterator;
import org.seqcode.gsebricks.verbs.location.PointParser;
import org.seqcode.gsebricks.verbs.location.RefGeneGenerator;
import org.seqcode.gsebricks.verbs.location.RegionParser;
import org.seqcode.gsebricks.verbs.motifs.WeightMatrixScoreProfile;
import org.seqcode.gsebricks.verbs.motifs.WeightMatrixScorer;
import org.seqcode.gseutils.ArgParser;
import org.seqcode.gseutils.Args;
import org.seqcode.gseutils.NotFoundException;
import org.seqcode.gseutils.Pair;
import org.seqcode.viz.metaprofile.BinningParameters;
public class MotifAnalysisSandbox {
private Genome gen;
private WeightMatrix motif;
// Added by akshay, to read and store more than one motif. The above motif variable stores the first matrix in this list (not removed for compatibility)
private List<WeightMatrix> motifList = new ArrayList<WeightMatrix>();
private WeightMatrixScorer scorer;
private List<WeightMatrixScorer> scorerList;
private SequenceGenerator seqgen;
private ArrayList<Region> regions=null;
private ArrayList<Point> peaks=null;
private ArrayList<String> inFileLines;
private double motifThres=-100000;
private int averageRegWidth=0, maxRegWidth=0;
private int maxGeneDistance=100000;
private int motifDensityWinSize=1000000;
private int motifDensityStepSize=500000;
private double basemotifscore = 0.0;
public static void main(String[] args) throws IOException, ParseException {
boolean havePeaks=false;
boolean printHits=false, printBestHits=false, printHitInfo=false, printPeakClosestHits=false, printSeqs=false, printSeqsNoMotifs=false, printclosesthitorientation=false;
boolean printSeqsWithMotifs=false, printPeakInfo=false, printPeaksWithMotifs=false, printPeaksNoMotifs=false, printHitPoints=false;
boolean printMotifScore=false, printMotifPerc=false, printRankMotifPerc=false, motifDistHist=false, screenMotifs=false, scoreSeqs=false;
boolean motifDensity=false, pocc=false, wholeGenome=false, printprofiles=false, motifHisto=false;;
String seqFile=null;
String genomeSequencePath=null;
MotifAnalysisSandbox tools;
// Added by akshay. Tools list for multiple motifs. The above tools variable stores the first tool in this list (not removed for compatibility)
List<MotifAnalysisSandbox> toolsList = new ArrayList<MotifAnalysisSandbox>();
ArgParser ap = new ArgParser(args);
if((!ap.hasKey("species") && !ap.hasKey("geninfo")) || (!ap.hasKey("motiffile") && !ap.hasKey("weightMatfile") && !ap.hasKey("motifname"))) {
System.err.println("Usage:\n " +
"MotifAnalysisSandbox " +
"--species <organism;genome> OR\n" +
"--geninfo <genome info> AND --seq <path to seqs>\n" +
"--motiffile <file with motifs> AND --back <background>\nOR\n"+
"--weightMatfile <file with weight matrix, only one weight matrix>\n"+
"--motifname <weightmatrix name> AND"+
"--motifversion <weightmatrix version> \n"+
"--peaks <file containing coordinates of peaks> \n" +
"--wholegenome [motif analysis in whole genome]\n"+
"--win <window of sequence to take around peaks> "+
"--minscore <minimum motif score> " +
"--motifthres <file with thresholds> " +
"--threslevel <threshold level> " +
"--baseMotifScore <Min score to calculate summed LL>\n " +
"\nOPTIONS:\n" +
"--printhits " +
"--printbesthits " +
"--printhitinfo " +
"--printpeakclosesthits " +
"--printclosesthitorientation" +
"--printHitPoints" +
"--printseqs " +
"--printseqsnomotifs " +
"--printseqswithmotifs " +
"--screenmotifs " +
"--printpeaksnomotifs " +
"--printpeakswithmotifs " +
"--printpeakinfo " +
"--printmotifscore " +
"--printmotifperc " +
"--printrankmotifperc " +
"--motifdensity " +
"--motifdisthist \n" +
"--motifhisto \n" +
"--scoreseqs <seqFile> " +
"--pocc <seqFile>\n" +
"--printprofiles [motif-profiler style vectors] --bins <num bin for profile> --binsize <binsize for histo>\n");
return;
}
String motifversion=null, motifname=null, motiffile=null, backfile=null, weightmatfile=null;
boolean loadFromFile=false;
//Motifs
if(ap.hasKey("motifname")){
if(ap.hasKey("motifversion")){motifversion = ap.getKeyValue("motifversion");}
motifname = ap.getKeyValue("motifname");
if (motifname.indexOf(';') != -1) {
String[] pieces = motifname.split(";");
motifname = pieces[0];
motifversion = pieces[1];
}
}else if(ap.hasKey("motiffile")){
loadFromFile=true;
motiffile = ap.getKeyValue("motiffile");
backfile = ap.getKeyValue("back");
}else if(ap.hasKey("weightMatfile")){
loadFromFile=true;
weightmatfile=ap.getKeyValue("weightMatfile");
}
havePeaks = ap.hasKey("peaks");
String posFile = ap.getKeyValue("peaks");
int win = ap.hasKey("win") ? new Integer(ap.getKeyValue("win")).intValue():-1;
boolean usingWin= win>0;
int bins = ap.hasKey("bins") ? new Integer(ap.getKeyValue("bins")).intValue():1; //Used by profile printing
int binsize = ap.hasKey("binsize") ? new Integer(ap.getKeyValue("binsize")).intValue():10; //Used by motif histogram
double minscore = (ap.hasKey("minscore")) ? new Double(ap.getKeyValue("minscore")).doubleValue() : -10000;
double basemotifscore = (ap.hasKey("baseMotifScore")) ? new Double(ap.getKeyValue("baseMotifScore")).doubleValue() : 0;
genomeSequencePath = ap.hasKey("seq") ? ap.getKeyValue("seq") : null;
printHits = ap.hasKey("printhits");
printBestHits = ap.hasKey("printbesthits");
printHitInfo = ap.hasKey("printhitinfo");
printPeakClosestHits = ap.hasKey("printpeakclosesthits");
printSeqs = ap.hasKey("printseqs");
printSeqsNoMotifs = ap.hasKey("printseqsnomotifs");
printSeqsWithMotifs = ap.hasKey("printseqswithmotifs");
printPeaksNoMotifs = ap.hasKey("printpeaksnomotifs");
printPeaksWithMotifs = ap.hasKey("printpeakswithmotifs");
printPeakInfo = ap.hasKey("printpeakinfo");
printMotifScore = ap.hasKey("printmotifscore");
printMotifPerc = ap.hasKey("printmotifperc");
printRankMotifPerc = ap.hasKey("printrankmotifperc");
printclosesthitorientation = ap.hasKey("printclosesthitorientation");
motifDistHist= ap.hasKey("motifdisthist");
motifHisto= ap.hasKey("motifhisto");
printHitPoints = ap.hasKey("printHitPoints");
motifDensity= ap.hasKey("motifdensity");
screenMotifs= ap.hasKey("screenmotifs");
scoreSeqs = ap.hasKey("scoreseqs");
wholeGenome = ap.hasKey("wholegenome");
printprofiles = ap.hasKey("printprofiles");
String thresFile = ap.hasKey("motifthres") ? ap.getKeyValue("motifthres"):null;
double thresLevel = ap.hasKey("threslevel") ? new Double(ap.getKeyValue("threslevel")).doubleValue():0.05;
pocc = ap.hasKey("pocc");
if(scoreSeqs)
seqFile = ap.getKeyValue("scoreseqs");
if(pocc)
seqFile = ap.getKeyValue("pocc");
ArrayList<Region> posRegs = new ArrayList<Region>();
ArrayList<Point> posPeaks = new ArrayList<Point>();
ArrayList<String> posLines= new ArrayList<String>();
try {
Genome currgen=null;
Species currorg=null;
//Load genome
if(ap.hasKey("species")){
Pair<Species, Genome> pair = Args.parseGenome(args);
if(pair != null){
currorg = pair.car();
currgen = pair.cdr();
}
}else{
if(ap.hasKey("geninfo") || ap.hasKey("g")){
//Make fake genome... chr lengths provided
String fName = ap.hasKey("geninfo") ? ap.getKeyValue("geninfo") : ap.getKeyValue("g");
currgen = new Genome("Genome", new File(fName), true);
}else{
currgen = null;
}
}
//Load motif
WeightMatrix matrix = null;
List<WeightMatrix> matrixList = new ArrayList<WeightMatrix>();
if(loadFromFile){
if(ap.hasKey("motiffile")){
matrixList = loadMotifFromFile(motiffile, backfile, currgen);
matrix = matrixList.get(0);
}else if(ap.hasKey("weightMatfile")){
matrix = loadWeightMatrix(weightmatfile);
matrixList.add(matrix);
}
}else{
if(currorg!=null){
int wmid = WeightMatrix.getWeightMatrixID(currorg.getDBID(), motifname, motifversion);
matrix = WeightMatrix.getWeightMatrix(wmid);
}
}
if(thresFile!=null){
HashMap<String,Double> thres = loadThresholdsFromFile(thresFile, thresLevel);
if(thres.containsKey(matrix.getName()))
minscore = thres.get(matrix.getName());
}
//Load the positive hits
if(havePeaks){
File pFile = new File(posFile);
if(!pFile.isFile()){System.err.println("Invalid positive file name");System.exit(1);}
BufferedReader reader = new BufferedReader(new FileReader(pFile));
String line;
while ((line = reader.readLine()) != null) {
line = line.trim();
Point pt=null; Region reg=null;
String [] curr = line.split("\\s+");
String coord = curr[0];
if(!curr[0].contains("#")){
if(curr.length>=3 && curr[2].contains(":")){coord = curr[2];}
if(coord.contains(":")) {
String [] currB = coord.split(":");
String chrom = currB[0]; chrom=chrom.replaceFirst("chr", "");
char strand = '?';
if(currB.length==3)
strand = currB[2].charAt(0);
int location=-1, rstart=-1, rend=-1;
if(currB[1].contains("-")){
String [] currC = currB[1].split("-");
location = (new Integer(currC[0])+new Integer(currC[1]))/2;
rstart = new Integer(currC[0]);
rend = new Integer(currC[1]);
}else{
location = new Integer(currB[1]);
rstart = Math.max(0, location-(win/2));
rend = Math.min(0, location+(win/2));
}
if(strand!='?')
pt = new StrandedPoint(currgen, chrom, location, strand);
else
pt = new Point(currgen, chrom, location);
if(usingWin)
reg = pt.expand(win/2);
else
reg = new Region(currgen, chrom, rstart, rend);
}
if(pt!=null && reg!=null){
posPeaks.add(pt);
posRegs.add(reg);
posLines.add(line);
}
}
}
}else if(wholeGenome){
ChromRegionIterator chroms = new ChromRegionIterator(currgen);
while(chroms.hasNext()){
NamedRegion c = chroms.next();
posRegs.add(c);
}
}
////////////////////////////////////////////////////////////////////////
for(WeightMatrix wm : matrixList){
toolsList.add(new MotifAnalysisSandbox(currgen, wm, matrixList, posLines, posPeaks, posRegs, minscore,basemotifscore, genomeSequencePath));
}
tools = new MotifAnalysisSandbox(currgen, matrix, matrixList,posLines, posPeaks, posRegs, minscore, basemotifscore,genomeSequencePath);
if(printHits){
if(wholeGenome)
tools.printMotifHitsFullGenome();
else
tools.printMotifHits();
}
if(printBestHits)
tools.printBestMotifHits();
if(printPeakClosestHits)
tools.printPeakClosestMotifHits();
if(printHitInfo)
tools.printHitInfo();
if(printSeqs)
tools.printPeakSeqs();
if(printSeqsNoMotifs)
tools.printSeqsWithoutMotifHits();
if(printSeqsWithMotifs)
tools.printSeqsWithMotifHits();
if(printPeaksWithMotifs)
tools.printPeaksWithMotifs();
if(printPeaksNoMotifs)
tools.printPeaksWithoutMotifs();
if(printPeakInfo)
tools.printPeakInfo();
if(printMotifScore)
tools.motifScoreThreshold();
if(printMotifPerc)
tools.motifPercThreshold();
if(printRankMotifPerc)
tools.rankMotifPerc(200);
if(motifDistHist)
tools.peak2ClosestMotifHisto(binsize);
if(motifHisto)
tools.peak2motifHisto(binsize);
if(printHitPoints)
tools.printMotifHitsAndPoints();
if(printclosesthitorientation)
tools.printClosestMotifOrientation();
if(screenMotifs)
tools.screenMotifHits();
if(scoreSeqs && seqFile !=null){
if(screenMotifs)
tools.screenFastaFile(seqFile);
else
tools.scoreFastaFile(seqFile);
}
if(motifDensity)
tools.printMotifDensity();
if(pocc)
tools.pOccFastaFile(seqFile);
if(printprofiles)
tools.printMotifProfiles(win, bins);
////////////////////////////////////////////////////////////////////////
} catch (NotFoundException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (FileNotFoundException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
//Constructor
public MotifAnalysisSandbox(Genome g, WeightMatrix wm, List<WeightMatrix> wmList,ArrayList<String> inL, ArrayList<Point> pospeak, ArrayList<Region> posreg, double minscore, double basescore, String genomeSequencePath){
gen=g;
motif= wm;
motifList = wmList;
inFileLines = inL;
peaks=pospeak;
regions=posreg;
averageRegWidth = findAvgWidth(regions);
maxRegWidth = findMaxWidth(regions);
motifThres = minscore;
basemotifscore = basescore;
seqgen = new SequenceGenerator(g);
if(genomeSequencePath != null){
seqgen.useCache(true);
seqgen.useLocalFiles(true);
seqgen.setGenomePath(genomeSequencePath);
}
scorer = new WeightMatrixScorer(motif, seqgen);
scorerList = new ArrayList<WeightMatrixScorer>();
for(WeightMatrix w : motifList){
scorerList.add(new WeightMatrixScorer(w, seqgen));
}
}
//Load weight matrix
public static WeightMatrix loadWeightMatrix(String filename) throws IOException{
WeightMatrix ret = null;
int[] indices = {'A','C','G','T'};
BufferedReader br = new BufferedReader(new FileReader(new File(filename)));
boolean nameLoaded=false;
int matLen=0;
String line;
float[][] mat = new float[200][4];
while((line = br.readLine()) != null) {
line.trim();
if(line.length() > 0) {
String[] pieces = line.split("\t");
if(pieces[0].equals("DE")){
nameLoaded=true;
matLen=0;
}
else if(nameLoaded && (pieces.length==5 || pieces.length==6)){
//Load the matrix
for(int i = 1; i <=4 ; i++) {
mat[matLen][i-1] = Float.parseFloat(pieces[i]);
}
matLen++;
}
}
}
br.close();
ret = new WeightMatrix(matLen);
for(int i=0; i<matLen;i++){
for(int j=0; j<4; j++){
ret.matrix[i][indices[j]] = mat[i][j];
}
}
return ret;
}
//Load freq matrix
public static List<WeightMatrix> loadMotifFromFile(String filename, String backname, Genome gen) throws IOException, ParseException {
FreqMatrixImport motifImport = new FreqMatrixImport();
MarkovBackgroundModel back = BackgroundModelIO.parseMarkovBackgroundModel(backname, gen);
motifImport.setBackground(back);
return motifImport.readTransfacMatrices(filename);
}
private int findAvgWidth(ArrayList<Region> regs){
long total=0, num=0;
for(Region r: regs){
total+=r.getWidth();
num++;
}
return(num==0 ? -1 :(int)(total/num));
}
private int findMaxWidth(ArrayList<Region> regs){
int max=0;
for(Region r: regs)
if(r.getWidth()>max){max = r.getWidth();}
return(max);
}
//Find the best motif matches in the regions and report the distances to the peaks
public void motif2PeakHisto(){
int binSize = 20;
int numBins = (motif.length()+(averageRegWidth/2))/binSize;
double [] motifDistHisto = new double [numBins+1];
for(int h=0; h<=numBins; h++){motifDistHisto[h]=0;}
//Add the positive set scores
for(int i=0; i<regions.size(); i++){
Region r = regions.get(i);
Point p = peaks.get(i);
WeightMatrixScoreProfile profiler = scorer.execute(r);
int bestMotifIndex = profiler.getMaxIndex();
double bestMotifScore = profiler.getMaxScore(bestMotifIndex);
if(bestMotifScore>=motifThres){
int peak2motif = Math.abs((p.getLocation()-r.getStart())-bestMotifIndex+(motif.length()/2));
motifDistHisto[peak2motif/binSize]++;
//System.out.println(p.getLocationString()+"\t"+peak2motif+"\t"+bestMotifScore);
}
}
System.out.println("\nMotif-to-Peak Histogram\nDistance\tMotifs");
for(int h=0; h<=numBins; h++){
int bin =h*binSize;
System.out.println(bin+"\t"+motifDistHisto[h]);
}
}
// Find the best closest motif match to the peaks and print the orientation
public void printClosestMotifOrientation(){
for(int i=0; i<regions.size(); i++){
Region r = regions.get(i);
Point p = peaks.get(i);
WeightMatrixScoreProfile profiler = scorer.execute(r);
int closeDist=1000000;
boolean goodMotif=false;
String goodMotifOrientation="";
for(int z=0; z<r.getWidth(); z++){
int mpos = z+(motif.length()/2);
char currStrand= profiler.getMaxStrand();
double currScore = profiler.getMaxScore(z);
if(currScore >=motifThres && Math.abs((p.getLocation()-r.getStart())-mpos)<Math.abs(closeDist)){
closeDist = (p.getLocation()-r.getStart())-mpos;
goodMotif = true;
goodMotifOrientation = Character.toString(currStrand);
}
}
if(goodMotif){
System.out.println(p.getLocationString()+"\t"+goodMotifOrientation);
}
}
}
//Find the closest motif match to the peak (that's over the threshold)
public void peak2ClosestMotifHisto(int binS){
int binSize = binS;
int numBins = (motif.length()+(maxRegWidth/2))/binSize;
double [] motifDistHisto = new double [numBins+1];
for(int h=0; h<=numBins; h++){motifDistHisto[h]=0;}
//Add the positive set scores
for(int i=0; i<regions.size(); i++){
Region r = regions.get(i);
Point p = peaks.get(i);
WeightMatrixScoreProfile profiler = scorer.execute(r);
int closeDist=1000000;
boolean goodMotif=false;
for(int z=0; z<r.getWidth(); z++){
int mpos = z+(motif.length()/2);
double currScore= profiler.getMaxScore(z);
if(currScore>=motifThres && Math.abs((p.getLocation()-r.getStart())-mpos)<Math.abs(closeDist)){
closeDist = (p.getLocation()-r.getStart())-mpos;
goodMotif=true;
}
}
if(goodMotif){
//System.out.println(p.getLocationString()+"\t"+closeDist);
motifDistHisto[(Math.abs(closeDist))/binSize]++;
}
}
System.out.println("\nPeak-to-ClosestMotif Histogram\nDistance\tMotifs");
for(int h=0; h<=numBins; h++){
int bin =h*binSize;
System.out.println(bin+"\t"+motifDistHisto[h]);
}
}
//Histogram of all matches to the motif vs distance to peak
public void peak2motifHisto(int binSize){
int numBins = (motif.length()+(maxRegWidth/2))/binSize;
double [] motifDistHisto = new double [numBins+1];
for(int h=0; h<=numBins; h++){motifDistHisto[h]=0;}
//Add the positive set scores
for(int i=0; i<regions.size(); i++){
Region r = regions.get(i);
Point p = peaks.get(i);
WeightMatrixScoreProfile profiler = scorer.execute(r);
for(int z=0; z<r.getWidth(); z++){
double currScore= profiler.getMaxScore(z);
if(currScore>=motifThres){
int dist = (p.getLocation()-r.getStart())-z;
motifDistHisto[(Math.abs(dist))/binSize]++;
}
}
}
System.out.println("\nPeak-to-Motif Histogram\nDistance\tMotifCounts\tMotifCountsNorm");
for(int h=0; h<=numBins; h++){
int bin =h*binSize;
System.out.println(bin+"\t"+motifDistHisto[h]+"\t"+motifDistHisto[h]/(double)regions.size());
}
}
//Simple count of hits
public void countHits(){
//System.out.println("Counting hits");
int numHits=0, peaksWithHits=0, totalPeaks=0;
for(int i=0; i<regions.size(); i++){
totalPeaks++;
Region r = regions.get(i);
Point p = peaks.get(i);
WeightMatrixScoreProfile profiler = scorer.execute(r);
boolean goodMotif=false;
for(int z=0; z<r.getWidth(); z++){
double currScore= profiler.getMaxScore(z);
if(currScore>=motifThres){
numHits++;
goodMotif=true;
}
}
if(goodMotif){
peaksWithHits++;
}
}
System.out.println(numHits+" hits in "+peaksWithHits+" peaks from "+totalPeaks+" total peaks.");
}
//printing the hit sequences
public void printMotifHits(){
int numHits=0, peaksWithHits=0, totalPeaks=0;
ArrayList<ScoredStrandedRegion> hits = new ArrayList<ScoredStrandedRegion>();
ArrayList<String> hitseqs = new ArrayList<String>();
for(int i=0; i<regions.size(); i++){
totalPeaks++;
Region r = regions.get(i);
String seq = seqgen.execute(r);
WeightMatrixScoreProfile profiler = scorer.execute(r);
boolean goodMotif=false;
for(int z=0; z<r.getWidth()-motif.length()+1; z++){
double currScore= profiler.getMaxScore(z);
if(currScore>=motifThres){
numHits++;
goodMotif=true;
String subseq = seq.substring(z, z+motif.length());
Region hitreg =new Region(gen, r.getChrom(), r.getStart()+z, r.getStart()+z+motif.length()-1);
hits.add(new ScoredStrandedRegion(gen, r.getChrom(), hitreg.getStart(), hitreg.getEnd(), currScore, profiler.getMaxStrand(z)));
if(profiler.getMaxStrand(z)=='+'){hitseqs.add(subseq);
}else{hitseqs.add(SequenceUtils.reverseComplement(subseq));}
}
}
if(goodMotif){
peaksWithHits++;
}
}
double perc = (double)peaksWithHits/(double)totalPeaks;
System.out.println(motif.name+" hits: "+numHits+" hits in "+peaksWithHits+" regions from "+totalPeaks+" total peaks ("+perc+").");
for(int h=0; h<hits.size(); h++){
//System.out.println(hits.get(h)+"\t"+hitseqs.get(h)+"\t"+SequenceUtils.reverseComplement(hitseqs.get(h)));
System.out.println(hits.get(h).toTabString()+"\t"+hitseqs.get(h));
}
}
//printing the best hit sequences
public void printBestMotifHits(){
int numHits=0, peaksWithHits=0, totalPeaks=0;
ArrayList<ScoredStrandedRegion> hits = new ArrayList<ScoredStrandedRegion>();
ArrayList<String> hitseqs = new ArrayList<String>();
ArrayList<Point> peaksWithHitsList = new ArrayList<Point>();
for(int i=0; i<regions.size(); i++){
totalPeaks++;
Region r = regions.get(i);
String seq = seqgen.execute(r);
WeightMatrixScoreProfile profiler = scorer.execute(r);
int bestMotifIndex = profiler.getMaxIndex();
double bestMotifScore = profiler.getMaxScore(bestMotifIndex);
if(bestMotifScore>=motifThres){
numHits++;
String subseq = seq.substring(bestMotifIndex, bestMotifIndex+motif.length());
Region hitreg =new Region(gen, r.getChrom(), r.getStart()+bestMotifIndex, r.getStart()+bestMotifIndex+motif.length()-1);
hits.add(new ScoredStrandedRegion(gen, r.getChrom(), hitreg.getStart(), hitreg.getEnd(), bestMotifScore, profiler.getMaxStrand(bestMotifIndex)));
if(profiler.getMaxStrand(bestMotifIndex)=='+'){hitseqs.add(subseq);
}else{hitseqs.add(SequenceUtils.reverseComplement(subseq));}
peaksWithHits++;
if(peaks.size()>0){
peaksWithHitsList.add(peaks.get(i));
}
}
}
double perc = (double)peaksWithHits/(double)totalPeaks;
System.err.println(motif.name+" hits: "+numHits+" hits in "+peaksWithHits+" regions from "+totalPeaks+" total peaks ("+perc+").");
for(int h=0; h<hits.size(); h++){
//System.out.println(hits.get(h)+"\t"+hitseqs.get(h)+"\t"+SequenceUtils.reverseComplement(hitseqs.get(h)));
System.out.println(peaksWithHitsList.get(h).getLocationString()+"\t"+hits.get(h).toTabString()+"\t"+hitseqs.get(h));
}
}
//printing hit info:
// best hit score
// num hits over threshold
// pOcc
public void printHitInfo(){
ArrayList<ScoredStrandedRegion> hits = new ArrayList<ScoredStrandedRegion>();
ArrayList<String> hitseqs = new ArrayList<String>();
for(int i=0; i<regions.size(); i++){
String outString=peaks.get(i).getLocationString();
Region r = regions.get(i);
String seq = seqgen.execute(r);
double conc = 0.0;
for(int m=0; m< motifList.size(); m++){
int numHits=0;
conc = Math.exp(-1*motifList.get(m).getMaxScore());
WeightMatrixScoreProfile profiler = scorerList.get(m).execute(r);
int bestMotifIndex = profiler.getMaxIndex();
double bestMotifScore = profiler.getMaxScore(bestMotifIndex);
for(int z=0; z<r.getWidth()-motifList.get(m).length()+1; z++){
double currScore= profiler.getMaxScore(z);
if(currScore>=motifThres){
numHits++;
String subseq = seq.substring(z, z+motifList.get(m).length());
Region hitreg =new Region(gen, r.getChrom(), r.getStart()+z, r.getStart()+z+motifList.get(m).length()-1);
hits.add(new ScoredStrandedRegion(gen, r.getChrom(), hitreg.getStart(), hitreg.getEnd(), currScore, profiler.getMaxStrand(z)));
if(profiler.getMaxStrand(z)=='+'){hitseqs.add(subseq);
}else{hitseqs.add(SequenceUtils.reverseComplement(subseq));}
}
}
double pOcc = profiler2ProbOcc(profiler, conc);
double sumScore = sumPositiveLLScores(profiler);
outString = outString + "\t"+ bestMotifScore +"\t"+ numHits +"\t"+ pOcc+"\t"+sumScore+"\t";
}
while (outString.endsWith("\t")) {
outString = outString.substring(0, outString.length()-1);
}
//Print info
System.out.println(outString);
}
}
//printing the hit closest to the peak
public void printPeakClosestMotifHits(){
int numHits=0, peaksWithHits=0, totalPeaks=0;
ArrayList<ScoredStrandedRegion> hits = new ArrayList<ScoredStrandedRegion>();
ArrayList<String> hitseqs = new ArrayList<String>();
for(int i=0; i<regions.size(); i++){
totalPeaks++;
Region r = regions.get(i);
Point p = peaks.get(i);
String seq = seqgen.execute(r);
WeightMatrixScoreProfile profiler = scorer.execute(r);
int bestMotifIndex = profiler.getMaxIndex();
double bestMotifScore = profiler.getMaxScore(bestMotifIndex);
if(bestMotifScore>=motifThres){
int closestDist = Integer.MAX_VALUE;
int closestIndex = -1;
double closestScore=0.0;
for(int z=0; z<r.getWidth()-motif.length()+1; z++){
double currScore= profiler.getMaxScore(z);
if(currScore>=motifThres){
int motifCenterCoord = z+(motif.length()/2)+r.getStart();
int dist = Math.abs(p.getLocation() - motifCenterCoord);
if(dist<closestDist){
closestDist = dist;
closestIndex = z;
closestScore = currScore;
}
}
}
numHits++;
String subseq = seq.substring(closestIndex, closestIndex+motif.length());
Region hitreg =new Region(gen, r.getChrom(), r.getStart()+closestIndex, r.getStart()+closestIndex+motif.length()-1);
hits.add(new ScoredStrandedRegion(gen, r.getChrom(), hitreg.getStart(), hitreg.getEnd(), closestScore, profiler.getMaxStrand(closestIndex)));
if(profiler.getMaxStrand(closestIndex)=='+'){hitseqs.add(subseq);
}else{hitseqs.add(SequenceUtils.reverseComplement(subseq));}
peaksWithHits++;
}
}
double perc = (double)peaksWithHits/(double)totalPeaks;
System.out.println("#"+motif.name+" hits: "+numHits+" hits in "+peaksWithHits+" regions from "+totalPeaks+" total peaks ("+perc+").");
for(int h=0; h<hits.size(); h++){
//System.out.println(hits.get(h)+"\t"+hitseqs.get(h)+"\t"+SequenceUtils.reverseComplement(hitseqs.get(h)));
System.out.println(hits.get(h).toTabString()+"\t"+hitseqs.get(h));
}
}
//printing the hit sequences
public void printMotifHitsFullGenome(){
int numHits=0, peaksWithHits=0, totalPeaks=0;
ArrayList<ScoredStrandedRegion> hits = new ArrayList<ScoredStrandedRegion>();
ArrayList<String> hitseqs = new ArrayList<String>();
for(int i=0; i<regions.size(); i++){
totalPeaks++;
Region r = regions.get(i);
int rstart = r.getStart();
int chunksize = 10000000;
int length = motif.length();
// work over the target region in pieces
while (rstart < r.getEnd()) {
int rend = rstart + chunksize;
if (rend > r.getEnd()) {
rend = r.getEnd();
}if (rend - rstart < length) {break;}
Region cr = new Region(r.getGenome(), r.getChrom(), rstart, rend);
String seq = seqgen.execute(cr);
WeightMatrixScoreProfile profiler = scorer.execute(cr);
boolean goodMotif=false;
for(int z=0; z<cr.getWidth(); z++){
double currScore= profiler.getMaxScore(z);
if(currScore>=motifThres){
numHits++;
goodMotif=true;
String subseq = seq.substring(z, z+motif.length());
Region hitreg =new Region(gen, r.getChrom(), cr.getStart()+z, cr.getStart()+z+motif.length()-1);
ScoredStrandedRegion ssr = new ScoredStrandedRegion(gen, r.getChrom(), hitreg.getStart(), hitreg.getEnd(), currScore, profiler.getMaxStrand(z));
String currseq = null;
if(profiler.getMaxStrand(z)=='+'){currseq = subseq;
}else{currseq = SequenceUtils.reverseComplement(subseq);}
System.out.println(ssr.toTabString()+"\t"+currseq);
}
}
if(goodMotif){
peaksWithHits++;
}
rstart = rend - length + 1;
}
}
double perc = (double)peaksWithHits/(double)totalPeaks;
System.out.println(motif.name+" hits: "+numHits+" hits in "+peaksWithHits+" regions from "+totalPeaks+" total peaks ("+perc+").");
}
//printing the hit sequences
public void printMotifHitsAndPoints(){
int numHits=0, peaksWithHits=0, totalPeaks=0;
ArrayList<ScoredStrandedRegion> hits = new ArrayList<ScoredStrandedRegion>();
ArrayList<Point> hitPoints = new ArrayList<Point>();
ArrayList<String> hitseqs = new ArrayList<String>();
for(int i=0; i<regions.size(); i++){
totalPeaks++;
Region r = regions.get(i);
String seq = seqgen.execute(r);
WeightMatrixScoreProfile profiler = scorer.execute(r);
boolean goodMotif=false;
for(int z=0; z<r.getWidth(); z++){
double currScore= profiler.getMaxScore(z);
if(currScore>=motifThres){
numHits++;
goodMotif=true;
String subseq = seq.substring(z, z+motif.length());
Region hitreg =new Region(gen, r.getChrom(), r.getStart()+z, r.getStart()+z+motif.length()-1);
hits.add(new ScoredStrandedRegion(gen, r.getChrom(), hitreg.getStart(), hitreg.getEnd(), currScore, profiler.getMaxStrand(z)));
hitPoints.add(peaks.get(i));
if(profiler.getMaxStrand(z)=='+'){hitseqs.add(subseq);
}else{hitseqs.add(SequenceUtils.reverseComplement(subseq));}
}
}
if(goodMotif){
peaksWithHits++;
}
}
double perc = (double)peaksWithHits/(double)totalPeaks;
System.out.println(motif.name+" hits: "+numHits+" hits in "+peaksWithHits+" peaks from "+totalPeaks+" total peaks ("+perc+").");
for(int h=0; h<hits.size(); h++){
System.out.println(hitPoints.get(h)+"\t"+hits.get(h).toTabString()+"\t"+hitseqs.get(h)+"\t"+SequenceUtils.reverseComplement(hitseqs.get(h)));
//System.out.println(hits.get(h)+"\t"+hitseqs.get(h));
}
}
//Simple printing of peak lines that contain the motif
public void printPeaksWithMotifs(){
for(int i=0; i<regions.size(); i++){
Region r = regions.get(i);
WeightMatrixScoreProfile profiler = scorer.execute(r);
boolean goodMotif=false;
for(int z=0; z<r.getWidth(); z++){
double currScore= profiler.getMaxScore(z);
if(currScore>=motifThres)
goodMotif=true;
}
if(goodMotif)
System.out.println(inFileLines.get(i));
}
}
//Simple printing of peak lines that don't contain the motif
public void printPeaksWithoutMotifs(){
for(int i=0; i<regions.size(); i++){
Region r = regions.get(i);
WeightMatrixScoreProfile profiler = scorer.execute(r);
boolean goodMotif=false;
for(int z=0; z<r.getWidth(); z++){
double currScore= profiler.getMaxScore(z);
if(currScore>=motifThres)
goodMotif=true;
}
if(!goodMotif)
System.out.println(inFileLines.get(i));
}
}
//Print the sequences for each peak
public void printPeakSeqs(){
for(int i=0; i<regions.size(); i++){
Region r = regions.get(i);
String seq = seqgen.execute(r);
seq = seq.toLowerCase();
System.out.println(">"+r.toString()+"\n"+seq);
}
}
//Screen out sequences without motifs occurences
public void printSeqsWithMotifHits(){
for(int i=0; i<regions.size(); i++){
Region r = regions.get(i);
String seq = seqgen.execute(r);
seq = seq.toLowerCase();
WeightMatrixScoreProfile profiler = scorer.execute(r);
boolean goodMotif=false;
for(int z=0; z<r.getWidth(); z++){
double currScore= profiler.getMaxScore(z);
if(currScore>=motifThres)
{ goodMotif=true;
String mmatch = seq.substring(z, z+motif.length());
seq = seq.replaceAll(mmatch, mmatch.toUpperCase());
}
}
if(goodMotif)
System.out.println(">"+r.toString()+"\n"+seq);
}
}
//Screen out good motif hits from the input sequence
public void printSeqsWithoutMotifHits(){
for(int i=0; i<regions.size(); i++){
Region r = regions.get(i);
String seq = seqgen.execute(r);
seq = seq.toLowerCase();
WeightMatrixScoreProfile profiler = scorer.execute(r);
boolean goodMotif=false;
for(int z=0; z<r.getWidth(); z++){
double currScore= profiler.getMaxScore(z);
if(currScore>=motifThres)
goodMotif=true;
}
if(!goodMotif)
System.out.println(">"+r.toString()+"\n"+seq);
}
}
//Screen out sequences containing good motif hits from the input sequence
public void screenMotifHits(){
for(int i=0; i<regions.size(); i++){
Region r = regions.get(i);
char [] seq = seqgen.execute(r).toCharArray();
boolean [] covered = motifCoverage(r);
StringBuilder outseq = new StringBuilder();
for(int j=0; j<r.getWidth(); j++){
if(!covered[j]){
outseq.append(seq[j]);
}else{
outseq.append('N');
}
}
System.out.println(">"+r.toString()+"\n"+outseq.toString());
}
}
//Print score if > threshold
public void motifScoreThreshold(){
for(int i=0; i<regions.size(); i++){
Region r = regions.get(i);
String seq = seqgen.execute(r);
WeightMatrixScoreProfile profiler = scorer.execute(seq);
boolean goodMotif=false;
double maxScore= profiler.getMaxScore();
if(maxScore>=motifThres)
goodMotif=true;
int index = profiler.getMaxIndex();
String bestSeq = seq.substring(index, index+motif.length());
if(profiler.getMaxStrand()=='-'){
bestSeq = SequenceUtils.reverseComplement(bestSeq);
}
String[] words = inFileLines.get(i).split("\\s+");
if(!goodMotif)
System.out.println(words[0]+"\t"+words[1]+"\t"+words[2]+"\t"+words[3]+"\t"+motif.getMinScore()+"\t"+bestSeq);
else
System.out.println(words[0]+"\t"+words[1]+"\t"+words[2]+"\t"+words[3]+"\t"+maxScore+"\t"+bestSeq);
}
}
//Simple printing of peak lines that contain the motif
public void rankMotifPerc(int stepSize){
double hasMotif=0;
for(int i=0; i<regions.size(); i++){
Region r = regions.get(i);
WeightMatrixScoreProfile profiler = scorer.execute(r);
boolean goodMotif=false;
for(int z=0; z<r.getWidth(); z++){
double currScore= profiler.getMaxScore(z);
if(currScore>=motifThres)
goodMotif=true;
}
if(goodMotif)
hasMotif++;
if(i>0 && i%stepSize==0){
double perc = hasMotif/stepSize;
System.out.println((i-stepSize)+"-"+(i)+"\t"+perc);
hasMotif=0;
}
}
}
//Print score if > threshold
public void motifPercThreshold(){
for(int i=0; i<regions.size(); i++){
Region r = regions.get(i);
WeightMatrixScoreProfile profiler = scorer.execute(r);
boolean goodMotif=false;
double maxScore= profiler.getMaxScore();
if(maxScore>=motifThres)
goodMotif=true;
else
maxScore = motif.getMinScore();
String[] words = inFileLines.get(i).split("\\s+");
double perc = (maxScore-motif.getMinScore())/(motif.getMaxScore()-motif.getMinScore());
System.out.println(words[0]+"\t"+words[1]+"\t"+words[2]+"\t"+words[3]+"\t"+perc);
}
}
//Print representations of the motif-scoring landscape over each sequence
//Watch out for the minimum motif score in this; if the motif threshold is below zero, the default will look funky
public void printMotifScoreLandscape(){
for(int i=0; i<regions.size(); i++){
Region r = regions.get(i);
double [] scores = new double[r.getWidth()];
for(int j=0; i<r.getWidth(); j++){scores[j]=0;}
WeightMatrixScoreProfile profiler = scorer.execute(r);
for(int z=0; z<r.getWidth(); z++){
double currScore= profiler.getMaxScore(z);
if(currScore>=motifThres){
for(int j=0; j<motif.length() && z+j<r.getWidth(); j++){
if(currScore>=scores[z+j]){
scores[z+j]=currScore;
}
}
}
}
System.out.print(r.toString());
for(int z=0; z<r.getWidth(); z++){
System.out.print("\t"+scores[z]);
}System.out.print("\n");
}
}
//Returns the hits to a motif in a region
private ArrayList<ScoredStrandedRegion> getMotifHits(Region r){
ArrayList<ScoredStrandedRegion> hits = new ArrayList<ScoredStrandedRegion>();
ArrayList<String> hitseqs = new ArrayList<String>();
String seq = seqgen.execute(r);
WeightMatrixScoreProfile profiler = scorer.execute(r);
boolean goodMotif=false;
for(int z=0; z<r.getWidth(); z++){
double currScore= profiler.getMaxScore(z);
if(currScore>=motifThres){
goodMotif=true;
String subseq = seq.substring(z, z+motif.length());
Region hitreg =new Region(gen, r.getChrom(), r.getStart()+z, r.getStart()+z+motif.length()-1);
hits.add(new ScoredStrandedRegion(gen, r.getChrom(), hitreg.getStart(), hitreg.getEnd(), currScore, profiler.getMaxStrand(z)));
if(profiler.getMaxStrand(z)=='+'){hitseqs.add(subseq);
}else{hitseqs.add(SequenceUtils.reverseComplement(subseq));}
}
}
return(hits);
}
//returns the peaks overlapping a region
private ArrayList<Point> getOverlappingPeaks(Region r){
ArrayList<Point> overlapping = new ArrayList<Point>();
for(Point p : peaks){
if(r.contains(p)){
overlapping.add(p);
}
}return(overlapping);
}
//return an indicator array for covered/uncovered by good motif
private boolean[] motifCoverage(Region r){
boolean [] covered = new boolean [r.getWidth()];
for(int i=0; i<r.getWidth(); i++){covered[i]=false;}
WeightMatrixScoreProfile profiler = scorer.execute(r);
for(int z=0; z<r.getWidth(); z++){
double currScore= profiler.getMaxScore(z);
if(currScore>=motifThres){
for(int j=0; j<motif.length() && z+j<r.getWidth(); j++){
covered[z+j]=true;
}
}
}
return(covered);
}
//return an indicator array for covered/uncovered by good motif
private boolean[] motifCoverage(String seq){
boolean [] covered = new boolean [seq.length()];
for(int i=0; i<seq.length(); i++){covered[i]=false;}
WeightMatrixScoreProfile profiler = scorer.execute(seq);
for(int z=0; z<seq.length(); z++){
double currScore= profiler.getMaxScore(z);
if(currScore>=motifThres){
for(int j=0; j<motif.length() && z+j<seq.length(); j++){
covered[z+j]=true;
}
}
}
return(covered);
}
public void screenFastaFile(String inFile){
FASTALoader loader = new FASTALoader();
File f = new File(inFile);
Iterator<Pair<String, String>> it = loader.execute(f);
while(it.hasNext()){
Pair<String, String> p = it.next();
String name = p.car();
String sequence = p.cdr();
char [] seq = sequence.toCharArray();
boolean [] covered = motifCoverage(sequence);
StringBuilder outseq = new StringBuilder();
for(int j=0; j<seq.length; j++){
if(!covered[j]){
outseq.append(seq[j]);
}else{
outseq.append('N');
}
}
System.out.println(">"+name+"\n"+outseq.toString());
}
}
public void scoreFastaFile(String inFile){
FASTALoader loader = new FASTALoader();
File f = new File(inFile);
int numHits=0, seqsWithHits=0, totalSeqs=0;
Iterator<Pair<String, String>> it = loader.execute(f);
while(it.hasNext()){
Pair<String, String> p = it.next();
String name = p.car();
String seq = p.cdr();
WeightMatrixScoreProfile profiler = scorer.execute(seq);
boolean goodMotif=false;
for(int z=0; z<seq.length()-motif.length()+1; z++){
double currScore= profiler.getMaxScore(z);
if(currScore>=motifThres){
char maxStrand = profiler.getMaxStrand(z);
numHits++;
goodMotif=true;
String subseq = seq.substring(z, z+motif.length());
String revseq=SequenceUtils.reverseComplement(subseq);
if(maxStrand == '+')
System.out.println(name+"\t"+z+":+"+"\t"+currScore+"\t"+subseq+"\t"+revseq);
else
System.out.println(name+"\t"+z+":-"+"\t"+currScore+"\t"+revseq+"\t"+subseq);
}
}
if(goodMotif){
seqsWithHits++;
}
totalSeqs++;
}
double perc = (double)seqsWithHits/(double)totalSeqs;
System.out.println(motif.name+" hits: "+numHits+" hits in "+seqsWithHits+" peaks from "+totalSeqs+" total peaks ("+perc+").");
}
//Same as above except returning the probability of occupancy for each sequence
public void pOccFastaFile(String inFile){
double conc = Math.exp(-1*motif.getMaxScore());
FASTALoader loader = new FASTALoader();
File f = new File(inFile);
int numHits=0, seqsWithHits=0, totalSeqs=0;
Iterator<Pair<String, String>> it = loader.execute(f);
while(it.hasNext()){
Pair<String, String> p = it.next();
String name = p.car();
String seq = p.cdr();
WeightMatrixScoreProfile profiler = scorer.execute(seq);
double pOcc = profiler2ProbOcc(profiler, conc);
System.out.println(name+"\t"+pOcc);
}
}
public void printPeakInfo() {
String [] refs = new String[]{"refGene"};
ArrayList<Gene> closestGenes = findClosestGenes(refs);
for(int i=0; i<peaks.size(); i++){
Point p = peaks.get(i);
Region r = regions.get(i);
//String seq = seqgen.execute(r);
Gene closestGene = closestGenes.get(i);
int dist2Gene = closestGene.getName().equals("NONE") ? maxGeneDistance : Math.abs(p.getLocation() - closestGene.getFivePrime());
String[] words = inFileLines.get(i).split("\\s+");
System.out.println(words[0]+"\t"+words[1]+"\t"+words[2]+"\t"+words[3]+"\t"+closestGene.getName()+"\t"+closestGene.getID()+"\t"+dist2Gene);
//System.out.println(words[0]+"\t"+words[1]+"\t"+words[2]+"\t"+words[3]+"\t"+seq);
}
}
//Arguments: a list of enriched peaks and an array of annotation source names
private ArrayList<Gene> findClosestGenes(String [] geneAnnots){
ArrayList<Gene> closest = new ArrayList<Gene>();
if(geneAnnots!=null && geneAnnots.length>0){
for(int g=0; g<geneAnnots.length; g++){
RefGeneGenerator<Region> rgg = new RefGeneGenerator<Region>(gen, geneAnnots[g]);
for (Point p : peaks) {
int distToGene = maxGeneDistance+1;
Gene closestGene=null;
Region query = p.expand(maxGeneDistance);
Iterator<Gene> geneIter = rgg.execute(query);
while (geneIter.hasNext()) {
Gene gene = geneIter.next();
int distance = Math.abs(p.getLocation() - gene.getFivePrime());
if (distance < distToGene) {
distToGene = distance;
closestGene = gene;
}
}
if(distToGene<maxGeneDistance)
closest.add(closestGene);
else{
Gene none = new Gene(gen, "NONE", 1, 2, "NONE", "NONE", '+', "NONE");
closest.add(none);
}
}
}
}
return(closest);
}
//Print the motif density alongside the peaks density
private void printMotifDensity(){
Iterator<NamedRegion> chroms = new ChromRegionIterator(gen);
while (chroms.hasNext()) {
NamedRegion currentChrom = chroms.next();
for(int x=currentChrom.getStart(); x<=currentChrom.getEnd(); x+=motifDensityStepSize){
int y = x+motifDensityWinSize-1;
if(y>=currentChrom.getEnd()){y=currentChrom.getEnd()-1;}
Region currRegion = new Region(gen, currentChrom.getChrom(), x, y);
//Get the number of motif hits in the region
ArrayList<ScoredStrandedRegion> motifHits = getMotifHits(currRegion);
//Get the number of peaks in the region
ArrayList<Point> overlapPeaks = getOverlappingPeaks(currRegion);
double motifDensity = (double)(motifHits.size())/(double)currRegion.getWidth();
double peakDensity = (double)(overlapPeaks.size())/(double)currRegion.getWidth();
System.out.println(currRegion.getLocationString()+"\t"+motifDensity+"\t"+peakDensity);
}
}
}
//Convert a WeightMatrix score to a probability
private double score2Prob(double x, double conc){
double kd = Math.exp(-1*x);
return(conc/(kd+conc));
}
//Convert a WeightMatrixProfile into a probability of occupancy
public double profiler2ProbOcc(WeightMatrixScoreProfile profile, double conc){
double prod = 1;
for(int i=0; i<profile.length(); i++){
double currScore = profile.getMaxScore(i);
if(!Double.isNaN(currScore) && !Double.isInfinite(currScore))
prod*=(1-score2Prob(currScore, conc));
}
return(1-prod);
}
//Convert a WeightMatrixProfile into a probability of occupancy
public double sumPositiveLLScores(WeightMatrixScoreProfile profile){
double sum =0;;
for(int i=0; i<profile.length(); i++){
double currScore = profile.getMaxScore(i);
if(!Double.isNaN(currScore) && !Double.isInfinite(currScore) && currScore>basemotifscore)
sum += currScore;
}
return(sum);
}
//Load thresholds
public static HashMap<String,Double> loadThresholdsFromFile(String filename, double level){
HashMap<String,Double> motifThresholds = new HashMap<String,Double>();
if(filename == null){System.err.println("No threshold file specified");}
else{
int thresIndex=3;
try{
File bFile = new File(filename);
if(bFile.isFile()){
BufferedReader reader;
reader = new BufferedReader(new FileReader(bFile));
String firstLine = reader.readLine();
String [] tokens = firstLine.split("[\\s*\\t\\r\\n\\f]");
for(int i=3; i<tokens.length; i++){
String t = tokens[i];
if(t.startsWith("Thres")){
double val = new Double(t.replaceAll("Thres", "")).doubleValue();
if(val==level){
thresIndex=i;
}
}
}
String line;
while((line= reader.readLine())!=null){
tokens = line.split("[\\s*\\t\\r\\n\\f]");
String name = tokens[0];
double v = new Double(tokens[thresIndex]);
motifThresholds.put(name, v);
}
reader.close();
}
} catch (FileNotFoundException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
return(motifThresholds);
}
//Print motif-profiler style vectors -- percent-scores at binwidth resolution
public void printMotifProfiles(int winLen, int bins){
BinningParameters params = new BinningParameters(winLen, bins);
for(int x=0; x<peaks.size(); x++){
Point a = peaks.get(x);
double[] array = new double[params.getNumBins()];
for(int i = 0; i < array.length; i++) { array[i] = 0; }
int window = params.getWindowSize();
int left = window/2;
int right = window-left-1;
int start = Math.max(1, a.getLocation()-left);
int end = Math.min(a.getLocation()+right, a.getGenome().getChromLength(a.getChrom()));
Region query = new Region(gen, a.getChrom(), start, end);
boolean strand = (a instanceof StrandedPoint) ?
((StrandedPoint)a).getStrand() == '+' : true;
String seq = seqgen.execute(query);
WeightMatrixScoreProfile profiler = scorer.execute(seq);
for(int i=query.getStart(); i<query.getEnd(); i+=params.getBinSize()){
double maxScore=Double.MIN_VALUE;
int maxPos=0;
for(int j=i; j<i+params.getBinSize() && j<query.getEnd(); j++){
int offset = j-query.getStart();
if(profiler.getMaxScore(offset)>maxScore){
maxScore= profiler.getMaxScore(offset);
maxPos=offset;
}
}
if(maxScore>=motifThres){
int startbin, stopbin;
startbin = params.findBin(maxPos);
stopbin = params.findBin(maxPos+motif.length()-1);
if(!strand) {
int tmp = (params.getNumBins()-stopbin)-1;
stopbin = (params.getNumBins()-startbin)-1;
startbin = tmp;
}
double percScore = maxScore / motif.getMaxScore();
for(int k = startbin; k <= stopbin; k++) {
array[k] = Math.max(array[k],percScore);
}
}
}
//Print the vector
System.out.print(a);
for(int z=0; z<array.length; z++){
System.out.print("\t"+array[z]);
}System.out.print("\n");
}
}
}