package org.seqcode.deepseq.events;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.seqcode.data.motifdb.WeightMatrix;
import org.seqcode.deepseq.experiments.ControlledExperiment;
import org.seqcode.deepseq.experiments.ExperimentCondition;
import org.seqcode.deepseq.experiments.ExperimentManager;
/**
* BindingManager stores lists of binding events and motifs associated with experiment conditions,
* and binding models associated with replicates.
* These data structures used to be under the relevant experiment components, but we moved them here to
* allow experiment loading to be independent.
*
* @author mahony
*
*/
public class BindingManager {
protected EventsConfig config;
protected ExperimentManager manager;
protected List<BindingEvent> events;
protected Map<ExperimentCondition, List <BindingEvent>> conditionEvents;
protected Map<ExperimentCondition, WeightMatrix> motifs;
protected Map<ExperimentCondition, WeightMatrix> freqMatrices;
protected Map<ExperimentCondition, Integer> motifOffsets;
protected Map<ControlledExperiment, BindingModel> models;
protected Map<ExperimentCondition, Integer> maxInfluenceRange;
public BindingManager(EventsConfig con, ExperimentManager exptman){
config = con;
manager = exptman;
events = new ArrayList<BindingEvent>();
conditionEvents = new HashMap<ExperimentCondition, List <BindingEvent>>();
motifs = new HashMap<ExperimentCondition, WeightMatrix>();
freqMatrices = new HashMap<ExperimentCondition, WeightMatrix>();
motifOffsets = new HashMap<ExperimentCondition, Integer>();
models = new HashMap<ControlledExperiment, BindingModel>();
maxInfluenceRange = new HashMap<ExperimentCondition, Integer>();
for(ExperimentCondition cond : manager.getConditions()){
conditionEvents.put(cond, new ArrayList<BindingEvent>());
motifOffsets.put(cond,0);
maxInfluenceRange.put(cond,0);
}
}
public List<BindingEvent> getBindingEvents(){return events;}
public List<BindingEvent> getConditionBindingEvents(ExperimentCondition ec){return conditionEvents.get(ec);}
public WeightMatrix getMotif(ExperimentCondition ec){return motifs.get(ec);}
public WeightMatrix getFreqMatrix(ExperimentCondition ec){return freqMatrices.get(ec);}
public Integer getMotifOffset(ExperimentCondition ec){return motifOffsets.get(ec);}
public BindingModel getBindingModel(ControlledExperiment ce){return models.get(ce);}
public Integer getMaxInfluenceRange(ExperimentCondition ec){return maxInfluenceRange.get(ec);}
public void setBindingEvents(List<BindingEvent> e){events =e;}
public void setConditionBindingEvents(ExperimentCondition ec, List<BindingEvent> e){conditionEvents.put(ec, e);}
public void setMotif(ExperimentCondition ec, WeightMatrix m){motifs.put(ec, m);}
public void setFreqMatrix(ExperimentCondition ec, WeightMatrix m){freqMatrices.put(ec, m);}
public void setMotifOffset(ExperimentCondition ec, Integer i){motifOffsets.put(ec, i);}
public void setBindingModel(ControlledExperiment ce, BindingModel mod){models.put(ce, mod);}
public void updateMaxInfluenceRange(ExperimentCondition ec){
int max=0;
for(ControlledExperiment rep : ec.getReplicates()){
if(getBindingModel(rep).getInfluenceRange()>max)
max=getBindingModel(rep).getInfluenceRange();
}maxInfluenceRange.put(ec, max);
}
/**
* Print the events
* @param outRoot
*/
public void printConditionEvents(ExperimentCondition ec, String outRoot){
try{
String outName = outRoot+"."+ec.getName()+".events";
if(config.getEventsFileTXTExtension())
outName = outName+".txt";
FileWriter fw = new FileWriter(outName);
fw.write(BindingEvent.fullHeadString()+"\n");
for(BindingEvent e : getConditionBindingEvents(ec)){
fw.write(e.toString()+"\n");
}
fw.close();
} catch (IOException e) {
e.printStackTrace();
}
}
/**
* Print the replicate counts at events
* @param outRoot
*/
public void printReplicateCounts(ExperimentCondition ec, String outRoot){
try{
String outName = outRoot+"."+ec.getName()+".repcounts";
FileWriter fw = new FileWriter(outName);
fw.write(BindingEvent.repCountHeadString()+"\n");
for(BindingEvent e : getConditionBindingEvents(ec)){
fw.write(e.getRepCountString()+"\n");
}
fw.close();
} catch (IOException e) {
e.printStackTrace();
}
}
/**
* For each controlled experiment, simply calculate the proportion of reads in the provided
* list of binding events to everything else.
* @param regs
*/
public void estimateSignalVsNoiseFractions(List<BindingEvent> signalEvents){
for(ExperimentCondition c : manager.getConditions()){
for(ControlledExperiment r : c.getReplicates()){
double repSigCount =0, repNoiseCount=0;
for(BindingEvent event : signalEvents){
if(event.isFoundInCondition(c))
repSigCount += event.getRepSigHits(r);
}
repNoiseCount = r.getSignal().getHitCount() - repSigCount;
r.setSignalVsNoiseFraction(repSigCount/r.getSignal().getHitCount());
System.err.println(r.getName()+"\t"+r.getIndex()+"\tsignal-noise ratio:\t"+String.format("%.4f",r.getSignalVsNoiseFraction()));
}
}
}
/**
* Count the binding events present in a given condition
* @param cond
* @return
*/
public int countEventsInCondition(ExperimentCondition cond, double qMinThres){
int count=0;
for(BindingEvent e : events){
if(e.isFoundInCondition(cond) && e.getCondSigVCtrlP(cond) <=qMinThres)
count++;
}
return count;
}
/**
* Count the differential binding events present in a given pair of conditions
* @param cond
* @return
*/
public int countDiffEventsBetweenConditions(ExperimentCondition cond, ExperimentCondition othercond, double qMinThres, double diffPMinThres){
int count=0;
for(BindingEvent e : events){
if(e.isFoundInCondition(cond) && e.getCondSigVCtrlP(cond) <=qMinThres)
if(e.getInterCondP(cond, othercond)<=diffPMinThres && e.getInterCondFold(cond, othercond)>0)
count++;
}
return count;
}
/**
* Print all binding events to files
*/
public void writeBindingEventFiles(String filePrefix, double qMinThres, boolean runDiffTests, double diffPMinThres){
if(events.size()>0){
try {
//Full output table (all non-zero components)
String filename = filePrefix+".all.events.table";
if(config.getEventsFileTXTExtension())
filename = filename+".txt";
FileWriter fout = new FileWriter(filename);
fout.write(BindingEvent.fullHeadString()+"\n");
for(BindingEvent e : events)
fout.write(e.toString()+"\n");
fout.close();
//Per-condition event files
for(ExperimentCondition cond : manager.getConditions()){
//Sort on the current condition
BindingEvent.setSortingCond(cond);
Collections.sort(events, new Comparator<BindingEvent>(){
public int compare(BindingEvent o1, BindingEvent o2) {return o1.compareBySigCtrlPvalue(o2);}
});
String condName = cond.getName();
condName = condName.replaceAll("/", "-");
//Print events in MultiGPS format
filename = filePrefix+"_"+condName+".events";
if(config.getEventsFileTXTExtension())
filename = filename+".txt";
fout = new FileWriter(filename);
fout.write(BindingEvent.conditionHeadString(cond)+"\n");
for(BindingEvent e : events){
double Q = e.getCondSigVCtrlP(cond);
//Because of the ML step and component sharing, I think that an event could be assigned a significant number of reads without being "present" in the condition's EM model.
if(e.isFoundInCondition(cond) && Q <=qMinThres)
fout.write(e.getConditionString(cond)+"\n");
}
fout.close();
//Print events in BED
if(config.getPrintBED()){
String bedfilename = filePrefix+"_"+condName+".bed";
fout = new FileWriter(bedfilename);
fout.write(BindingEvent.conditionBEDHeadString(cond)+"\n");
for(BindingEvent e : events){
double Q = e.getCondSigVCtrlP(cond);
//Because of the ML step and component sharing, I think that an event could be assigned a significant number of reads without being "present" in the condition's EM model.
if(e.isFoundInCondition(cond) && Q <=qMinThres)
fout.write(e.getConditionBED(cond)+"\n");
}
fout.close();
}
}
//Differential event files
if(manager.getNumConditions()>1 && runDiffTests){
for(ExperimentCondition cond : manager.getConditions()){
//Sort on the current condition
BindingEvent.setSortingCond(cond);
Collections.sort(events, new Comparator<BindingEvent>(){
public int compare(BindingEvent o1, BindingEvent o2) {return o1.compareBySigCtrlPvalue(o2);}
});
for(ExperimentCondition othercond : manager.getConditions()){
if(!cond.equals(othercond)){
//Print diff events
String condName = cond.getName();
String othercondName = othercond.getName();
condName = condName.replaceAll("/", "-");
othercondName = othercondName.replaceAll("/", "-");
//MultiGPS format
filename = filePrefix+"_"+condName+"_gt_"+othercondName+".diff.events";
if(config.getEventsFileTXTExtension())
filename = filename+".txt";
fout = new FileWriter(filename);
fout.write(BindingEvent.conditionShortHeadString(cond)+"\n");
for(BindingEvent e : events){
double Q = e.getCondSigVCtrlP(cond);
//Because of the ML step and component sharing, I think that an event could be assigned a significant number of reads without being "present" in the condition's EM model.
if(e.isFoundInCondition(cond) && Q <=qMinThres){
if(e.getInterCondP(cond, othercond)<=diffPMinThres && e.getInterCondFold(cond, othercond)>0){
fout.write(e.getConditionString(cond)+"\n");
}
}
}
fout.close();
//Print events in BED
if(config.getPrintBED()){
filename = filePrefix+"_"+condName+"_gt_"+othercondName+".diff.bed";
fout = new FileWriter(filename);
fout.write(BindingEvent.diffConditionBEDHeadString(cond, othercond)+"\n");
for(BindingEvent e : events){
double Q = e.getCondSigVCtrlP(cond);
//Because of the ML step and component sharing, I think that an event could be assigned a significant number of reads without being "present" in the condition's EM model.
if(e.isFoundInCondition(cond) && Q <=qMinThres){
if(e.getInterCondP(cond, othercond)<=diffPMinThres && e.getInterCondFold(cond, othercond)>0){
fout.write(e.getConditionBED(cond)+"\n");
}
}
}
fout.close();
}
}
}
}
}
//If necessary, print per-replicate event base composition files
if(config.getCalcEventBaseCompositions()){
for(ExperimentCondition cond : manager.getConditions()){
for(ControlledExperiment rep : cond.getReplicates()){
//Print events
String repName = rep.getName();
repName = repName.replaceAll("/", "-");
repName = repName.replaceAll(":", "-");
filename = filePrefix+"_"+repName+".eventsbasecomps.txt";
fout = new FileWriter(filename);
fout.write("#Position\tEventRegA\tEventRegC\tEventRegG\tEventRegT\tEventTagA\tEventTagC\tEventTagG\tEventTagT\tBubbleRegA\tBubbleRegC\tBubbleRegG\tBubbleRegT\tBubbleTagA\tBubbleTagC\tBubbleTagG\tBubbleTagT\tBubbleIndex\n");
for(BindingEvent e : events){
double Q = e.getCondSigVCtrlP(cond);
if(e.isFoundInCondition(cond) && Q <=qMinThres){
float[] eventTagBases = e.getRepEventTagBases(rep);
float[] bubbleTagBases = e.getRepBubbleTagBases(rep);
float[] eventBases = e.getEventBases();
float[] bubbleBases = e.getBubbleBases();
float bubbleIndex = -1;
float expectedT=0, expectedACG=0;
if(bubbleBases[3]>0){
expectedT = bubbleTagBases[3] / bubbleBases[3];
float baseACG = bubbleBases[0]+bubbleBases[1]+bubbleBases[2];
if(baseACG>0)
expectedACG = (bubbleTagBases[0]+bubbleTagBases[1]+bubbleTagBases[2])/baseACG;
if(expectedACG>0)
bubbleIndex = expectedT/expectedACG;
}
fout.write(e.getPoint()+"\t"+
eventBases[0]+"\t"+eventBases[1]+"\t"+eventBases[2]+"\t"+eventBases[3]+"\t"+
eventTagBases[0]+"\t"+eventTagBases[1]+"\t"+eventTagBases[2]+"\t"+eventTagBases[3]+"\t"+
bubbleBases[0]+"\t"+bubbleBases[1]+"\t"+bubbleBases[2]+"\t"+bubbleBases[3]+"\t"+
bubbleTagBases[0]+"\t"+bubbleTagBases[1]+"\t"+bubbleTagBases[2]+"\t"+bubbleTagBases[3]+"\t"+
bubbleIndex+
"\n");
}
}
fout.close();
}
}
}
} catch (IOException e) {
e.printStackTrace();
}
}
}
/**
* Print all binding events to files
*/
public void writeFullEventFile(String filename){
if(events.size()>0){
try {
if(config.getEventsFileTXTExtension())
filename = filename+".txt";
//Full dataset table
FileWriter fout = new FileWriter(filename);
fout.write(BindingEvent.fullHeadString()+"\n");
for(BindingEvent e : events)
fout.write(e.toString()+"\n");
fout.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
/**
* Print all motifs to file
*/
public void writeMotifFile(String filename){
try {
if(config.getEventsFileTXTExtension())
filename = filename+".txt";
//Full dataset table
FileWriter fout = new FileWriter(filename);
for(ExperimentCondition cond : manager.getConditions()){
if(getFreqMatrix(cond)!=null)
fout.write(WeightMatrix.printTransfacMatrix(getFreqMatrix(cond), cond.getName()));
}
fout.close();
} catch (IOException e) {
e.printStackTrace();
}
}
/**
* Print replicate data counts to a file
*/
public void writeReplicateCounts(String filename){
if(events.size()>0){
try {
//Full dataset table
FileWriter fout = new FileWriter(filename);
fout.write(BindingEvent.repCountHeadString()+"\n");
for(BindingEvent e : events)
fout.write(e.getRepCountString()+"\n");
fout.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
/**
* Print all binding events to screen
* TESTING ONLY
*/
public void printBindingEvents(){
System.err.println(events.size()+" events found");
for(BindingEvent e : events){
System.err.println(e.toString());
}
}
}