package org.seqcode.projects.galaxyexo;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintWriter;
import java.util.List;
import org.seqcode.data.io.RegionFileUtilities;
import org.seqcode.deepseq.experiments.ControlledExperiment;
import org.seqcode.deepseq.experiments.ExperimentCondition;
import org.seqcode.deepseq.experiments.ExperimentManager;
import org.seqcode.deepseq.experiments.ExptConfig;
import org.seqcode.genome.GenomeConfig;
import org.seqcode.genome.location.StrandedPoint;
import org.seqcode.gseutils.ArgParser;
import org.seqcode.gseutils.Args;
import org.seqcode.math.stats.StatUtil;
/**
* Utility to quantify a nucleosome "valley" of a signal experiment relative to a control experiment.
* It creates a composite plot using reference points (such as plus one nucleosome positions), and
* finds the maximum and minimum read heights. The fold difference in the maximum and minimum read
* heights are compared to the control experiment to get the relative fold differences in read height.
*
* Input:
* - Genome
* - Signal experiment
* - Control experiment
* - Peak locations (plus one nucleosome)
* - Window around peaks in which to count reads
* Output:
* - A text file containing the enrichmemt
*
* @author naomi yamada
*/
public class NucleosomeEnrichmentProfiler {
protected ExperimentManager manager;
protected ExptConfig exptConfig;
protected FeatureCountsLoader featureCountsLoader;
protected String outbase;
protected int smoothingWidth;
public NucleosomeEnrichmentProfiler(String base,ExptConfig econf, ExperimentManager man, FeatureCountsLoader fcloader, int smoothing){
outbase = base;
featureCountsLoader = fcloader;
exptConfig = econf;
manager = man;
smoothingWidth=smoothing;
}
public void execute() throws FileNotFoundException{
File outFile = new File(outbase+File.separator+"nucleosome_enrichment.txt");
outFile.getParentFile().mkdirs();
PrintWriter writer = new PrintWriter(outFile);
for (ExperimentCondition condition : manager.getConditions()){
for (ControlledExperiment rep: condition.getReplicates()){
if ( !rep.hasControl()){
System.err.println("Please provide the control experiment to get statistics.");
System.exit(0);
}
double[] composite = featureCountsLoader.sampleComposite(rep);
double[] contComposite = featureCountsLoader.controlComposite();
// smooth composite plot
double[] smoothedSignal = StatUtil.gaussianSmoother(composite, smoothingWidth);
double[] smoothedControl = StatUtil.gaussianSmoother(contComposite, smoothingWidth);
double ratio = getMaxCounts(smoothedSignal)*getMinCounts(smoothedControl)/(getMinCounts(smoothedSignal)*getMaxCounts(smoothedControl));
writer.println("Sample Name "+rep.getSignal().getName()+"\tRatio: "+ratio+"\tSignal: "+rep.getSignal().getHitCount());
}
}
writer.close();
}
public double getMaxCounts(double[] array){
double maxCounts = 0;
for (int i = 0; i < array.length; i++){
if (array[i] > maxCounts)
maxCounts = array[i];
}
return maxCounts;
}
public double getMinCounts(double[] array){
double minCounts = Double.MAX_VALUE;
for (int i = 0; i < array.length; i++){
if (array[i] < minCounts)
minCounts = array[i];
}
return minCounts;
}
public static void main(String[] args) throws FileNotFoundException{
ArgParser ap = new ArgParser(args);
if((!ap.hasKey("peaks") && !ap.hasKey("regions")) ) {
System.err.println("please input peak files and region files.");
System.err.println("Usage:\n " +
"NucleosomeEnrichmentProfiler\n " +
"--geninfo <genome info file> \n " +
"--expt <file name> AND --ctrl <file name> AND --format <SAM/BAM/BED/IDX>\n " +
"--peaks <file containing coordinates of peaks> \n " +
"\nOPTIONS:\n " +
"--out <output directory (default = working directory)> \n " +
"--win <window of reads to take around peaks (default=200)> \n " +
"--bai <file path to bai file> \n " +
"--readshift <number of base pair for read shift (default=7)>\n " +
"--smooth <window of gaussian kernel smoothing (default=10)> \n " +
"");
System.exit(0);
}
GenomeConfig gconf = new GenomeConfig(args);
ExptConfig econf = new ExptConfig(gconf.getGenome(), args);
econf.setPerBaseReadFiltering(false);
ExperimentManager manager = new ExperimentManager(econf);
// parsing command line arguments
int win = Args.parseInteger(args, "win", 500);
int fivePrimeShift = Args.parseInteger(args,"readshift", 6);
int smooth = Args.parseInteger(args,"smooth", 10);
List<StrandedPoint> spoints = RegionFileUtilities.loadStrandedPointsFromFile(gconf.getGenome(), ap.getKeyValue("peaks"));
// Get outdir and outbase and make them;
String outbase = Args.parseString(args, "out", System.getProperty("user.dir"));
FeatureCountsLoader fcLoader = new FeatureCountsLoader(gconf, spoints, win);
fcLoader.setFivePrimeShift(fivePrimeShift);
NucleosomeEnrichmentProfiler profile = new NucleosomeEnrichmentProfiler(outbase,econf,manager, fcLoader,smooth);
profile.execute();
manager.close();
}
}