package org.seqcode.data.seqdata.tools; import java.util.*; import java.sql.*; import java.io.*; import org.seqcode.data.seqdata.*; import org.seqcode.genome.Genome; import org.seqcode.genome.location.Gene; import org.seqcode.genome.location.Region; import org.seqcode.gsebricks.verbs.*; import org.seqcode.gsebricks.verbs.location.RefGeneGenerator; import org.seqcode.gseutils.Args; import org.seqcode.gseutils.NotFoundException; import org.seqcode.gseutils.Pair; import org.seqcode.projects.seqview.components.Snapshot; /** * Reads regions on STDIN, from a file, or uses a list such as all genes * and produces a line on STDOUT for each that lists either the analyses and binding * events for each. * * java org.seqcode.data.seqdata.tools.GetBindingInRegion --species "$MM;mm9" --genes refGene * java org.seqcode.data.seqdata.tools.GetBindingInRegion --species "$MM;mm9" --region 10:0-1000000 * java org.seqcode.data.seqdata.tools.GetBindingInRegion --species "$MM;mm9" --regionfile foo.txt * java org.seqcode.data.seqdata.tools.GetBindingInRegion --species "$MM;mm9" * this form above reads regions from STDIN * * --plots causes plots with the seqdata data to be produced. * */ public class GetBindingInRegion { private SeqDataLoader loader; private Genome genome; private List<RefGeneGenerator> geneGenerators; private List<Region> regions; private int plotExpand; private boolean plots; private String plotPrefix = ""; public GetBindingInRegion() throws SQLException, IOException { geneGenerators = new ArrayList<RefGeneGenerator>(); regions = new ArrayList<Region>(); loader = new SeqDataLoader(); plots = false; } public void parseArgs(String[] args) throws NotFoundException, IOException{ genome = Args.parseGenome(args).cdr(); geneGenerators = Args.parseGenes(args); List<Region> fromfile = Args.readLocations(args,"regionfile"); if (fromfile != null) { regions.addAll(fromfile); } regions.addAll(Args.parseRegions(args)); plots = Args.parseFlags(args).contains("plots"); plotExpand = Args.parseInteger(args,"plotexpand",2500); plotPrefix = Args.parseString(args,"plotprefix",""); } public void setGenome(Genome g) {genome = g;} public void setGenes(Collection<RefGeneGenerator> g) {geneGenerators.clear(); geneGenerators.addAll(g);} public void setRegions(Collection<Region> r) {regions.clear(); regions.addAll(r);} public void computeRegions() throws IOException, SQLException { for (RefGeneGenerator generator : geneGenerators) { Iterator<Gene> iter = generator.getAll(); while (iter.hasNext()) { regions.add(iter.next()); } } if (regions.size() == 0) { System.err.println("Reading regions from STDIN"); BufferedReader reader = new BufferedReader(new InputStreamReader(System.in)); String line = null; while ((line = reader.readLine()) != null) { regions.add(Region.fromString(genome, line)); } } Collections.sort(regions); } public List<Region> getRegions() {return regions;} public Collection<SeqAnalysis> getAnalysesForRegion(Region r) throws SQLException { return SeqAnalysis.withResultsIn(loader, r); } public void print(Region r) throws SQLException { System.out.print(r.toString()); for (SeqAnalysis a : getAnalysesForRegion(r)) { System.out.print("\t" + a.getName() + ";" + a.getVersion()); } System.out.println(); } public void plot(Region r) throws SQLException, NotFoundException, IOException { Collection<SeqAnalysis> analyses = getAnalysesForRegion(r); if (analyses.size() == 0) { return ;} r = r.expand(plotExpand,plotExpand); System.err.println("Doing plot for " + r); ArrayList<String> args = new ArrayList<String>(); args.add("--noexit"); args.add("--species"); args.add(String.format("%s;%s", genome.getSpeciesName(), genome.getVersion())); args.add("--picture"); args.add(plotPrefix + (r.toString().replaceAll(":","_")) + ".png"); args.add("--genes"); args.add("refGene"); args.add("--chrom"); args.add(String.format("%s:%d-%d",r.getChrom(), r.getStart(),r.getEnd())); Map<Pair<String,String>, List<String>> chipseqtracks = new HashMap<Pair<String,String>, List<String>>(); for (SeqAnalysis analysis : analyses) { args.add("--chipseqanalysis"); args.add(analysis.getName() + ";" + analysis.getVersion()); for (SeqAlignment align : analysis.getForeground()) { Pair<String,String> namever = new Pair<String,String>(align.getExpt().getName(), align.getName()); if (!chipseqtracks.containsKey(namever)) { chipseqtracks.put(namever, new ArrayList<String>()); } chipseqtracks.get(namever).add(align.getExpt().getReplicate()); } } for (Pair<String,String> p : chipseqtracks.keySet()) { args.add("--chipseq"); String arg = p.car(); for (String rep : chipseqtracks.get(p)) { arg = arg + ";" + rep; } arg = arg + ";" + p.cdr(); args.add(arg); } System.err.println("Args are " + args); String[] asarray = new String[args.size()]; for (int i = 0; i < args.size(); i++) { asarray[i] = args.get(i); } Snapshot.main(asarray); } public void close() { loader.close(); loader = null; for (RefGeneGenerator g : geneGenerators) { g.close(); } } public static void main(String args[]) { try { GetBindingInRegion get = new GetBindingInRegion(); get.parseArgs(args); get.computeRegions(); for (Region r : get.getRegions()) { if (get.plots) { get.plot(r); } else { get.print(r); } } get.close(); } catch (Exception e) { e.printStackTrace(); } System.exit(0); } }