package org.seqcode.data.readdb;
import org.apache.commons.cli.*;
import java.util.*;
import java.io.*;
/**
* <p>Query hits from the database. Reads chrom:start-stop:strand values from
* stdin. Repeats them on stdout along with the hit positions.
*
* <p>Usage:
* <pre>
* echo "1:100-6000" | java org.seqcode.data.readdb.Query [--hostname nanog.csail.mit.edu --port 52000 --user foo --passwd bar] [--quiet] --align 100
* </pre>
* <ul>
* <li>--quiet means don't print any output. This is useful for testing the query performance
* without worrying about the time it takes to print the output.
* <li>--wiggle means output in wiggle format
* <li>--bed means output in BED format
* <li>--type2 means to query the type2 single reads.
* <li>--paired means to query the paired reads.
* <li>--noheader means to skip printing the input region in the outpu
* <li>--right means to query the right side reads rather than left.
*/
public class Query {
private String alignname;
private String hostname;
private String username, password;
private int portnum, histogram = -1;
private boolean quiet, weights, isType2, paired, isleft, noheader, bed, wiggle;
public static void main(String args[]) throws Exception {
Query query = new Query();
query.parseArgs(args);
query.run(System.in);
}
public void parseArgs(String args[]) throws IllegalArgumentException, ParseException {
Options options = new Options();
options.addOption("H","hostname",true,"server to connect to");
options.addOption("P","port",true,"port to connect to");
options.addOption("a","align",true,"alignment name");
options.addOption("u","user",true,"username");
options.addOption("p","passwd",true,"password");
options.addOption("q","quiet",false,"quiet: don't print output");
options.addOption("w","weights",false,"get and print weights in addition to positions");
options.addOption("g","histogram",true,"produce a histogram with this binsize instead of printing all read positions");
options.addOption("t2","type2",false,"work on type2 single-end alignment?");
options.addOption("d","paired",false,"work on paired alignment?");
options.addOption("r","right",false,"query right side reads when querying paired alignments");
options.addOption("N","noheader",false,"skip printing the query header");
options.addOption("W","wiggle",true,"output in wiggle format with the specified bin format");
options.addOption("B","bed",false,"output in BED format");
options.addOption("h","help",false,"print help message");
CommandLineParser parser = new GnuParser();
CommandLine line = parser.parse( options, args, false );
if (line.hasOption("help")) {
printHelp();
System.exit(0);
}
if (line.hasOption("port")) {
portnum = Integer.parseInt(line.getOptionValue("port"));
} else {
portnum = -1;
}
if (line.hasOption("hostname")) {
hostname = line.getOptionValue("hostname");
} else {
hostname = null;
}
if (line.hasOption("align")) {
alignname = line.getOptionValue("align");
} else {
alignname = null;
}
if (line.hasOption("user")) {
username = line.getOptionValue("user");
} else {
username = null;
}
if (line.hasOption("passwd")) {
password = line.getOptionValue("passwd");
} else {
password = null;
}
if (line.hasOption("histogram")) {
histogram = Integer.parseInt(line.getOptionValue("histogram"));
}
quiet = line.hasOption("quiet");
weights = line.hasOption("weights");
isType2 = line.hasOption("type2");
paired = line.hasOption("paired");
isleft = !line.hasOption("right");
noheader = line.hasOption("noheader");
bed = line.hasOption("bed");
if (bed) {
histogram = 0;
noheader = true;
if (paired) {
System.err.println("Can't do BED formatted output in paired-end mode");
}
}
if (line.hasOption("wiggle")) {
wiggle = true;
histogram = Integer.parseInt(line.getOptionValue("wiggle"));
}
}
public void printHelp() {
System.out.println("Query ReadDB. Regions are read on STDIN and output is printed on STDOUT.");
System.out.println("usage: java org.seqcode.data.readdb.Query --align alignmentname < regions.txt");
System.out.println(" [--quiet] don't print any output. Useful for performance testing without worrying about output IO time");
System.out.println(" [--weights] include weights in output or use weights for histogram");
System.out.println(" [--type2] query type2 single-end reads");
System.out.println(" [--paired] query paired reads");
System.out.println(" [--right] when querying paired reads for histograms, use right read rather than left");
System.out.println(" [--histogram 10] output a histogram of read counts or weights using a 10bp bin size");
System.out.println(" [--noheader] don't output query regions in the output");
System.out.println(" [--bed] output hit positions in BED format (doesn't work with paired reads)");
System.out.println(" [--wiggle 10] output a histogram in wiggle format with 10bp bin size");
System.out.println("");
System.out.println("Lines in the input should be of them form");
System.out.println("3:1000-2000");
}
public void run(InputStream instream) throws IOException, ClientException {
BufferedReader reader = new BufferedReader(new InputStreamReader(instream));
String line;
Client client;
if (hostname != null && portnum != -1 && username != null && password != null) {
client = new Client(hostname,
portnum,
username,
password);
} else {
client = new Client();
}
while ((line = reader.readLine()) != null) {
try {
String pieces[] = line.split("[\\:]");
int chr = Integer.parseInt(pieces[0].replaceFirst("^chr",""));
Boolean strand = pieces.length >= 3 ? pieces[2].equals("+") : null;
pieces = pieces[1].split("\\-");
int start = Integer.parseInt(pieces[0]);
int stop = Integer.parseInt(pieces[1]);
if (wiggle) {
System.out.println(String.format("variableStep chrom=chr%d span=%d",chr, histogram));
}
if (histogram > 0) {
TreeMap<Integer,Integer> hits = client.getHistogram(alignname,
chr,
isType2,
paired,
0,
histogram,
start,
stop,
null,
strand);
TreeMap<Integer,Float> weightsmap = null;
if (weights) {
weightsmap = client.getWeightHistogram(alignname,
chr,
isType2,
paired,
0,
histogram,
start,
stop,
null,
strand);
}
if (!quiet) {
for (int i : hits.keySet()) {
if (weights) {
if (wiggle) {
System.out.println(String.format("%d\t%f", i,weightsmap.get(i)));
} else {
System.out.println(String.format("%d\t%d\t%f", i, hits.get(i), weightsmap.get(i)));
}
} else {
System.out.println(String.format("%d\t%d", i, hits.get(i)));
}
}
}
} else {
if (paired) {
List<PairedHit> hits = client.getPairedHits(alignname,
chr,
isleft,
start,
stop,
null,
strand);
if (!quiet) {
if (!noheader) {
System.out.println(line);
}
for (PairedHit h : hits) {
System.out.println(h.toString());
}
}
} else {
List<SingleHit> hits = client.getSingleHits(alignname,
chr,
isType2,
start,
stop,
null,
strand);
if (!quiet) {
if (!noheader) {
System.out.println(line);
}
for (SingleHit h : hits) {
if (bed) {//BED is 0-based and half-open by definition
System.out.println(String.format("chr%d\t%d\t%d\thit\t%f\t%s\n",
h.chrom,
h.strand ? h.pos-1 : h.pos - h.length -1,
h.strand ? h.pos + h.length : h.pos,
h.weight,
h.strand ? "+" : "-"));
} else {
System.out.println(h.toString());
}
}
}
}
}
} catch (Exception e) {
System.err.println("FROM " + line);
e.printStackTrace();
}
}
client.close();
}
}