package org.seqcode.data.readdb.tools;
import org.apache.commons.cli.*;
import java.io.*;
import java.net.*;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMRecordIterator;
import htsjdk.samtools.SamInputResource;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.ValidationStringency;
/**
* Query hits from a remote BAM file.Reads chrom:start-stop:strand values from
* stdin. Repeats them on stdout along with the hit positions
*
* Usage:
* Query --data http://nanog.csail.mit.edu/readdb-test/foo.bam --index foo.index [--quiet]
*
* --quiet means don't print any output. This is useful for testing the query performance
* without worrying about the time it takes to print the output.
*
* --data and --index can be either local or remote
*
*/
public class BAMQuery {
private String data, index;
private SamReader reader;
private boolean quiet, weights, noheader;
private int histogram;
public static void main(String args[]) throws Exception {
BAMQuery query = new BAMQuery();
query.parseArgs(args);
query.run(System.in);
}
public void parseArgs(String args[]) throws IllegalArgumentException, ParseException {
Options options = new Options();
options.addOption("q","quiet",false,"quiet: don't print output");
options.addOption("d","data",true,"url for data");
options.addOption("i","index",true,"url for index file");
options.addOption("w","weights",false,"get and print weights in addition to positions");
options.addOption("H","histogram",true,"produce a histogram with this binsize instead of printing all read positions");
options.addOption("N","noheader",false,"skip printing the query header");
CommandLineParser parser = new GnuParser();
CommandLine line = parser.parse( options, args, false );
quiet = line.hasOption("quiet");
weights = line.hasOption("weights");
noheader = line.hasOption("noheader");
if (line.hasOption("histogram")) {
histogram = Integer.parseInt(line.getOptionValue("histogram"));
} else {
histogram = 0;
}
if (line.hasOption("data")) {
data = line.getOptionValue("data");
} else {
throw new IllegalArgumentException("Must provide --data");
}
if (line.hasOption("index")) {
index = line.getOptionValue("index");
} else {
throw new IllegalArgumentException("Must provide --index");
}
}
public BAMQuery() {}
public SamReader createReader() throws IOException, URISyntaxException {
URI indexURI = new URI(index);
String indexFilename = null;
if (indexURI.getScheme() != null && indexURI.getScheme().equals("file")) {
indexFilename = indexURI.getPath();
} else if (indexURI.getScheme() != null && indexURI.getScheme().equals("http")) {
DefaultHttpClient httpclient = new DefaultHttpClient();
HttpGet httpget = new HttpGet(indexURI);
HttpResponse response = httpclient.execute(httpget);
HttpEntity entity = response.getEntity();
indexFilename = File.createTempFile("bam","index").getAbsolutePath();
if (entity != null) {
FileOutputStream os = new FileOutputStream(indexFilename);
entity.writeTo(os);
os.close();
}
httpclient.getConnectionManager().shutdown();
} else {
indexFilename = index; // hope for the best here
}
SamReaderFactory factory =
SamReaderFactory.makeDefault()
.enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS)
.validationStringency(ValidationStringency.SILENT);
File datafile = new File(data);
if (datafile.exists()) {
return factory.open(SamInputResource.of(datafile).index(new File(indexFilename)));
} else {
return factory.open(SamInputResource.of(new URL(data)).index(new File(indexFilename)));
}
}
public void run(InputStream instream) throws IOException, URISyntaxException {
SamReader bam = createReader();
// bam.enableIndexCaching(true);
BufferedReader reader = new BufferedReader(new InputStreamReader(instream));
String line = null;
while ((line = reader.readLine()) != null) {
int start = 0, stop = 0;
String chr = null;
Boolean strand = null;
try {
String pieces[] = line.split("[\\:]");
chr = pieces[0];
strand = pieces.length >= 3 ? pieces[2].equals("-") : null;
pieces = pieces[1].split("\\-");
start = Integer.parseInt(pieces[0]);
stop = Integer.parseInt(pieces[1]);
} catch (Exception e) {
System.err.println("Error parsing line " + line + " FROM " + e.toString());
continue;
}
SAMRecordIterator iter = bam.query(chr, start, stop, false);
if (histogram > 0) {
int hist[] = new int[(stop - start) / histogram + 1];
while (iter.hasNext()) {
SAMRecord record = iter.next();
if (strand != null && strand != record.getReadNegativeStrandFlag()) {
continue;
}
int pos = record.getReadNegativeStrandFlag() ? record.getAlignmentEnd() : record.getAlignmentStart();
int bin = (pos - start) / histogram;
if (bin < hist.length && bin >= 0) {
hist[bin]++;
}
}
if (!quiet) {
for (int i = 0; i < hist.length; i++) {
System.out.println(String.format("%d\t%d", start + i*histogram + histogram/2, hist[i]));
}
}
} else {
while (iter.hasNext()) {
SAMRecord record = iter.next();
if (strand != null && strand != record.getReadNegativeStrandFlag()) {
continue;
}
if (!quiet) {
System.out.println(String.format("chrom %s, pos %d, %s, len %d",
record.getReferenceName(),
record.getReadNegativeStrandFlag() ? record.getAlignmentEnd() : record.getAlignmentStart(),
record.getReadNegativeStrandFlag() ? "-" : "+",
record.getReadLength()));
}
}
}
iter.close();
}
reader.close();
bam.close();
}
}