package org.seqcode.motifs;
import java.util.*;
import java.sql.*;
import org.seqcode.data.motifdb.*;
import org.seqcode.gseutils.*;
/**
 * For each input motif, searches the entire database for similar motifs.
 * For every input motif that has at least one similar motif, produces an
 * output image showing the input motif together with the motifs that are
 * similar to it.
 */
public class SimilarMotifs {

    /**
     * Command-line entry point.
     *
     * <p>Options read from {@code args} (via {@code Args}):
     * <ul>
     *   <li>{@code normalize} (flag) and {@code compareLength} (integer,
     *       default -1): passed to the {@code WMDistanceComparator}
     *       constructor.</li>
     *   <li>{@code maxDistance} (double, default 3.0): a candidate matrix is
     *       kept when its comparator distance to the query is less than or
     *       equal to this value.</li>
     *   <li>{@code bgmodel} (string, default "whole genome zero order"):
     *       name of the background model to look up.</li>
     *   <li>the query weight matrices and the genome, parsed by
     *       {@code Args.parseWeightMatrices} and {@code Args.parseGenome}.</li>
     * </ul>
     *
     * <p>For every query matrix that has at least one match among the
     * matrices returned by {@code WeightMatrix.getAllWeightMatrices()}, a PNG
     * is drawn through {@code ClusterMotifs.drawCluster} and the name and
     * version of each cluster member (query first) are printed to standard
     * output, tab separated, one per line.
     *
     * @param args command-line arguments
     * @throws NotFoundException propagated from the lookups performed here
     * @throws SQLException propagated from the lookups performed here
     */
    public static void main(String args[]) throws NotFoundException, SQLException {
        // Comparison settings.
        final boolean useNormalization = Args.parseFlags(args).contains("normalize");
        final int lengthToCompare = Args.parseInteger(args, "compareLength", -1);
        final double distanceCutoff = Args.parseDouble(args, "maxDistance", 3.0);
        final WMComparator distanceMeasure =
                new WMDistanceComparator(useNormalization, lengthToCompare);

        // Query matrices first, then the full set they are compared against.
        final Collection<WeightMatrix> queries = Args.parseWeightMatrices(args);
        System.err.println("Looking for matches to " + queries.size() + " matrices");
        final Collection<WeightMatrix> candidates = WeightMatrix.getAllWeightMatrices();

        // Background model lookup. A failed metadata lookup is only reported;
        // processing continues with a null background model.
        // NOTE(review): the meaning of the literal 1 and "MARKOV" arguments is
        // defined by BackgroundModelLoader - confirm there.
        final String backgroundName =
                Args.parseString(args, "bgmodel", "whole genome zero order");
        final BackgroundModelMetadata metadata = BackgroundModelLoader.getBackgroundModel(
                backgroundName,
                1,
                "MARKOV",
                Args.parseGenome(args).cdr().getDBID());
        MarkovBackgroundModel background = null;
        if (metadata == null) {
            System.err.println("Couldn't get metadata for " + backgroundName);
        } else {
            background = BackgroundModelLoader.getMarkovModel(metadata);
        }

        // Put both sets through toFrequency before any comparison is made:
        // candidates first, then queries.
        for (WeightMatrix candidate : candidates) {
            candidate.toFrequency(background);
        }
        for (WeightMatrix query : queries) {
            query.toFrequency(background);
        }

        for (WeightMatrix query : queries) {
            // The query itself is always the first member of its cluster.
            ArrayList<WeightMatrix> members = new ArrayList<WeightMatrix>();
            members.add(query);

            for (WeightMatrix candidate : candidates) {
                // Never match a query against a matrix equal to itself.
                boolean sameMatrix = candidate.equals(query);
                if (!sameMatrix
                        && distanceMeasure.compare(query, candidate) <= distanceCutoff) {
                    members.add(candidate);
                }
            }

            // Nothing besides the query itself: no image, no listing.
            if (members.size() <= 1) {
                continue;
            }

            // File name is the matrix's string form with every character
            // outside [A-Za-z0-9_] replaced by an underscore.
            String imageFile = query.toString().replaceAll("[^\\w\\d]","_") + ".png";
            ClusterMotifs.drawCluster(members, imageFile);
            for (WeightMatrix member : members) {
                System.out.println(member.getName() + "\t" + member.getVersion());
            }
        }
    }
}