package gov.nih.ncgc.bard.search;
import gov.nih.ncgc.bard.entity.Compound;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
import org.apache.solr.client.solrj.response.FacetField;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.net.MalformedURLException;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
 * Full text search for compound entities.
 * <p>
 * A query is run against the Solr core named at construction time. The
 * result documents, facet counts, matching field names and scores are stored
 * in the inherited {@code results} object by {@link #run}.
 *
 * @author Rajarshi Guha
 */
public class CompoundSearch extends SolrSearch {
    /** Field passed to {@code getHighlightedDocuments} as the highlight field. */
    private static final String HL_FIELD = "text";
    /** Name of the Solr document field holding the compound id. */
    private static final String PKEY_COMPOUND_DOC = "cid";
    /** Name of the multi-valued field (and facet) that is tallied manually. */
    private static final String COLLECTION_FACET = "COLLECTION";
    /** Number of rows requested from Solr for every query. */
    private static final int MAX_ROWS = 10000;

    Logger log;
    String[] facetNames = {"compound_class", "COLLECTION", "mwt", "tpsa", "xlogp"};

    /**
     * Creates a search for the given query string.
     *
     * @param query    the query string handed to the superclass
     * @param coreName name of the Solr core; appended to the Solr base URL
     */
    public CompoundSearch(String query, String coreName) {
        super(query);
        CORE_NAME = coreName;
        log = LoggerFactory.getLogger(this.getClass());
    }

    /**
     * Executes the query and populates {@code results} with documents and metadata.
     * <p>
     * Up to {@value #MAX_ROWS} rows are requested from Solr. Facet counts are
     * taken from the Solr facet query/field response, except for the
     * {@code COLLECTION} facet which is tallied over the retrieved documents.
     * Only the documents in the window {@code [skip, skip + top)} are returned.
     *
     * @param detailed if {@code true}, each returned document is replaced by the
     *                 {@link Compound} loaded from the database by its cid;
     *                 otherwise a reduced document is produced via {@code copyRange}
     * @param filter   filter specification passed to {@code setFilterQueries}
     * @param top      number of documents to return; must not be {@code null}
     *                 (it is unboxed when computing the window)
     * @param skip     index of the first document to return; must not be {@code null}
     * @throws MalformedURLException if the Solr URL is malformed
     * @throws SolrServerException   if the Solr query fails
     */
    public void run(boolean detailed, String filter, Integer top, Integer skip) throws MalformedURLException, SolrServerException {
        results = new SearchResult();

        SolrServer solr = new CommonsHttpSolrServer(getSolrURL() + CORE_NAME);

        SolrQuery sq = new SolrQuery(query);
        sq = setFilterQueries(sq, filter);
        sq.setRows(MAX_ROWS);
        sq.setShowDebugInfo(true); // needed for the explain map used below
        addCompoundFacets(sq);

        QueryResponse response = solr.query(sq);
        List<SolrDocument> docs = getHighlightedDocuments(response, PKEY_COMPOUND_DOC, HL_FIELD);

        // get facet counts
        long start = System.currentTimeMillis();
        facets = new ArrayList<Facet>();
        for (String facetName : facetNames) {
            facets.add(new Facet(facetName));
        }
        readSolrFacetCounts(response);
        List<Long> cids = tallyCollectionFacet(docs);
        long end = System.currentTimeMillis();
        log.info("Facet summary calculated in {}s", (end - start) / 1000.0);

        SearchMeta meta = new SearchMeta();
        meta.setNhit(response.getResults().getNumFound());
        meta.setFacets(facets);
        meta.setQueryTime(response.getQTime());
        meta.setElapsedTime(response.getElapsedTime());

        try {
            putEtag(cids, Compound.class);
        } catch (Exception e) {
            // ETag generation is best effort; the search result is still returned
            log.error("Can't process ETag", e);
        }

        // only return the requested number of docs, from the requested starting point
        // and generate reduced representation if required
        //
        // Also extract the matching field names for the docs we do return
        Map<String, String> xplainMap = response.getExplainMap();
        Map<String, Map<String, Object>> matchFields = new HashMap<String, Map<String, Object>>();
        Map<String, Float> scores = new LinkedHashMap<String, Float>(); // to maintain doc id ordering

        // first set up field match details & document scores
        int size = Math.min(skip + top, docs.size());
        for (int i = skip; i < size; i++) {
            SolrDocument doc = docs.get(i);
            String compoundId = (String) doc.getFieldValue(PKEY_COMPOUND_DOC);
            Map<String, Object> value = new HashMap<String, Object>();
            for (String fn : SearchUtil.getMatchingFieldNames(xplainMap.get(compoundId))) {
                value.put(fn, doc.getFieldValue(fn));
            }
            matchFields.put(compoundId, value);
            scores.put(compoundId, (Float) doc.getFieldValue("score"));
        }
        meta.setMatchingFields(matchFields);
        meta.setScores(scores);

        List ret;
        if (!detailed) {
            ret = copyRange(docs, skip, top, detailed, PKEY_COMPOUND_DOC, "smiles", "iupacName", "preferredTerm", "compound_class");
        } else {
            ret = new ArrayList();
            try {
                try {
                    for (int i = skip; i < size; i++) {
                        SolrDocument doc = docs.get(i);
                        String compoundId = (String) doc.getFieldValue(PKEY_COMPOUND_DOC);
                        List<Compound> cmpds = db.getCompoundsByCid(Long.parseLong(compoundId));
                        if (cmpds != null && cmpds.size() > 0) {
                            ret.add(cmpds.get(0));
                        }
                    }
                } finally {
                    // always release the connection, even if a lookup failed part way through
                    db.closeConnection();
                }
            } catch (SQLException e) {
                // compounds retrieved before the failure are still returned
                log.error("Error retrieving compound details from the database", e);
            }
        }
        results.setDocs(ret);
        results.setMetaData(meta);
    }

    /** Enables faceting on the query and adds the default range and field facets. */
    private void addCompoundFacets(SolrQuery sq) {
        sq.setFacet(true);
        sq.addFacetQuery("mwt:[* TO 100]");
        sq.addFacetQuery("mwt:[100 TO 200]");
        sq.addFacetQuery("mwt:[200 TO 300]");
        sq.addFacetQuery("mwt:[300 TO *]");
        sq.addFacetQuery("tpsa:[* TO 40]");
        sq.addFacetQuery("tpsa:[40 TO 120]");
        sq.addFacetQuery("tpsa:[120 TO 180]");
        sq.addFacetQuery("tpsa:[180 TO *]");
        sq.addFacetQuery("xlogp:[* TO 1]");
        sq.addFacetQuery("xlogp:[1 TO 3]");
        sq.addFacetQuery("xlogp:[3 TO 5]");
        sq.addFacetQuery("xlogp:[5 TO *]");
        sq.setFacetMinCount(1);
        sq.addFacetField("compound_class");
    }

    /** Copies the facet query and facet field counts from the Solr response into {@code facets}. */
    private void readSolrFacetCounts(QueryResponse response) {
        Map<String, Integer> queryCounts = response.getFacetQuery();
        if (queryCounts != null) {
            for (Facet facet : facets) {
                String name = facet.getFacetName();
                for (Map.Entry<String, Integer> entry : queryCounts.entrySet()) {
                    String key = entry.getKey();
                    if (key.startsWith(name)) {
                        // strip the "field:" prefix so only the range remains as the label
                        facet.counts.put(key.replace(name + ":", ""), entry.getValue());
                    }
                }
            }
        }

        for (Facet facet : facets) {
            FacetField fieldFacet = response.getFacetField(facet.getFacetName());
            if (fieldFacet == null) {
                continue;
            }
            List<FacetField.Count> fcounts = fieldFacet.getValues();
            if (fcounts == null) {
                continue;
            }
            for (FacetField.Count fcount : fcounts) {
                facet.counts.put(fcount.getName(), (int) fcount.getCount());
            }
        }
    }

    /**
     * Walks the retrieved documents, collecting their compound ids and adding each
     * document's distinct COLLECTION values to the COLLECTION facet.
     *
     * @return the ids of all documents whose cid field could be parsed as a long
     */
    private List<Long> tallyCollectionFacet(List<SolrDocument> docs) {
        List<Long> cids = new ArrayList<Long>();
        for (SolrDocument doc : docs) {
            Object id = doc.getFieldValue(PKEY_COMPOUND_DOC);
            if (id != null) {
                try {
                    cids.add(Long.parseLong(id.toString()));
                } catch (NumberFormatException ex) {
                    log.warn("** Bogus cid {}", id);
                }
            }

            Collection<Object> collection = doc.getFieldValues(COLLECTION_FACET);
            if (collection == null) {
                continue;
            }

            // distinct values for this document, so a document counts once per value
            Set<String> values = new HashSet<String>();
            for (Object raw : collection) {
                if (raw == null) {
                    continue;
                }
                String v = raw.toString().trim();
                // keep only the text before the first '|'; a plain index lookup is used
                // because String.split("|") treats the pipe as a regex alternation
                int bar = v.indexOf('|');
                if (bar >= 0) {
                    v = v.substring(0, bar).trim();
                }
                if (v.length() == 0) {
                    continue;
                }
                values.add(v);
            }

            for (Facet facet : facets) {
                if (!COLLECTION_FACET.equals(facet.getFacetName())) {
                    continue;
                }
                for (String v : values) {
                    facet.addFacetValue(v);
                }
            }
        }
        return cids;
    }
}