package gov.nih.ncgc.bard.search;
import java.io.IOException;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.response.TermsResponse;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.TermsParams;
/**
* A one line summary.
*
* @author Rajarshi Guha
*/
public class SearchUtil {
static final String explainRegex = "\\(MATCH\\) .*weight\\((.+?):";
static final Pattern pattern = Pattern.compile(explainRegex);
public static List<String> getMatchingFieldNames(String explain) {
List<String> r = new ArrayList<String>();
Matcher matcher = pattern.matcher(explain);
while (matcher.find()) {
for (int i = 0; i < matcher.groupCount(); i++) {
r.add(matcher.group(i+1));
}
}
return r;
}
/**
* Extract query field and values from a general BARD filter query parameter.
* <p/>
* Solr filter queries are encoded as
* <code>
* fq(fieldName:fieldValue)
* </code>
* and multiple such specifications can be included as a comma separated list. <code>fieldValue</code>
* can be an arbitrary string or a numeric range of the form<code>[low TO high]</code> with low and
* high being numbers or <code>*</code>.
* <p/>
* Note that this method does not check whether the field names specified
* are actually valid for a Solr document.
*
* @param filter The filter parameter string from a BARD request
* @return A map whose keys are field names and values are field values.
*/
public static Map<String, List<String>> extractFilterQueries(String filter, List<SolrField> fields) {
Map<String, SolrField> map = new HashMap<String, SolrField>();
for (SolrField f : fields) map.put(f.getName(), f);
HashMap<String, List<String>> ret = new HashMap<String, List<String>>();
if (filter == null || filter.trim().equals("")) return ret;
Pattern pattern = Pattern.compile("fq\\((.*?):[\"'](.*?)[\"']\\),");
Matcher matcher = pattern.matcher(filter);
while (matcher.find()) {
for (int i = 1; i < matcher.groupCount(); i += 2) {
String fname = matcher.group(i);
String fvalue = matcher.group(i + 1).trim();
SolrField solrField = map.get(fname);
if (solrField == null) continue;
String type = solrField.getType();
boolean isNumericField = type.contains("int") || type.contains("float");
if (!fvalue.contains("\"") && !isNumericField) fvalue = "\"" + fvalue + "\"";
if (ret.containsKey(fname)) {
List<String> tmp = ret.get(fname);
tmp.add(fvalue);
ret.put(fname, tmp);
} else {
List<String> tmp = new ArrayList<String>();
tmp.add(fvalue);
ret.put(fname, tmp);
}
}
}
return ret;
}
public static void deleteDocs(String url, String... docIds) throws IOException, SolrServerException {
SolrServer solr = new CommonsHttpSolrServer(url);
List<String> l = new ArrayList<String>();
Collections.addAll(l, docIds);
solr.deleteById(l);
solr.commit(true, true);
}
/**
*
* @param url The Solr URL (including relevant core)
* @param fields The fields to consider
* @param q The query. It is assumed to be a complete regex
* @param n The number of suggestions desired
* @return
* @throws MalformedURLException
* @throws SolrServerException
*/
public static Map<String, List<String>> getTerms(String url, SolrField[] fields, String q, Integer n) throws MalformedURLException, SolrServerException {
SolrServer solr = new CommonsHttpSolrServer(url);
SolrQuery query = new SolrQuery();
query.setParam(CommonParams.QT, "/terms");
query.setParam(TermsParams.TERMS, true);
query.setParam(TermsParams.TERMS_LIMIT, String.valueOf(n));
String[] fieldNames = new String[fields.length];
for (int i = 0; i < fields.length; i++) fieldNames[i] = fields[i].getName();
query.setParam(TermsParams.TERMS_FIELD, fieldNames);
query.setParam(TermsParams.TERMS_REGEXP_FLAG, "case_insensitive");
query.setParam(TermsParams.TERMS_REGEXP_STR, q);
QueryResponse response = solr.query(query);
TermsResponse termsr = response.getTermsResponse();
Map<String, List<String>> termMap = new HashMap<String, List<String>>();
for (SolrField field : fields) {
List<TermsResponse.Term> terms = termsr.getTerms(field.getName());
if (terms != null) {
List<String> l = new ArrayList<String>();
for (TermsResponse.Term term : terms) l.add(term.getTerm());
if (l.size() > 0) termMap.put(field.getName(), l);
}
}
return termMap;
}
public static void main(String[] args) throws Exception {
String url = "http://carnot.ncats.nih.gov:8094/solr/core-assay-v14/";
SearchUtil.deleteDocs(url, "1748");
System.exit(-1);
String s = "0.33150536 = (MATCH) max of:\n" +
" 0.2429601 = (MATCH) weight(av_dict_label:lopac in 2985), product of:\n" +
" 0.51730007 = queryWeight(av_dict_label:lopac), product of:\n" +
" 7.514713 = idf(docFreq=6, maxDocs=4725)\n" +
" 0.0688383 = queryNorm\n" +
" 0.46966955 = (MATCH) fieldWeight(av_dict_label:lopac in 2985), product of:\n" +
" 1.0 = tf(termFreq(av_dict_label:lopac)=1)\n" +
" 7.514713 = idf(docFreq=6, maxDocs=4725)\n" +
" 0.0625 = fieldNorm(field=av_dict_label, doc=2985)\n" +
" 0.33150536 = (MATCH) weight(description:lopac in 2985), product of:\n" +
" 0.5404622 = queryWeight(description:lopac), product of:\n" +
" 7.851185 = idf(docFreq=4, maxDocs=4725)\n" +
" 0.0688383 = queryNorm\n" +
" 0.6133738 = (MATCH) fieldWeight(description:lopac in 2985), product of:\n" +
" 1.0 = tf(termFreq(description:lopac)=1)\n" +
" 7.851185 = idf(docFreq=4, maxDocs=4725)\n" +
" 0.078125 = fieldNorm(field=description, doc=2985)\n";
List<String> fn = getMatchingFieldNames(s);
for (String afn:fn) System.out.println(afn);
System.exit(-1);
SolrServer solr = new CommonsHttpSolrServer("http://protease.nhgri.nih.gov/servlet/solr/core-assay/");
SolrQuery query = new SolrQuery();
query.setParam(CommonParams.QT, "/terms");
query.setParam(TermsParams.TERMS, true);
query.setParam(TermsParams.TERMS_LIMIT, String.valueOf(10));
query.setParam(TermsParams.TERMS_FIELD, "gobp_term"); // or whatever fields you want
query.setParam(TermsParams.TERMS_REGEXP_FLAG, "case_insensitive");
query.setParam(TermsParams.TERMS_REGEXP_STR, "dna re.*");
QueryResponse response = solr.query(query);
System.out.println("response = " + response);
TermsResponse termsr = response.getTermsResponse();
System.out.println("termsr = " + termsr);
List<TermsResponse.Term> terms = termsr.getTerms("gobp_term");
System.out.println("terms.size() = " + terms.size());
for (TermsResponse.Term term : terms) System.out.println("term.getTerm() = " + term.getTerm());
}
}