/*
* Copyright (c) 2011 LinkedIn, Inc
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.flaptor.indextank.query;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.util.Version;
import com.google.common.base.Preconditions;
import com.google.common.collect.AbstractIterator;
/**
 * Parses query strings and document fields for the index engine.
 *
 * <p>Query strings are first parsed with Lucene's classic {@code QueryParser}
 * (default operator AND) and the resulting Lucene query tree is then
 * translated into this package's {@link QueryNode} tree. Only term, prefix,
 * boolean and phrase queries are translated; any other Lucene query type is
 * rejected with a {@link ParseException}.
 *
 * <p>Document fields are tokenized with the same analyzer that is handed to
 * the query parser.
 */
public class IndexEngineParser {

    private final Analyzer analyzer;
    private final String defaultField;

    /**
     * Creates a parser.
     *
     * @param defaultField field used for query terms that do not name a field
     * @param analyzer     analyzer used for both queries and document fields
     */
    public IndexEngineParser(String defaultField, Analyzer analyzer) {
        this.defaultField = defaultField;
        this.analyzer = analyzer;
    }

    /**
     * Creates a parser that uses a new {@code IndexEngineAnalyzer}.
     *
     * @param defaultField field used for query terms that do not name a field
     */
    public IndexEngineParser(String defaultField) {
        this(defaultField, new IndexEngineAnalyzer());
    }

    /**
     * Parses a query string into a {@link QueryNode} tree.
     *
     * @param queryStr the query in Lucene query-parser syntax
     * @return the root of the translated query tree
     * @throws ParseException if Lucene cannot parse the string, or if the
     *         parsed query cannot be translated (unsupported query type,
     *         empty boolean query, boolean query without positive clauses)
     */
    @SuppressWarnings("deprecation")
    public QueryNode parseQuery(final String queryStr) throws ParseException {
        org.apache.lucene.queryParser.QueryParser qp =
                new org.apache.lucene.queryParser.QueryParser(Version.LUCENE_CURRENT, defaultField, getAnalyzer());
        qp.setDefaultOperator(org.apache.lucene.queryParser.QueryParser.Operator.AND);

        org.apache.lucene.search.Query luceneQuery;
        try {
            luceneQuery = qp.parse(queryStr);
        } catch (Exception e) {
            // Any failure inside Lucene (including runtime exceptions) is reported
            // uniformly as a ParseException; the cause is folded into the message.
            throw new ParseException("lucene failed parsing. " + e);
        }
        return internalParse(luceneQuery, queryStr);
    }

    /**
     * Returns the Lucene analyzer used internally by this parser.
     * Try not to use this method.
     */
    public Analyzer getAnalyzer() {
        return analyzer;
    }

    /**
     * Tokenizes the content of a document field lazily.
     *
     * <p>Each returned token carries its text, its start/end offsets and its
     * position, where the position is the running sum of the position
     * increments reported by the analyzer (starting from 0 before the first
     * token). The underlying token stream is ended and closed once it is
     * exhausted.
     *
     * @param fieldName name of the field being tokenized
     * @param content   text of the field
     * @return an iterator over the tokens of {@code content}
     * @throws IllegalStateException (from the iterator) if the token stream
     *         fails with an I/O error
     */
    public Iterator<AToken> parseDocumentField(final String fieldName, String content) {
        final TokenStream tkstream = analyzer.tokenStream(fieldName, new StringReader(content));
        final TermAttribute termAtt = tkstream.addAttribute(TermAttribute.class);
        final PositionIncrementAttribute posIncrAttribute = tkstream.addAttribute(PositionIncrementAttribute.class);
        final OffsetAttribute offsetAtt = tkstream.addAttribute(OffsetAttribute.class);

        return new AbstractIterator<AToken>() {
            int currentPosition = 0;

            @Override
            protected AToken computeNext() {
                try {
                    if (!tkstream.incrementToken()) {
                        tkstream.end();
                        tkstream.close();
                        return endOfData();
                    }
                } catch (IOException e) {
                    // Not expected with a StringReader, but never fall through:
                    // doing so would emit a token built from stale attribute values.
                    closeQuietly(tkstream);
                    throw new IllegalStateException("I/O error while tokenizing field '" + fieldName + "'", e);
                }

                currentPosition += posIncrAttribute.getPositionIncrement();

                // Snapshot the attribute values: the attribute instances are
                // overwritten by the next call to incrementToken().
                final int position = currentPosition;
                final int startOffset = offsetAtt.startOffset();
                final int endOffset = offsetAtt.endOffset();
                final String text = termAtt.term();

                return new AToken() {
                    @Override
                    public String getText() {
                        return text;
                    }

                    @Override
                    public int getPosition() {
                        return position;
                    }

                    @Override
                    public int getStartOffset() {
                        return startOffset;
                    }

                    @Override
                    public int getEndOffset() {
                        return endOffset;
                    }
                };
            }
        };
    }

    /** Closes the stream, ignoring any failure; used only on error paths. */
    private static void closeQuietly(TokenStream stream) {
        try {
            stream.close();
        } catch (IOException ignored) {
            // Already handling a failure; a secondary close error adds nothing.
        }
    }

    /**
     * Translates a Lucene query into a {@link QueryNode}, copying its boost.
     *
     * @param luceneQuery the Lucene query to translate
     * @param originalStr the query string being parsed, used in error messages
     * @throws ParseException if the query type is not supported or a boolean
     *         query cannot be translated
     */
    private QueryNode internalParse(org.apache.lucene.search.Query luceneQuery, final String originalStr) throws ParseException {
        QueryNode node;
        if (luceneQuery instanceof org.apache.lucene.search.TermQuery) {
            Term t = ((org.apache.lucene.search.TermQuery) luceneQuery).getTerm();
            node = new TermQuery(t.field(), t.text());
        } else if (luceneQuery instanceof org.apache.lucene.search.PrefixQuery) {
            Term t = ((org.apache.lucene.search.PrefixQuery) luceneQuery).getPrefix();
            node = new PrefixTermQuery(t.field(), t.text());
        } else if (luceneQuery instanceof org.apache.lucene.search.BooleanQuery) {
            List<BooleanClause> clauses = ((org.apache.lucene.search.BooleanQuery) luceneQuery).clauses();
            if (clauses.isEmpty()) {
                throw new ParseException("error parsing: " + originalStr);
            }
            node = internalParseBooleanQuery(clauses, originalStr);
        } else if (luceneQuery instanceof org.apache.lucene.search.PhraseQuery) {
            org.apache.lucene.search.PhraseQuery phraseQuery = (org.apache.lucene.search.PhraseQuery) luceneQuery;
            int[] positions = phraseQuery.getPositions();
            node = internalParsePhraseQuery(phraseQuery.getTerms(), positions, originalStr);
        } else {
            throw new ParseException("unimplemented");
        }
        node.setBoost(luceneQuery.getBoost());
        return node;
    }

    /**
     * Builds a phrase query node from the terms and positions of a Lucene
     * phrase query. The field is taken from the first term.
     *
     * @throws IllegalArgumentException if {@code terms} is empty
     */
    private QueryNode internalParsePhraseQuery(Term[] terms, int[] positions, final String originalStr) {
        Preconditions.checkArgument(terms.length > 0, "too few terms to build a phrase query");
        String[] strs = new String[terms.length];
        for (int i = 0; i < terms.length; i++) {
            strs[i] = terms[i].text();
        }
        return new SimplePhraseQuery(terms[0].field(), strs, positions);
    }

    /**
     * Translates the clauses of a boolean query.
     *
     * <p>Non-prohibited clauses are combined first; every prohibited clause is
     * then subtracted from that result with a {@code DifferenceQuery}.
     *
     * @param list        the clauses; must not be empty
     * @param originalStr the query string being parsed, used in error messages
     * @throws ParseException if there is no non-prohibited clause
     */
    private QueryNode internalParseBooleanQuery(List<BooleanClause> list, final String originalStr) throws ParseException {
        Preconditions.checkArgument(list.size() > 0, "too few terms to build a boolean query");

        List<BooleanClause> positiveClauses = new ArrayList<BooleanClause>();
        List<BooleanClause> negativeClauses = new ArrayList<BooleanClause>();
        for (BooleanClause clause : list) {
            if (clause.isProhibited()) {
                negativeClauses.add(clause);
            } else {
                positiveClauses.add(clause);
            }
        }

        if (positiveClauses.isEmpty()) {
            throw new ParseException("No positive clauses.");
        }

        QueryNode retVal = internalParsePositive(positiveClauses, originalStr);
        for (BooleanClause clause : negativeClauses) {
            retVal = new DifferenceQuery(retVal, internalParse(clause.getQuery(), originalStr));
        }
        return retVal;
    }

    /**
     * Combines non-prohibited clauses into a right-nested chain.
     *
     * <p>The first clause is joined with the translation of the remaining
     * clauses using an {@code AndQuery} when the second clause is required,
     * and an {@code OrQuery} otherwise.
     *
     * @param list        non-prohibited clauses; must not be empty
     * @param originalStr the query string being parsed, used in error messages
     */
    private QueryNode internalParsePositive(List<BooleanClause> list, final String originalStr) throws ParseException {
        Preconditions.checkArgument(list.size() > 0, "too few terms to build a boolean query");

        QueryNode firstQuery = internalParse(list.get(0).getQuery(), originalStr);
        if (1 == list.size()) {
            return firstQuery;
        }

        // The list holds only non-prohibited clauses (the caller filters them),
        // so the tail can be combined directly without re-partitioning.
        QueryNode rest = internalParsePositive(list.subList(1, list.size()), originalStr);
        if (list.get(1).isRequired()) {
            return new AndQuery(firstQuery, rest);
        }
        return new OrQuery(firstQuery, rest);
    }

    /**
     * Command-line helper: parses the first argument as a query against the
     * default field "text" and prints the resulting query tree.
     */
    public static void main(String[] args) throws Exception {
        if (args == null || args.length < 1) {
            System.err.println("Usage: IndexEngineParser <query>");
            return;
        }
        System.out.println("Parsing \"" + args[0] + "\" ...");
        IndexEngineParser parser = new IndexEngineParser("text");
        QueryNode query = parser.parseQuery(args[0]);
        System.out.println(query);
    }
}