package org.apache.lucene.queryParser.standard;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.io.Reader;
import java.text.Collator;
import java.text.DateFormat;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.KeywordAnalyzer;
import org.apache.lucene.analysis.LowerCaseTokenizer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.document.DateField;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.messages.MessageImpl;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.core.QueryNodeException;
import org.apache.lucene.queryParser.core.messages.QueryParserMessages;
import org.apache.lucene.queryParser.core.nodes.FuzzyQueryNode;
import org.apache.lucene.queryParser.core.nodes.QueryNode;
import org.apache.lucene.queryParser.core.processors.QueryNodeProcessorImpl;
import org.apache.lucene.queryParser.core.processors.QueryNodeProcessorPipeline;
import org.apache.lucene.queryParser.standard.nodes.WildcardQueryNode;
import org.apache.lucene.queryParser.standard.processors.WildcardQueryNodeProcessor;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.LocalizedTestCase;
/**
* This test case is a copy of the core Lucene query parser test, adapted
* to use the new {@link QueryParserWrapper} instead of the old query parser.
*
* Tests QueryParser.
*/
public class TestQueryParserWrapper extends LocalizedTestCase {
/**
 * Creates the test case and hands the superclass the names of the tests
 * listed below (presumably the tests to be run under different locales;
 * confirm against LocalizedTestCase).
 *
 * @param name the test name passed through to the superclass
 */
public TestQueryParserWrapper(String name) {
  super(name, new HashSet<String>(Arrays.asList(
      "testLegacyDateRange",
      "testDateRange",
      "testCJK",
      "testNumber",
      "testFarsiRangeCollating",
      "testLocalDateFormat")));
}
/** Shared {@link QPTestAnalyzer} instance used by the tests in this class (for example testQPA). */
public static Analyzer qpAnalyzer = new QPTestAnalyzer();
/**
 * Test-only filter which discards the token 'stop' and which expands the
 * token 'phrase' into the two tokens 'phrase1' and 'phrase2'. Both expanded
 * tokens carry the offsets of the original 'phrase' token.
 */
public static class QPTestFilter extends TokenFilter {
  TermAttribute termAtt;
  OffsetAttribute offsetAtt;
  /** True after 'phrase1' was emitted, while 'phrase2' is still pending. */
  boolean inPhrase = false;
  /** Offsets remembered from the 'phrase' token and reused for 'phrase2'. */
  int savedStart = 0, savedEnd = 0;

  /**
   * Wraps the given stream and registers the term and offset attributes.
   */
  public QPTestFilter(TokenStream in) {
    super(in);
    termAtt = (TermAttribute) addAttribute(TermAttribute.class);
    offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class);
  }

  /**
   * Token-based variant of the filter logic.
   *
   * @return the next surviving token, or null when the input is exhausted
   */
  public Token next(Token reusableToken) throws IOException {
    if (inPhrase) {
      // Second half of an expanded 'phrase' token.
      inPhrase = false;
      reusableToken.setTermBuffer("phrase2");
      reusableToken.setStartOffset(savedStart);
      reusableToken.setEndOffset(savedEnd);
      return reusableToken;
    }
    for (Token current = this.input.next(reusableToken); current != null;
        current = this.input.next(reusableToken)) {
      String text = current.term();
      if (text.equals("phrase")) {
        inPhrase = true;
        savedStart = current.startOffset();
        savedEnd = current.endOffset();
        current.setTermBuffer("phrase1");
        current.setStartOffset(savedStart);
        current.setEndOffset(savedEnd);
        return current;
      }
      if (!text.equals("stop")) {
        return current;
      }
      // 'stop' tokens are skipped.
    }
    return null;
  }

  /**
   * Attribute-based variant of the filter logic.
   *
   * @return true if a token is available in the attributes, false when the
   *         input is exhausted
   */
  public boolean incrementToken() throws IOException {
    if (inPhrase) {
      // Second half of an expanded 'phrase' token.
      inPhrase = false;
      clearAttributes();
      termAtt.setTermBuffer("phrase2");
      offsetAtt.setOffset(savedStart, savedEnd);
      return true;
    }
    while (input.incrementToken()) {
      String text = termAtt.term();
      if (text.equals("stop")) {
        continue;
      }
      if (text.equals("phrase")) {
        inPhrase = true;
        savedStart = offsetAtt.startOffset();
        savedEnd = offsetAtt.endOffset();
        termAtt.setTermBuffer("phrase1");
        offsetAtt.setOffset(savedStart, savedEnd);
      }
      return true;
    }
    return false;
  }
}
/** Test analyzer that combines a {@link LowerCaseTokenizer} with {@link QPTestFilter}. */
public static class QPTestAnalyzer extends Analyzer {
  /** Filters a LowerCaseTokenizer over the reader with QPTestFilter. */
  public final TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream lowerCased = new LowerCaseTokenizer(reader);
    return new QPTestFilter(lowerCased);
  }
}
/**
 * Parser used to test that wildcard and fuzzy queries can be rejected. It
 * puts a wildcard processor and a rejecting processor in front of the
 * query processor it was created with.
 */
public static class QPTestParser extends QueryParserWrapper {
  /**
   * Builds the parser and replaces its query processor with a pipeline made
   * of a WildcardQueryNodeProcessor, the rejecting processor below, and the
   * previously configured processor, in that order.
   */
  public QPTestParser(String f, Analyzer a) {
    super(f, a);
    QueryNodeProcessorPipeline pipeline = new QueryNodeProcessorPipeline(
        getQueryProcessor().getQueryConfigHandler());
    pipeline.addProcessor(new WildcardQueryNodeProcessor());
    pipeline.addProcessor(new QPTestParserQueryNodeProcessor());
    pipeline.addProcessor(getQueryProcessor());
    setQueryProcessor(pipeline);
  }

  /** Always throws: fuzzy queries are not allowed by this parser. */
  protected Query getFuzzyQuery(String field, String termStr,
      float minSimilarity) throws ParseException {
    throw new ParseException("Fuzzy queries not allowed");
  }

  /** Always throws: wildcard queries are not allowed by this parser. */
  protected Query getWildcardQuery(String field, String termStr)
      throws ParseException {
    throw new ParseException("Wildcard queries not allowed");
  }

  /** Processor that throws as soon as it meets a wildcard or fuzzy node. */
  private static class QPTestParserQueryNodeProcessor extends
      QueryNodeProcessorImpl {
    /** Leaves the node untouched. */
    protected QueryNode postProcessNode(QueryNode node)
        throws QueryNodeException {
      return node;
    }

    /**
     * Returns the node unchanged unless it is a wildcard or fuzzy node, in
     * which case a QueryNodeException is thrown.
     */
    protected QueryNode preProcessNode(QueryNode node)
        throws QueryNodeException {
      boolean rejected = node instanceof WildcardQueryNode
          || node instanceof FuzzyQueryNode;
      if (!rejected) {
        return node;
      }
      throw new QueryNodeException(new MessageImpl(
          QueryParserMessages.EMPTY_MESSAGE));
    }

    /** Keeps the children in the order given. */
    protected List<QueryNode> setChildrenOrder(List<QueryNode> children)
        throws QueryNodeException {
      return children;
    }
  }
}
/** BooleanQuery max clause count that was in effect when setUp ran. */
private int originalMaxClauses;

/**
 * Runs the superclass set-up and records the current BooleanQuery max
 * clause count (presumably so it can be restored later; the restoring code
 * is not part of this section).
 */
public void setUp() throws Exception {
  super.setUp();
  this.originalMaxClauses = BooleanQuery.getMaxClauseCount();
}
/**
 * Creates a parser on the default field "field" with OR as the default
 * operator.
 *
 * @param a the analyzer to use; when null a SimpleAnalyzer is used
 * @return the configured parser
 */
public QueryParserWrapper getParser(Analyzer a) throws Exception {
  Analyzer analyzer = (a != null) ? a : new SimpleAnalyzer();
  QueryParserWrapper parser = new QueryParserWrapper("field", analyzer);
  parser.setDefaultOperator(QueryParserWrapper.OR_OPERATOR);
  return parser;
}
/**
 * Parses the query string with a parser obtained from getParser.
 *
 * @param query the query string to parse
 * @param a the analyzer handed to getParser (may be null)
 * @return the parsed query
 */
public Query getQuery(String query, Analyzer a) throws Exception {
  QueryParserWrapper parser = getParser(a);
  return parser.parse(query);
}
/**
 * Parses the query string with a parser obtained from getParser on which
 * leading wildcards have been allowed.
 *
 * @param query the query string to parse
 * @param a the analyzer handed to getParser (may be null)
 * @return the parsed query
 */
public Query getQueryAllowLeadingWildcard(String query, Analyzer a) throws Exception {
  QueryParserWrapper leadingWildcardParser = getParser(a);
  leadingWildcardParser.setAllowLeadingWildcard(true);
  Query parsed = leadingWildcardParser.parse(query);
  return parsed;
}
/**
 * Parses the query via getQuery and fails unless its string form, rendered
 * for the field "field", equals the expected result.
 *
 * @param query the query string to parse
 * @param a the analyzer to parse with (may be null)
 * @param result the expected string form of the parsed query
 */
public void assertQueryEquals(String query, Analyzer a, String result)
    throws Exception {
  String actual = getQuery(query, a).toString("field");
  if (actual.equals(result)) {
    return;
  }
  fail("Query /" + query + "/ yielded /" + actual + "/, expecting /" + result
      + "/");
}
/**
 * Same check as assertQueryEquals, but the query is parsed via
 * getQueryAllowLeadingWildcard, i.e. with leading wildcards allowed.
 *
 * @param query the query string to parse
 * @param a the analyzer to parse with (may be null)
 * @param result the expected string form of the parsed query
 */
public void assertQueryEqualsAllowLeadingWildcard(String query, Analyzer a, String result)
    throws Exception {
  String actual = getQueryAllowLeadingWildcard(query, a).toString("field");
  if (actual.equals(result)) {
    return;
  }
  fail("Query /" + query + "/ yielded /" + actual + "/, expecting /" + result
      + "/");
}
/**
 * Parses the query with the supplied parser and fails unless its string
 * form, rendered for the given field, equals the expected result.
 *
 * @param qp the parser to use
 * @param field the field passed to Query.toString
 * @param query the query string to parse
 * @param result the expected string form of the parsed query
 */
public void assertQueryEquals(QueryParserWrapper qp, String field,
    String query, String result) throws Exception {
  String actual = qp.parse(query).toString(field);
  if (actual.equals(result)) {
    return;
  }
  fail("Query /" + query + "/ yielded /" + actual + "/, expecting /" + result
      + "/");
}
/**
 * Escapes the query string with QueryParserWrapper.escape and fails unless
 * the escaped text equals the expected result.
 *
 * @param query the raw text to escape
 * @param a not used by this method
 * @param result the expected escaped text
 */
public void assertEscapedQueryEquals(String query, Analyzer a, String result)
    throws Exception {
  String escaped = QueryParserWrapper.escape(query);
  if (escaped.equals(result)) {
    return;
  }
  fail("Query /" + query + "/ yielded /" + escaped + "/, expecting /"
      + result + "/");
}
/**
 * Parses the query with a default parser configured with the given
 * lowercase-expanded-terms and allow-leading-wildcard settings, and fails
 * unless its string form for the field "field" equals the expected result.
 *
 * @param query the query string to parse
 * @param lowercase value for setLowercaseExpandedTerms
 * @param result the expected string form of the parsed query
 * @param allowLeadingWildcard value for setAllowLeadingWildcard
 */
public void assertWildcardQueryEquals(String query, boolean lowercase,
    String result, boolean allowLeadingWildcard) throws Exception {
  QueryParserWrapper parser = getParser(null);
  parser.setLowercaseExpandedTerms(lowercase);
  parser.setAllowLeadingWildcard(allowLeadingWildcard);
  String actual = parser.parse(query).toString("field");
  if (actual.equals(result)) {
    return;
  }
  fail("WildcardQuery /" + query + "/ yielded /" + actual + "/, expecting /"
      + result + "/");
}
/**
 * Variant of the four-argument check with leading wildcards disallowed.
 *
 * @param query the query string to parse
 * @param lowercase value for setLowercaseExpandedTerms
 * @param result the expected string form of the parsed query
 */
public void assertWildcardQueryEquals(String query, boolean lowercase,
    String result) throws Exception {
  final boolean allowLeadingWildcard = false;
  assertWildcardQueryEquals(query, lowercase, result, allowLeadingWildcard);
}
/**
 * Parses the query with an otherwise unmodified parser from getParser(null)
 * and fails unless its string form for the field "field" equals the
 * expected result.
 *
 * @param query the query string to parse
 * @param result the expected string form of the parsed query
 */
public void assertWildcardQueryEquals(String query, String result)
    throws Exception {
  QueryParserWrapper defaultParser = getParser(null);
  String actual = defaultParser.parse(query).toString("field");
  if (actual.equals(result)) {
    return;
  }
  fail("WildcardQuery /" + query + "/ yielded /" + actual + "/, expecting /"
      + result + "/");
}
/**
 * Parses the query on the default field "field" with AND as the default
 * operator.
 *
 * @param query the query string to parse
 * @param a the analyzer to use; when null a SimpleAnalyzer is used
 * @return the parsed query
 */
public Query getQueryDOA(String query, Analyzer a) throws Exception {
  Analyzer analyzer = (a != null) ? a : new SimpleAnalyzer();
  QueryParserWrapper andParser = new QueryParserWrapper("field", analyzer);
  andParser.setDefaultOperator(QueryParserWrapper.AND_OPERATOR);
  return andParser.parse(query);
}
/**
 * Parses the query via getQueryDOA (default operator AND) and fails unless
 * its string form for the field "field" equals the expected result.
 *
 * @param query the query string to parse
 * @param a the analyzer to parse with (may be null)
 * @param result the expected string form of the parsed query
 */
public void assertQueryEqualsDOA(String query, Analyzer a, String result)
    throws Exception {
  String actual = getQueryDOA(query, a).toString("field");
  if (actual.equals(result)) {
    return;
  }
  fail("Query /" + query + "/ yielded /" + actual + "/, expecting /" + result
      + "/");
}
/**
 * Checks that the ideographic space (U+3000) separates terms like an
 * ordinary space does, for ASCII terms and for CJK terms.
 */
public void testCJK() throws Exception {
  // Test Ideographic Space - As wide as a CJK character cell (fullwidth)
  assertQueryEquals("term\u3000term\u3000term", null,
      "term\u0020term\u0020term");
  // The word "term" translated to Japanese (code points U+7528 U+8A9E).
  // The characters had been lost to an encoding error and turned into
  // literal question marks, which made this a wildcard test; they are
  // written as unicode escapes so the source encoding no longer matters.
  assertQueryEqualsAllowLeadingWildcard(
      "\u7528\u8a9e\u3000\u7528\u8a9e\u3000\u7528\u8a9e", null,
      "\u7528\u8a9e\u0020\u7528\u8a9e\u0020\u7528\u8a9e");
}
/**
 * Checks the string form and the query class produced for simple term,
 * phrase, boolean, boosted and grouped queries, and checks that the default
 * operator can be read and changed.
 */
public void testSimple() throws Exception {
  assertQueryEquals("\"term germ\"~2", null, "\"term germ\"~2");
  assertQueryEquals("term term term", null, "term term term");
  // The u-umlaut (U+00FC) is written as a unicode escape: the raw character
  // had been damaged by a source-encoding mismatch.
  assertQueryEquals("t\u00fcrm term term", new WhitespaceAnalyzer(),
      "t\u00fcrm term term");
  assertQueryEquals("\u00fcmlaut", new WhitespaceAnalyzer(), "\u00fcmlaut");
  assertQueryEquals("\"\"", new KeywordAnalyzer(), "");
  assertQueryEquals("foo:\"\"", new KeywordAnalyzer(), "foo:");
  assertQueryEquals("a AND b", null, "+a +b");
  assertQueryEquals("(a AND b)", null, "+a +b");
  assertQueryEquals("c OR (a AND b)", null, "c (+a +b)");
  assertQueryEquals("a AND NOT b", null, "+a -b");
  assertQueryEquals("a AND -b", null, "+a -b");
  assertQueryEquals("a AND !b", null, "+a -b");
  assertQueryEquals("a && b", null, "+a +b");
  assertQueryEquals("a && ! b", null, "+a -b");
  assertQueryEquals("a OR b", null, "a b");
  assertQueryEquals("a || b", null, "a b");
  assertQueryEquals("a OR !b", null, "a -b");
  assertQueryEquals("a OR ! b", null, "a -b");
  assertQueryEquals("a OR -b", null, "a -b");
  assertQueryEquals("+term -term term", null, "+term -term term");
  assertQueryEquals("foo:term AND field:anotherTerm", null,
      "+foo:term +anotherterm");
  assertQueryEquals("term AND \"phrase phrase\"", null,
      "+term +\"phrase phrase\"");
  assertQueryEquals("\"hello there\"", null, "\"hello there\"");
  assertTrue(getQuery("a AND b", null) instanceof BooleanQuery);
  assertTrue(getQuery("hello", null) instanceof TermQuery);
  assertTrue(getQuery("\"hello there\"", null) instanceof PhraseQuery);
  assertQueryEquals("germ term^2.0", null, "germ term^2.0");
  assertQueryEquals("(term)^2.0", null, "term^2.0");
  assertQueryEquals("(germ term)^2.0", null, "(germ term)^2.0");
  assertQueryEquals("term^2.0", null, "term^2.0");
  assertQueryEquals("term^2", null, "term^2.0");
  assertQueryEquals("\"germ term\"^2.0", null, "\"germ term\"^2.0");
  assertQueryEquals("\"term germ\"^2", null, "\"term germ\"^2.0");
  assertQueryEquals("(foo OR bar) AND (baz OR boo)", null,
      "+(foo bar) +(baz boo)");
  assertQueryEquals("((a OR b) AND NOT c) OR d", null, "(+(a b) -c) d");
  assertQueryEquals("+(apple \"steve jobs\") -(foo bar baz)", null,
      "+(apple \"steve jobs\") -(foo bar baz)");
  assertQueryEquals("+title:(dog OR cat) -author:\"bob dole\"", null,
      "+(title:dog title:cat) -author:\"bob dole\"");
  QueryParserWrapper qp = new QueryParserWrapper("field",
      new StandardAnalyzer());
  // make sure OR is the default:
  assertEquals(QueryParserWrapper.OR_OPERATOR, qp.getDefaultOperator());
  qp.setDefaultOperator(QueryParserWrapper.AND_OPERATOR);
  assertEquals(QueryParserWrapper.AND_OPERATOR, qp.getDefaultOperator());
  qp.setDefaultOperator(QueryParserWrapper.OR_OPERATOR);
  assertEquals(QueryParserWrapper.OR_OPERATOR, qp.getDefaultOperator());
}
/**
 * Checks that, with a WhitespaceAnalyzer, terms containing punctuation are
 * rendered back unchanged.
 */
public void testPunct() throws Exception {
  Analyzer whitespace = new WhitespaceAnalyzer();
  String[] unchangedTerms = { "a&b", "a&&b", ".NET" };
  for (int i = 0; i < unchangedTerms.length; i++) {
    assertQueryEquals(unchangedTerms[i], whitespace, unchangedTerms[i]);
  }
}
/**
 * Checks the string form of phrase queries that carry a slop value,
 * including single-term and empty phrases and a boosted phrase.
 */
public void testSlop() throws Exception {
  // Each row is { query, expected string form }.
  String[][] cases = {
      { "\"term germ\"~2", "\"term germ\"~2" },
      { "\"term germ\"~2 flork", "\"term germ\"~2 flork" },
      { "\"term\"~2", "term" },
      { "\" \"~2 germ", "germ" },
      { "\"term germ\"~2^2", "\"term germ\"~2^2.0" },
  };
  for (int i = 0; i < cases.length; i++) {
    assertQueryEquals(cases[i][0], null, cases[i][1]);
  }
}
/**
 * Checks how numbers in a query are handled by the default analyzer and by
 * a StandardAnalyzer.
 */
public void testNumber() throws Exception {
  // The numbers go away because SimpleAnalyzer ignores them
  String[][] defaultAnalyzerCases = {
      { "3", "" },
      { "term 1.0 1 2", "term" },
      { "term term1 term2", "term term term" },
  };
  for (int i = 0; i < defaultAnalyzerCases.length; i++) {
    assertQueryEquals(defaultAnalyzerCases[i][0], null,
        defaultAnalyzerCases[i][1]);
  }
  // With StandardAnalyzer every query is expected back unchanged.
  Analyzer standard = new StandardAnalyzer();
  String[] unchangedQueries = { "3", "term 1.0 1 2", "term term1 term2" };
  for (int i = 0; i < unchangedQueries.length; i++) {
    assertQueryEquals(unchangedQueries[i], standard, unchangedQueries[i]);
  }
}
/**
 * Checks prefix, wildcard and fuzzy queries: their string form, the query
 * class produced, fuzzy similarity and prefix length, lower-casing of
 * expanded terms (default, enabled and disabled), and the handling of
 * leading wildcards.
 */
public void testWildcard() throws Exception {
  assertQueryEquals("term*", null, "term*");
  assertQueryEquals("term*^2", null, "term*^2.0");
  assertQueryEquals("term~", null, "term~0.5");
  assertQueryEquals("term~0.7", null, "term~0.7");
  assertQueryEquals("term~^2", null, "term~0.5^2.0");
  assertQueryEquals("term^2~", null, "term~0.5^2.0");
  assertQueryEquals("term*germ", null, "term*germ");
  assertQueryEquals("term*germ^3", null, "term*germ^3.0");
  assertTrue(getQuery("term*", null) instanceof PrefixQuery);
  assertTrue(getQuery("term*^2", null) instanceof PrefixQuery);
  assertTrue(getQuery("term~", null) instanceof FuzzyQuery);
  assertTrue(getQuery("term~0.7", null) instanceof FuzzyQuery);
  FuzzyQuery fq = (FuzzyQuery) getQuery("term~0.7", null);
  assertEquals(0.7f, fq.getMinSimilarity(), 0.1f);
  assertEquals(FuzzyQuery.defaultPrefixLength, fq.getPrefixLength());
  fq = (FuzzyQuery) getQuery("term~", null);
  assertEquals(0.5f, fq.getMinSimilarity(), 0.1f);
  assertEquals(FuzzyQuery.defaultPrefixLength, fq.getPrefixLength());
  assertParseException("term~1.1"); // value > 1, throws exception
  assertTrue(getQuery("term*germ", null) instanceof WildcardQuery);
  /*
   * Tests to see that wild card terms are (or are not) properly lower-cased
   * with proper parser configuration
   */
  // First prefix queries:
  // by default, convert to lowercase. The two-argument overload leaves the
  // parser settings at their defaults; the previous call passed an explicit
  // "true" and so only repeated the "explicitly set" case below.
  assertWildcardQueryEquals("Term*", "term*");
  // explicitly set lowercase:
  assertWildcardQueryEquals("term*", true, "term*");
  assertWildcardQueryEquals("Term*", true, "term*");
  assertWildcardQueryEquals("TERM*", true, "term*");
  // explicitly disable lowercase conversion:
  assertWildcardQueryEquals("term*", false, "term*");
  assertWildcardQueryEquals("Term*", false, "Term*");
  assertWildcardQueryEquals("TERM*", false, "TERM*");
  // Then 'full' wildcard queries:
  // by default, convert to lowercase:
  assertWildcardQueryEquals("Te?m", "te?m");
  // explicitly set lowercase:
  assertWildcardQueryEquals("te?m", true, "te?m");
  assertWildcardQueryEquals("Te?m", true, "te?m");
  assertWildcardQueryEquals("TE?M", true, "te?m");
  assertWildcardQueryEquals("Te?m*gerM", true, "te?m*germ");
  // explicitly disable lowercase conversion:
  assertWildcardQueryEquals("te?m", false, "te?m");
  assertWildcardQueryEquals("Te?m", false, "Te?m");
  assertWildcardQueryEquals("TE?M", false, "TE?M");
  assertWildcardQueryEquals("Te?m*gerM", false, "Te?m*gerM");
  // Fuzzy queries:
  assertWildcardQueryEquals("Term~", "term~0.5");
  assertWildcardQueryEquals("Term~", true, "term~0.5");
  assertWildcardQueryEquals("Term~", false, "Term~0.5");
  // Range queries:
  // TODO: implement this on QueryParser
  // Q0002E_INVALID_SYNTAX_CANNOT_PARSE: Syntax Error, cannot parse '[A TO
  // C]': Lexical error at line 1, column 1. Encountered: "[" (91), after : ""
  assertWildcardQueryEquals("[A TO C]", "[a TO c]");
  assertWildcardQueryEquals("[A TO C]", true, "[a TO c]");
  assertWildcardQueryEquals("[A TO C]", false, "[A TO C]");
  // Test suffix queries: first disallow
  try {
    assertWildcardQueryEquals("*Term", true, "*term");
    fail();
  } catch (ParseException pe) {
    // expected exception
  }
  try {
    assertWildcardQueryEquals("?Term", true, "?term");
    fail();
  } catch (ParseException pe) {
    // expected exception
  }
  // Test suffix queries: then allow
  assertWildcardQueryEquals("*Term", true, "*term", true);
  assertWildcardQueryEquals("?Term", true, "?term", true);
}
/**
 * Checks that, with leading wildcards allowed, each of the patterns below
 * is parsed into exactly a WildcardQuery.
 */
public void testLeadingWildcardType() throws Exception {
  QueryParserWrapper parser = getParser(null);
  parser.setAllowLeadingWildcard(true);
  String[] patterns = { "t*erm*", "?term*", "*term*" };
  for (int i = 0; i < patterns.length; i++) {
    assertEquals(WildcardQuery.class, parser.parse(patterns[i]).getClass());
  }
}
/**
 * Checks parsing with qpAnalyzer: queries containing 'stop' are expected to
 * lose that term, and 'phrase' is expected to become the phrase
 * "phrase1 phrase2".
 */
public void testQPA() throws Exception {
  // Each row is { query, expected string form }.
  String[][] cases = {
      { "term term^3.0 term", "term term^3.0 term" },
      { "term stop^3.0 term", "term term" },
      { "term term term", "term term term" },
      { "term +stop term", "term term" },
      { "term -stop term", "term term" },
      { "drop AND (stop) AND roll", "+drop +roll" },
      { "term +(stop) term", "term term" },
      { "term -(stop) term", "term term" },
      { "drop AND stop AND roll", "+drop +roll" },
      { "term phrase term", "term \"phrase1 phrase2\" term" },
      { "term AND NOT phrase term", "+term -\"phrase1 phrase2\" term" },
      { "stop^3", "" },
      { "stop", "" },
      { "(stop)^3", "" },
      { "((stop))^3", "" },
      { "(stop^3)", "" },
      { "((stop)^3)", "" },
      { "(stop)", "" },
      { "((stop))", "" },
  };
  for (int i = 0; i < cases.length; i++) {
    assertQueryEquals(cases[i][0], qpAnalyzer, cases[i][1]);
  }
  Query threeTerms = getQuery("term term term", qpAnalyzer);
  assertTrue(threeTerms instanceof BooleanQuery);
  Query termPlusStop = getQuery("term +stop", qpAnalyzer);
  assertTrue(termPlusStop instanceof TermQuery);
}
/**
 * Checks the string form of inclusive and exclusive range queries and the
 * rewrite method carried by the resulting TermRangeQuery, both by default
 * and after setMultiTermRewriteMethod.
 */
public void testRange() throws Exception {
  final String inclusiveAtoZ = "[ a TO z]";
  assertQueryEquals(inclusiveAtoZ, null, "[a TO z]");
  TermRangeQuery withDefaultRewrite = (TermRangeQuery) getQuery(inclusiveAtoZ, null);
  assertEquals(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT,
      withDefaultRewrite.getRewriteMethod());
  QueryParserWrapper scoringParser = new QueryParserWrapper("field",
      new SimpleAnalyzer());
  scoringParser.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
  TermRangeQuery withScoringRewrite = (TermRangeQuery) scoringParser.parse(inclusiveAtoZ);
  assertEquals(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE,
      withScoringRewrite.getRewriteMethod());
  // Each row is { query, expected string form }.
  String[][] cases = {
      { "[ a TO z ]", "[a TO z]" },
      { "{ a TO z}", "{a TO z}" },
      { "{ a TO z }", "{a TO z}" },
      { "{ a TO z }^2.0", "{a TO z}^2.0" },
      { "[ a TO z] OR bar", "[a TO z] bar" },
      { "[ a TO z] AND bar", "+[a TO z] +bar" },
      { "( bar blar { a TO z}) ", "bar blar {a TO z}" },
      { "gack ( bar blar { a TO z}) ", "gack (bar blar {a TO z})" },
  };
  for (int i = 0; i < cases.length; i++) {
    assertQueryEquals(cases[i][0], null, cases[i][1]);
  }
}
/**
 * Indexes a single Farsi term and checks that range queries honour the
 * collator set through setRangeCollator, once with the constant-score
 * filter rewrite and once with the scoring boolean rewrite.
 */
public void testFarsiRangeCollating() throws Exception {
  RAMDirectory ramDir = new RAMDirectory();
  IndexWriter iw = new IndexWriter(ramDir, new WhitespaceAnalyzer(), true,
      IndexWriter.MaxFieldLength.LIMITED);
  try {
    Document doc = new Document();
    doc.add(new Field("content", "\u0633\u0627\u0628", Field.Store.YES,
        Field.Index.UN_TOKENIZED));
    iw.addDocument(doc);
  } finally {
    // Close the writer even when indexing fails.
    iw.close();
  }
  IndexSearcher is = new IndexSearcher(ramDir);
  try {
    QueryParserWrapper qp = new QueryParserWrapper("content",
        new WhitespaceAnalyzer());
    // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
    // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi
    // characters properly.
    Collator c = Collator.getInstance(new Locale("ar"));
    qp.setRangeCollator(c);
    // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
    // orders the U+0698 character before the U+0633 character, so the single
    // index Term below should NOT be returned by a range query that uses a
    // Farsi Collator (or an Arabic one for the case when Farsi is not
    // supported).
    // Test the constant-score filter rewrite
    qp.setMultiTermRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
    ScoreDoc[] result = is.search(qp.parse("[ \u062F TO \u0698 ]"), null, 1000).scoreDocs;
    assertEquals("The index Term should not be included.", 0, result.length);
    result = is.search(qp.parse("[ \u0633 TO \u0638 ]"), null, 1000).scoreDocs;
    assertEquals("The index Term should be included.", 1, result.length);
    // Test the scoring boolean rewrite
    qp.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
    result = is.search(qp.parse("[ \u062F TO \u0698 ]"), null, 1000).scoreDocs;
    assertEquals("The index Term should not be included.", 0, result.length);
    result = is.search(qp.parse("[ \u0633 TO \u0638 ]"), null, 1000).scoreDocs;
    assertEquals("The index Term should be included.", 1, result.length);
  } finally {
    // Close the searcher even when an assertion above fails.
    is.close();
  }
}
/**
 * Wraps the text in double quotes when it contains a space; otherwise
 * returns it unchanged.
 *
 * @param s the text to quote if needed
 * @return the quoted or original text
 */
private String escapeDateString(String s) {
  if (s.indexOf(' ') < 0) {
    return s;
  }
  return "\"" + s + "\"";
}
/**
 * for testing legacy DateField support: parses the text with the SHORT date
 * format of the default locale and converts it with DateField.dateToString.
 */
private String getLegacyDate(String s) throws Exception {
  Date parsed = DateFormat.getDateInstance(DateFormat.SHORT).parse(s);
  return DateField.dateToString(parsed);
}
/**
 * for testing DateTools support: parses the text with the SHORT date format
 * of the default locale and converts the result with the Date-based
 * overload.
 */
private String getDate(String s, DateTools.Resolution resolution)
    throws Exception {
  Date parsed = DateFormat.getDateInstance(DateFormat.SHORT).parse(s);
  return getDate(parsed, resolution);
}
/**
 * for testing DateTools support: converts the date with DateTools at the
 * given resolution, or with the legacy DateField when the resolution is
 * null.
 */
private String getDate(Date d, DateTools.Resolution resolution)
    throws Exception {
  return (resolution == null)
      ? DateField.dateToString(d)
      : DateTools.dateToString(d, resolution);
}
/**
 * Formats the given calendar date with the SHORT date format of the
 * default locale.
 *
 * @param year the calendar year
 * @param month the month value passed to Calendar.set
 * @param day the day of the month
 * @param extendLastDate when true the time of day is set to 23:59:59.999
 *        before formatting
 * @return the formatted date
 */
private String getLocalizedDate(int year, int month, int day,
    boolean extendLastDate) {
  Calendar cal = new GregorianCalendar();
  cal.set(year, month, day);
  if (extendLastDate) {
    cal.set(Calendar.HOUR_OF_DAY, 23);
    cal.set(Calendar.MINUTE, 59);
    cal.set(Calendar.SECOND, 59);
    cal.set(Calendar.MILLISECOND, 999);
  }
  DateFormat shortFormat = DateFormat.getDateInstance(DateFormat.SHORT);
  return shortFormat.format(cal.getTime());
}
/**
 * for testing legacy DateField support: an inclusive range is expected to
 * end at 23:59:59.999 of the end date, an exclusive range at the end date
 * itself.
 */
public void testLegacyDateRange() throws Exception {
  String from = getLocalizedDate(2002, 1, 1, false);
  String to = getLocalizedDate(2002, 1, 4, false);
  Calendar inclusiveEnd = new GregorianCalendar();
  inclusiveEnd.set(2002, 1, 4, 23, 59, 59);
  inclusiveEnd.set(Calendar.MILLISECOND, 999);

  String inclusiveQuery = "[ " + escapeDateString(from) + " TO " + escapeDateString(to) + "]";
  String inclusiveExpected = "[" + getLegacyDate(from) + " TO "
      + DateField.dateToString(inclusiveEnd.getTime()) + "]";
  assertQueryEquals(inclusiveQuery, null, inclusiveExpected);

  // Note: the exclusive query separates its bounds with a single space only.
  String exclusiveQuery = "{ " + escapeDateString(from) + " " + escapeDateString(to) + " }";
  String exclusiveExpected = "{" + getLegacyDate(from) + " TO " + getLegacyDate(to) + "}";
  assertQueryEquals(exclusiveQuery, null, exclusiveExpected);
}
/**
 * Checks which date conversion is applied to range queries: the legacy
 * DateField while no default resolution is set, the default resolution once
 * one is set, and a field specific resolution where one was configured.
 */
public void testDateRange() throws Exception {
  String from = getLocalizedDate(2002, 1, 1, false);
  String to = getLocalizedDate(2002, 1, 4, false);
  Calendar inclusiveEndCalendar = new GregorianCalendar();
  inclusiveEndCalendar.set(2002, 1, 4, 23, 59, 59);
  inclusiveEndCalendar.set(Calendar.MILLISECOND, 999);
  Date inclusiveEnd = inclusiveEndCalendar.getTime();

  QueryParserWrapper parser = new QueryParserWrapper("field",
      new SimpleAnalyzer());

  // Don't set any date resolution and verify if DateField is used
  assertDateRangeQueryEquals(parser, "default", from, to, inclusiveEnd, null);

  // set a field specific date resolution
  parser.setDateResolution("month", DateTools.Resolution.MONTH);
  // DateField should still be used for the default field
  assertDateRangeQueryEquals(parser, "default", from, to, inclusiveEnd, null);

  // set default date resolution to MILLISECOND
  parser.setDateResolution(DateTools.Resolution.MILLISECOND);
  // set second field specific date resolution
  parser.setDateResolution("hour", DateTools.Resolution.HOUR);

  // for this field no field specific date resolution has been set,
  // so verify if the default resolution is used
  assertDateRangeQueryEquals(parser, "default", from, to, inclusiveEnd,
      DateTools.Resolution.MILLISECOND);

  // verify if field specific date resolutions are used for these two fields
  assertDateRangeQueryEquals(parser, "month", from, to, inclusiveEnd,
      DateTools.Resolution.MONTH);
  assertDateRangeQueryEquals(parser, "hour", from, to, inclusiveEnd,
      DateTools.Resolution.HOUR);
}
/**
 * Checks an inclusive and an exclusive date range query on the given field.
 * The inclusive range is expected to end at endDateInclusive, the exclusive
 * range at endDate; all bounds are converted with getDate at the given
 * resolution.
 *
 * @param qp the parser to use
 * @param field the field to query
 * @param startDate the lower bound as localized text
 * @param endDate the upper bound as localized text
 * @param endDateInclusive the upper bound expected for the inclusive range
 * @param resolution the resolution handed to getDate (may be null)
 */
public void assertDateRangeQueryEquals(QueryParserWrapper qp, String field,
    String startDate, String endDate, Date endDateInclusive,
    DateTools.Resolution resolution) throws Exception {
  String lowerBound = escapeDateString(startDate);
  String upperBound = escapeDateString(endDate);

  String inclusiveQuery = field + ":[" + lowerBound + " TO " + upperBound + "]";
  String inclusiveExpected = "[" + getDate(startDate, resolution) + " TO "
      + getDate(endDateInclusive, resolution) + "]";
  assertQueryEquals(qp, field, inclusiveQuery, inclusiveExpected);

  String exclusiveQuery = field + ":{" + lowerBound + " TO " + upperBound + "}";
  String exclusiveExpected = "{" + getDate(startDate, resolution) + " TO "
      + getDate(endDate, resolution) + "}";
  assertQueryEquals(qp, field, exclusiveQuery, exclusiveExpected);
}
/**
 * Checks how backslash-escaped characters are parsed in field names, terms,
 * wildcard, fuzzy, range and phrase queries, including four-hex-digit
 * unicode escapes, and that malformed escapes are reported through
 * assertParseException.
 */
public void testEscaped() throws Exception {
Analyzer a = new WhitespaceAnalyzer();
/*
* assertQueryEquals("\\[brackets", a, "\\[brackets");
* assertQueryEquals("\\[brackets", null, "brackets");
* assertQueryEquals("\\\\", a, "\\\\"); assertQueryEquals("\\+blah", a,
* "\\+blah"); assertQueryEquals("\\(blah", a, "\\(blah");
*
* assertQueryEquals("\\-blah", a, "\\-blah"); assertQueryEquals("\\!blah",
* a, "\\!blah"); assertQueryEquals("\\{blah", a, "\\{blah");
* assertQueryEquals("\\}blah", a, "\\}blah"); assertQueryEquals("\\:blah",
* a, "\\:blah"); assertQueryEquals("\\^blah", a, "\\^blah");
* assertQueryEquals("\\[blah", a, "\\[blah"); assertQueryEquals("\\]blah",
* a, "\\]blah"); assertQueryEquals("\\\"blah", a, "\\\"blah");
* assertQueryEquals("\\(blah", a, "\\(blah"); assertQueryEquals("\\)blah",
* a, "\\)blah"); assertQueryEquals("\\~blah", a, "\\~blah");
* assertQueryEquals("\\*blah", a, "\\*blah"); assertQueryEquals("\\?blah",
* a, "\\?blah"); //assertQueryEquals("foo \\&\\& bar", a,
* "foo \\&\\& bar"); //assertQueryEquals("foo \\|| bar", a,
* "foo \\|| bar"); //assertQueryEquals("foo \\AND bar", a,
* "foo \\AND bar");
*/
// escaped characters in field names and plain terms
assertQueryEquals("\\a", a, "a");
assertQueryEquals("a\\-b:c", a, "a-b:c");
assertQueryEquals("a\\+b:c", a, "a+b:c");
assertQueryEquals("a\\:b:c", a, "a:b:c");
assertQueryEquals("a\\\\b:c", a, "a\\b:c");
assertQueryEquals("a:b\\-c", a, "a:b-c");
assertQueryEquals("a:b\\+c", a, "a:b+c");
assertQueryEquals("a:b\\:c", a, "a:b:c");
assertQueryEquals("a:b\\\\c", a, "a:b\\c");
// escaped characters combined with a trailing '*'
assertQueryEquals("a:b\\-c*", a, "a:b-c*");
assertQueryEquals("a:b\\+c*", a, "a:b+c*");
assertQueryEquals("a:b\\:c*", a, "a:b:c*");
assertQueryEquals("a:b\\\\c*", a, "a:b\\c*");
// escaped characters combined with a '?' wildcard
assertQueryEquals("a:b\\-?c", a, "a:b-?c");
assertQueryEquals("a:b\\+?c", a, "a:b+?c");
assertQueryEquals("a:b\\:?c", a, "a:b:?c");
assertQueryEquals("a:b\\\\?c", a, "a:b\\?c");
// escaped characters combined with a fuzzy '~'
assertQueryEquals("a:b\\-c~", a, "a:b-c~0.5");
assertQueryEquals("a:b\\+c~", a, "a:b+c~0.5");
assertQueryEquals("a:b\\:c~", a, "a:b:c~0.5");
assertQueryEquals("a:b\\\\c~", a, "a:b\\c~0.5");
// TODO: implement Range queries on QueryParser
assertQueryEquals("[ a\\- TO a\\+ ]", null, "[a- TO a+]");
assertQueryEquals("[ a\\: TO a\\~ ]", null, "[a: TO a~]");
assertQueryEquals("[ a\\\\ TO a\\* ]", null, "[a\\ TO a*]");
assertQueryEquals(
"[\"c\\:\\\\temp\\\\\\~foo0.txt\" TO \"c\\:\\\\temp\\\\\\~foo9.txt\"]",
a, "[c:\\temp\\~foo0.txt TO c:\\temp\\~foo9.txt]");
assertQueryEquals("a\\\\\\+b", a, "a\\+b");
assertQueryEquals("a \\\"b c\\\" d", a, "a \"b c\" d");
assertQueryEquals("\"a \\\"b c\\\" d\"", a, "\"a \"b c\" d\"");
assertQueryEquals("\"a \\+b c d\"", a, "\"a +b c d\"");
assertQueryEquals("c\\:\\\\temp\\\\\\~foo.txt", a, "c:\\temp\\~foo.txt");
assertParseException("XY\\"); // there must be a character after the escape
// char
// test unicode escaping
assertQueryEquals("a\\u0062c", a, "abc");
assertQueryEquals("XY\\u005a", a, "XYZ");
assertQueryEquals("XY\\u005A", a, "XYZ");
assertQueryEquals("\"a \\\\\\u0028\\u0062\\\" c\"", a, "\"a \\(b\" c\"");
assertParseException("XY\\u005G"); // test non-hex character in escaped
// unicode sequence
assertParseException("XY\\u005"); // test incomplete escaped unicode
// sequence
// Tests bug LUCENE-800
assertQueryEquals("(item:\\\\ item:ABCD\\\\)", a, "item:\\ item:ABCD\\");
assertParseException("(item:\\\\ item:ABCD\\\\))"); // unmatched closing
// parenthesis
assertQueryEquals("\\*", a, "*");
assertQueryEquals("\\\\", a, "\\"); // escaped backslash
assertParseException("\\"); // a backslash must always be escaped
// LUCENE-1189
assertQueryEquals("(\"a\\\\\") or (\"b\")", a, "a\\ or b");
}
/**
 * Checks, through assertEscapedQueryEquals, that QueryParserWrapper.escape
 * puts a backslash in front of each query syntax character in the inputs
 * below. The analyzer argument is passed along but is not used by
 * assertEscapedQueryEquals.
 */
public void testQueryStringEscaping() throws Exception {
Analyzer a = new WhitespaceAnalyzer();
// ':', '-', '+' and backslash
assertEscapedQueryEquals("a-b:c", a, "a\\-b\\:c");
assertEscapedQueryEquals("a+b:c", a, "a\\+b\\:c");
assertEscapedQueryEquals("a:b:c", a, "a\\:b\\:c");
assertEscapedQueryEquals("a\\b:c", a, "a\\\\b\\:c");
assertEscapedQueryEquals("a:b-c", a, "a\\:b\\-c");
assertEscapedQueryEquals("a:b+c", a, "a\\:b\\+c");
assertEscapedQueryEquals("a:b:c", a, "a\\:b\\:c");
assertEscapedQueryEquals("a:b\\c", a, "a\\:b\\\\c");
// trailing '*'
assertEscapedQueryEquals("a:b-c*", a, "a\\:b\\-c\\*");
assertEscapedQueryEquals("a:b+c*", a, "a\\:b\\+c\\*");
assertEscapedQueryEquals("a:b:c*", a, "a\\:b\\:c\\*");
assertEscapedQueryEquals("a:b\\\\c*", a, "a\\:b\\\\\\\\c\\*");
// '?'
assertEscapedQueryEquals("a:b-?c", a, "a\\:b\\-\\?c");
assertEscapedQueryEquals("a:b+?c", a, "a\\:b\\+\\?c");
assertEscapedQueryEquals("a:b:?c", a, "a\\:b\\:\\?c");
assertEscapedQueryEquals("a:b?c", a, "a\\:b\\?c");
// trailing '~'
assertEscapedQueryEquals("a:b-c~", a, "a\\:b\\-c\\~");
assertEscapedQueryEquals("a:b+c~", a, "a\\:b\\+c\\~");
assertEscapedQueryEquals("a:b:c~", a, "a\\:b\\:c\\~");
assertEscapedQueryEquals("a:b\\c~", a, "a\\:b\\\\c\\~");
// square brackets
assertEscapedQueryEquals("[ a - TO a+ ]", null, "\\[ a \\- TO a\\+ \\]");
assertEscapedQueryEquals("[ a : TO a~ ]", null, "\\[ a \\: TO a\\~ \\]");
assertEscapedQueryEquals("[ a\\ TO a* ]", null, "\\[ a\\\\ TO a\\* \\]");
// LUCENE-881
assertEscapedQueryEquals("|| abc ||", a, "\\|\\| abc \\|\\|");
assertEscapedQueryEquals("&& abc &&", a, "\\&\\& abc \\&\\&");
}
/**
 * Checks that two clauses separated by spaces, newlines, carriage returns,
 * tabs, or mixes of them all produce the same query through
 * assertQueryEqualsDOA.
 */
public void testTabNewlineCarriageReturn() throws Exception {
  final String expected = "+weltbank +worlbank";
  final String[] inputs = {
      // plain space
      "+weltbank +worlbank",
      // newline
      "+weltbank\n+worlbank",
      "weltbank \n+worlbank",
      "weltbank \n +worlbank",
      // carriage return
      "+weltbank\r+worlbank",
      "weltbank \r+worlbank",
      "weltbank \r +worlbank",
      // carriage return + newline
      "+weltbank\r\n+worlbank",
      "weltbank \r\n+worlbank",
      "weltbank \r\n +worlbank",
      "weltbank \r \n +worlbank",
      // tab
      "+weltbank\t+worlbank",
      "weltbank \t+worlbank",
      "weltbank \t +worlbank",
  };
  for (int i = 0; i < inputs.length; i++) {
    assertQueryEqualsDOA(inputs[i], null, expected);
  }
}
/**
 * Checks simple three-term queries through assertQueryEqualsDOA. Each row
 * is { query text, expected query string }.
 */
public void testSimpleDAO() throws Exception {
  final String[][] cases = {
      { "term term term", "+term +term +term" },
      { "term +term term", "+term +term +term" },
      { "term term +term", "+term +term +term" },
      { "term +term +term", "+term +term +term" },
      { "-term term term", "-term +term +term" },
  };
  for (int i = 0; i < cases.length; i++) {
    assertQueryEqualsDOA(cases[i][0], null, cases[i][1]);
  }
}
/**
 * Checks that a "^boost" suffix is accepted on terms and phrases, that the
 * parsed query carries the boost, and that a query whose only term is a stop
 * word still parses to a non-null query.
 */
public void testBoost() throws Exception {
  // Analyzer constructed with the single word "on" as its word list.
  StandardAnalyzer oneStopAnalyzer = new StandardAnalyzer(
      new String[] { "on" });
  QueryParserWrapper qp = new QueryParserWrapper("field", oneStopAnalyzer);
  Query q = qp.parse("on^1.0");
  assertNotNull(q);
  q = qp.parse("\"hello\"^2.0");
  assertNotNull(q);
  // JUnit's signature is assertEquals(expected, actual, delta); the expected
  // value goes first so that a failure message reads the right way round.
  assertEquals(2.0f, q.getBoost(), 0.5f);
  q = qp.parse("hello^2.0");
  assertNotNull(q);
  assertEquals(2.0f, q.getBoost(), 0.5f);
  q = qp.parse("\"on\"^1.0");
  assertNotNull(q);
  QueryParserWrapper qp2 = new QueryParserWrapper("field",
      new StandardAnalyzer());
  q = qp2.parse("the^3");
  // "the" is a stop word so the result is an empty query:
  assertNotNull(q);
  assertEquals("", q.toString());
  assertEquals(1.0f, q.getBoost(), 0.01f);
}
/**
 * Fails the current test unless parsing {@code queryString} (with a null
 * analyzer argument to getQuery) throws a ParseException.
 *
 * @param queryString query text that is expected to be rejected
 */
public void assertParseException(String queryString) throws Exception {
  boolean rejected = false;
  try {
    getQuery(queryString, null);
  } catch (ParseException expected) {
    rejected = true;
  }
  if (!rejected) {
    fail("ParseException expected, not thrown");
  }
}
/**
 * Checks that each malformed query string below is rejected with a
 * ParseException.
 */
public void testException() throws Exception {
  final String[] malformedQueries = {
      "\"some phrase",
      "(foo bar",
      "foo bar))",
      "field:term:with:colon some more terms",
      "(sub query)^5.0^2.0 plus more",
      "secret AND illegal) AND access:confidential",
  };
  for (int i = 0; i < malformedQueries.length; i++) {
    assertParseException(malformedQueries[i]);
  }
}
/**
 * Checks that QPTestParser rejects the wildcard query "a?t" with a
 * ParseException.
 */
public void testCustomQueryParserWildcard() {
  boolean rejected;
  try {
    QPTestParser parser = new QPTestParser("contents",
        new WhitespaceAnalyzer());
    parser.parse("a?t");
    rejected = false;
  } catch (ParseException expected) {
    // expected exception
    rejected = true;
  }
  if (!rejected) {
    fail("Wildcard queries should not be allowed");
  }
}
/**
 * Checks that QPTestParser rejects the fuzzy query "xunit~" with a
 * ParseException.
 */
public void testCustomQueryParserFuzzy() throws Exception {
  boolean rejected;
  try {
    QPTestParser parser = new QPTestParser("contents",
        new WhitespaceAnalyzer());
    parser.parse("xunit~");
    rejected = false;
  } catch (ParseException expected) {
    // expected exception
    rejected = true;
  }
  if (!rejected) {
    fail("Fuzzy queries should not be allowed");
  }
}
/**
 * Lowers the BooleanQuery clause limit to 2 and checks that parsing a
 * three-term query then fails with a ParseException.
 */
public void testBooleanQuery() throws Exception {
  BooleanQuery.setMaxClauseCount(2);
  boolean rejected;
  try {
    QueryParserWrapper parser = new QueryParserWrapper("field",
        new WhitespaceAnalyzer());
    parser.parse("one two three");
    rejected = false;
  } catch (ParseException expected) {
    // too many boolean clauses, so ParseException is expected
    rejected = true;
  }
  if (!rejected) {
    fail("ParseException expected due to too many boolean clauses");
  }
}
/**
 * Checks that "A AND B OR C AND D" parses to the same query as
 * "+A +B +C +D". This test differs from TestPrecedenceQueryParser.
 */
public void testPrecedence() throws Exception {
  QueryParserWrapper parser = new QueryParserWrapper("field",
      new WhitespaceAnalyzer());
  assertEquals(parser.parse("A AND B OR C AND D"), parser.parse("+A +B +C +D"));
}
/**
 * Indexes two dated documents (2005-12-02 10:15:33 and 2005-12-04 22:15:00)
 * and checks the hit counts of date range queries written as month/day/year,
 * using both the square-bracket and the curly-brace range forms.
 *
 * @throws IOException if the in-memory index cannot be written or searched
 * @throws ParseException if one of the range queries fails to parse
 */
public void testLocalDateFormat() throws IOException, ParseException {
  RAMDirectory ramDir = new RAMDirectory();
  IndexWriter iw = new IndexWriter(ramDir, new WhitespaceAnalyzer(), true,
      IndexWriter.MaxFieldLength.LIMITED);
  // Close the writer even if adding a document fails.
  try {
    addDateDoc("a", 2005, 12, 2, 10, 15, 33, iw);
    addDateDoc("b", 2005, 12, 4, 22, 15, 0, iw);
  } finally {
    iw.close();
  }
  IndexSearcher is = new IndexSearcher(ramDir);
  // Close the searcher even if one of the assertions fails.
  try {
    assertHits(1, "[12/1/2005 TO 12/3/2005]", is);
    assertHits(2, "[12/1/2005 TO 12/4/2005]", is);
    assertHits(1, "[12/3/2005 TO 12/4/2005]", is);
    assertHits(1, "{12/1/2005 TO 12/3/2005}", is);
    assertHits(1, "{12/1/2005 TO 12/4/2005}", is);
    assertHits(0, "{12/3/2005 TO 12/4/2005}", is);
  } finally {
    is.close();
  }
}
/**
 * Placeholder for a test of how "*" is parsed in term and field position
 * (for example "foo:zoo*", "foo:*", "*:foo" and "*:*").
 * <p>
 * The whole body is commented out, so this method currently asserts nothing
 * and always passes.
 * <p>
 * NOTE(review): the disabled code subclasses the parser and overrides
 * getWildcardQuery, getPrefixQuery and getFieldQuery; presumably it was
 * disabled because QueryParserWrapper does not support those hooks. Confirm
 * before re-enabling.
 */
public void testStarParsing() throws Exception {
// final int[] type = new int[1];
// QueryParser qp = new QueryParserWrapper("field", new
// WhitespaceAnalyzer()) {
// protected Query getWildcardQuery(String field, String termStr) throws
// ParseException {
// // override error checking of superclass
// type[0]=1;
// return new TermQuery(new Term(field,termStr));
// }
// protected Query getPrefixQuery(String field, String termStr) throws
// ParseException {
// // override error checking of superclass
// type[0]=2;
// return new TermQuery(new Term(field,termStr));
// }
//
// protected Query getFieldQuery(String field, String queryText) throws
// ParseException {
// type[0]=3;
// return super.getFieldQuery(field, queryText);
// }
// };
//
// TermQuery tq;
//
// tq = (TermQuery)qp.parse("foo:zoo*");
// assertEquals("zoo",tq.getTerm().text());
// assertEquals(2,type[0]);
//
// tq = (TermQuery)qp.parse("foo:zoo*^2");
// assertEquals("zoo",tq.getTerm().text());
// assertEquals(2,type[0]);
// assertEquals(tq.getBoost(),2,0);
//
// tq = (TermQuery)qp.parse("foo:*");
// assertEquals("*",tq.getTerm().text());
// assertEquals(1,type[0]); // could be a valid prefix query in the future
// too
//
// tq = (TermQuery)qp.parse("foo:*^2");
// assertEquals("*",tq.getTerm().text());
// assertEquals(1,type[0]);
// assertEquals(tq.getBoost(),2,0);
//
// tq = (TermQuery)qp.parse("*:foo");
// assertEquals("*",tq.getTerm().field());
// assertEquals("foo",tq.getTerm().text());
// assertEquals(3,type[0]);
//
// tq = (TermQuery)qp.parse("*:*");
// assertEquals("*",tq.getTerm().field());
// assertEquals("*",tq.getTerm().text());
// assertEquals(1,type[0]); // could be handled as a prefix query in the
// future
//
// tq = (TermQuery)qp.parse("(*:*)");
// assertEquals("*",tq.getTerm().field());
// assertEquals("*",tq.getTerm().text());
// assertEquals(1,type[0]);
}
/**
 * Checks the shape of parsed queries when some or all terms are dropped by a
 * StopAnalyzer constructed with the words "the" and "foo".
 */
public void testStopwords() throws Exception {
  QueryParserWrapper qp = new QueryParserWrapper("a", new StopAnalyzer(
      new String[] { "the", "foo" }));

  // Both terms are in the analyzer's word list: expect a BooleanQuery with
  // no clauses.
  Query result = qp.parse("a:the OR a:foo");
  assertNotNull("result is null and it shouldn't be", result);
  assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery);
  assertEquals("unexpected number of clauses", 0,
      ((BooleanQuery) result).clauses().size());

  // Only one term survives: expect a plain TermQuery.
  result = qp.parse("a:woo OR a:the");
  assertNotNull("result is null and it shouldn't be", result);
  assertTrue("result is not a TermQuery", result instanceof TermQuery);

  // The second group contains only words from the analyzer's word list, so
  // the two clauses expected here are those of the first (boosted) group.
  result = qp
      .parse("(fieldX:xxxxx OR fieldy:xxxxxxxx)^2 AND (fieldx:the OR fieldy:foo)");
  assertNotNull("result is null and it shouldn't be", result);
  assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery);
  assertEquals("unexpected number of clauses", 2,
      ((BooleanQuery) result).clauses().size());
}
/**
 * Checks that, with position increments enabled, the terms of a parsed
 * phrase query keep the positions they had in the original text, leaving
 * gaps where words were removed by the StopAnalyzer.
 * <p>
 * The StopFilter default is switched on for the duration of the test and
 * restored to its previous value afterwards.
 */
public void testPositionIncrement() throws Exception {
  boolean dflt = StopFilter.getEnablePositionIncrementsDefault();
  StopFilter.setEnablePositionIncrementsDefault(true);
  try {
    QueryParserWrapper qp = new QueryParserWrapper("a", new StopAnalyzer(
        new String[] { "the", "in", "are", "this" }));
    qp.setEnablePositionIncrements(true);
    String qtxt = "\"the words in poisitions pos02578 are stopped in this phrasequery\"";
    // The words at positions 0, 2, 5, 7 and 8 are in the analyzer's word
    // list, so the remaining words sit at positions 1, 3, 4, 6 and 9.
    int[] expectedPositions = { 1, 3, 4, 6, 9 };
    PhraseQuery pq = (PhraseQuery) qp.parse(qtxt);
    Term[] t = pq.getTerms();
    int[] pos = pq.getPositions();
    // Without these checks a query with too few terms would pass silently,
    // and one with too many would fail with an ArrayIndexOutOfBoundsException.
    assertEquals("wrong number of terms in phrase query",
        expectedPositions.length, t.length);
    assertEquals("wrong number of positions in phrase query",
        expectedPositions.length, pos.length);
    for (int i = 0; i < t.length; i++) {
      assertEquals("term " + i + " = " + t[i] + " has wrong term-position!",
          expectedPositions[i], pos[i]);
    }
  } finally {
    StopFilter.setEnablePositionIncrementsDefault(dflt);
  }
}
/**
 * Checks that "*:*" (bare or parenthesized) parses to a MatchAllDocsQuery,
 * and that the first two clauses of "+*:* -*:*" each wrap one.
 */
public void testMatchAllDocs() throws Exception {
  QueryParserWrapper parser = new QueryParserWrapper("field",
      new WhitespaceAnalyzer());

  final String[] matchAllForms = { "*:*", "(*:*)" };
  for (int i = 0; i < matchAllForms.length; i++) {
    assertEquals(new MatchAllDocsQuery(), parser.parse(matchAllForms[i]));
  }

  BooleanQuery combined = (BooleanQuery) parser.parse("+*:* -*:*");
  for (int i = 0; i < 2; i++) {
    Query clauseQuery = combined.getClauses()[i].getQuery();
    assertTrue(clauseQuery instanceof MatchAllDocsQuery);
  }
}
/**
 * Parses {@code query} against the "date" field using the English locale,
 * runs it on {@code is} (at most 1000 results requested) and asserts the
 * number of hits.
 *
 * @param expected expected number of hits
 * @param query query text to parse
 * @param is searcher to run the parsed query against
 */
private void assertHits(int expected, String query, IndexSearcher is)
    throws ParseException, IOException {
  QueryParserWrapper dateParser = new QueryParserWrapper("date",
      new WhitespaceAnalyzer());
  dateParser.setLocale(Locale.ENGLISH);
  Query parsed = dateParser.parse(query);
  int hitCount = is.search(parsed, null, 1000).scoreDocs.length;
  assertEquals(expected, hitCount);
}
/**
 * Adds one document to {@code iw} with an analyzed "f" field holding
 * {@code content} and a non-analyzed "date" field holding the given date and
 * time encoded with DateField.dateToString.
 *
 * @param month calendar month, 1-based (converted to Calendar's 0-based
 *        month before use)
 */
private static void addDateDoc(String content, int year, int month, int day,
    int hour, int minute, int second, IndexWriter iw) throws IOException {
  Document doc = new Document();
  Field contentField = new Field("f", content, Field.Store.YES,
      Field.Index.ANALYZED);
  doc.add(contentField);

  Calendar when = Calendar.getInstance(Locale.ENGLISH);
  when.set(year, month - 1, day, hour, minute, second);
  String encodedDate = DateField.dateToString(when.getTime());
  Field dateField = new Field("date", encodedDate, Field.Store.YES,
      Field.Index.NOT_ANALYZED);
  doc.add(dateField);

  iw.addDocument(doc);
}
/**
 * Runs the superclass tear-down and restores the BooleanQuery maximum clause
 * count to {@code originalMaxClauses}.
 */
public void tearDown() throws Exception {
  try {
    super.tearDown();
  } finally {
    // The clause limit is static state shared by all tests, so restore it
    // even when the superclass tear-down throws.
    BooleanQuery.setMaxClauseCount(originalMaxClauses);
  }
}
}