package org.apache.lucene.search;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.LuceneTestCase;
import java.io.IOException;
import java.io.Reader;
import java.util.Locale;
import java.text.Collator;
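/**
 * Tests for {@link TermRangeQuery}: exclusive and inclusive bounds, equals/hashCode,
 * Collator-based ranges, and open-ended (null) endpoints (LUCENE-38).
 */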
public class TestTermRangeQuery extends LuceneTestCase {
private int docCount = 0;
private RAMDirectory dir;
public void setUp() throws Exception {
super.setUp();
dir = new RAMDirectory();
}
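/** An exclusive range (A, C) over the terms A, B, C, D must match only B. */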
public void testExclusive() throws Exception {
Query query = new TermRangeQuery("content", "A", "C", false, false);
initializeIndex(new String[] {"A", "B", "C", "D"});
IndexSearcher searcher = new IndexSearcher(dir);
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
assertEquals("A,B,C,D, only B in range", 1, hits.length);
searcher.close();
initializeIndex(new String[] {"A", "B", "D"});
searcher = new IndexSearcher(dir);
hits = searcher.search(query, null, 1000).scoreDocs;
assertEquals("A,B,D, only B in range", 1, hits.length);
searcher.close();
addDoc("C");
searcher = new IndexSearcher(dir);
hits = searcher.search(query, null, 1000).scoreDocs;
assertEquals("C added, still only B in range", 1, hits.length);
searcher.close();
}
//TODO: remove in Lucene 3.0
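/** The deprecated {@link RangeQuery} constructors treat a null lower or upper term as an open end. */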
public void testDeprecatedConstructors() throws IOException {
Query query = new RangeQuery(null, new Term("content","C"), false);
initializeIndex(new String[] {"A", "B", "C", "D"});
IndexSearcher searcher = new IndexSearcher(dir);
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
assertEquals("A,B,C,D, only B in range", 2, hits.length);
searcher.close();
query = new RangeQuery(new Term("content","C"),null, false);
initializeIndex(new String[] {"A", "B", "C", "D"});
searcher = new IndexSearcher(dir);
hits = searcher.search(query, null, 1000).scoreDocs;
assertEquals("A,B,C,D, only B in range", 1, hits.length);
searcher.close();
}
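/** An inclusive range [A, C] over the terms A, B, C, D must match A, B and C. */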
public void testInclusive() throws Exception {
Query query = new TermRangeQuery("content", "A", "C", true, true);
initializeIndex(new String[]{"A", "B", "C", "D"});
IndexSearcher searcher = new IndexSearcher(dir);
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
assertEquals("A,B,C,D - A,B,C in range", 3, hits.length);
searcher.close();
initializeIndex(new String[]{"A", "B", "D"});
searcher = new IndexSearcher(dir);
hits = searcher.search(query, null, 1000).scoreDocs;
assertEquals("A,B,D - A and B in range", 2, hits.length);
searcher.close();
addDoc("C");
searcher = new IndexSearcher(dir);
hits = searcher.search(query, null, 1000).scoreDocs;
assertEquals("C added - A, B, C in range", 3, hits.length);
searcher.close();
}
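/** equals()/hashCode() must depend on field, lower/upper terms, inclusiveness, boost and collator. */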
public void testEqualsHashcode() {
Query query = new TermRangeQuery("content", "A", "C", true, true);
query.setBoost(1.0f);
Query other = new TermRangeQuery("content", "A", "C", true, true);
other.setBoost(1.0f);
assertEquals("query equals itself is true", query, query);
assertEquals("equivalent queries are equal", query, other);
assertEquals("hashcode must return same value when equals is true", query.hashCode(), other.hashCode());
other.setBoost(2.0f);
assertFalse("Different boost queries are not equal", query.equals(other));
other = new TermRangeQuery("notcontent", "A", "C", true, true);
assertFalse("Different fields are not equal", query.equals(other));
other = new TermRangeQuery("content", "X", "C", true, true);
assertFalse("Different lower terms are not equal", query.equals(other));
other = new TermRangeQuery("content", "A", "Z", true, true);
assertFalse("Different upper terms are not equal", query.equals(other));
query = new TermRangeQuery("content", null, "C", true, true);
other = new TermRangeQuery("content", null, "C", true, true);
assertEquals("equivalent queries with null lowerterms are equal()", query, other);
assertEquals("hashcode must return same value when equals is true", query.hashCode(), other.hashCode());
query = new TermRangeQuery("content", "C", null, true, true);
other = new TermRangeQuery("content", "C", null, true, true);
assertEquals("equivalent queries with null upperterms are equal()", query, other);
assertEquals("hashcode returns same value", query.hashCode(), other.hashCode());
query = new TermRangeQuery("content", null, "C", true, true);
other = new TermRangeQuery("content", "C", null, true, true);
assertFalse("queries with different upper and lower terms are not equal", query.equals(other));
query = new TermRangeQuery("content", "A", "C", false, false);
other = new TermRangeQuery("content", "A", "C", true, true);
assertFalse("queries with different inclusive are not equal", query.equals(other));
query = new TermRangeQuery("content", "A", "C", false, false);
other = new TermRangeQuery("content", "A", "C", false, false, Collator.getInstance());
assertFalse("a query with a collator is not equal to one without", query.equals(other));
}
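/** Same as {@link #testExclusive()}, but with an English collator supplied. */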
public void testExclusiveCollating() throws Exception {
Query query = new TermRangeQuery("content", "A", "C", false, false, Collator.getInstance(Locale.ENGLISH));
initializeIndex(new String[] {"A", "B", "C", "D"});
IndexSearcher searcher = new IndexSearcher(dir);
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
assertEquals("A,B,C,D, only B in range", 1, hits.length);
searcher.close();
initializeIndex(new String[] {"A", "B", "D"});
searcher = new IndexSearcher(dir);
hits = searcher.search(query, null, 1000).scoreDocs;
assertEquals("A,B,D, only B in range", 1, hits.length);
searcher.close();
addDoc("C");
searcher = new IndexSearcher(dir);
hits = searcher.search(query, null, 1000).scoreDocs;
assertEquals("C added, still only B in range", 1, hits.length);
searcher.close();
}
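/** Same as {@link #testInclusive()}, but with an English collator supplied. */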
public void testInclusiveCollating() throws Exception {
Query query = new TermRangeQuery("content", "A", "C",true, true, Collator.getInstance(Locale.ENGLISH));
initializeIndex(new String[]{"A", "B", "C", "D"});
IndexSearcher searcher = new IndexSearcher(dir);
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
assertEquals("A,B,C,D - A,B,C in range", 3, hits.length);
searcher.close();
initializeIndex(new String[]{"A", "B", "D"});
searcher = new IndexSearcher(dir);
hits = searcher.search(query, null, 1000).scoreDocs;
assertEquals("A,B,D - A and B in range", 2, hits.length);
searcher.close();
addDoc("C");
searcher = new IndexSearcher(dir);
hits = searcher.search(query, null, 1000).scoreDocs;
assertEquals("C added - A, B, C in range", 3, hits.length);
searcher.close();
}
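/** A collated range must follow collator order, not raw Unicode order, for Farsi terms. */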
public void testFarsi() throws Exception {
// Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
// RuleBasedCollator. However, the Arabic Locale seems to order the Farsi
// characters properly.
Collator collator = Collator.getInstance(new Locale("ar"));
Query query = new TermRangeQuery("content", "\u062F", "\u0698", true, true, collator);
// Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
// orders the U+0698 character before the U+0633 character, so the single
// index Term below should NOT be returned by a TermRangeQuery with a Farsi
// Collator (or an Arabic one for the case when Farsi is not supported).
initializeIndex(new String[]{ "\u0633\u0627\u0628"});
IndexSearcher searcher = new IndexSearcher(dir);
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
assertEquals("The index Term should not be included.", 0, hits.length);
query = new TermRangeQuery("content", "\u0633", "\u0638",true, true, collator);
hits = searcher.search(query, null, 1000).scoreDocs;
assertEquals("The index Term should be included.", 1, hits.length);
searcher.close();
}
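/** A collated range must follow Danish collation, which orders "H\u00C5T" between "H\u00D8T" and "MAND". */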
public void testDanish() throws Exception {
Collator collator = Collator.getInstance(new Locale("da", "dk"));
// Danish collation orders the words below in the given order (example taken
// from TestSort.testInternationalSort() ).
String[] words = { "H\u00D8T", "H\u00C5T", "MAND" };
Query query = new TermRangeQuery("content", "H\u00D8T", "MAND", false, false, collator);
// Unicode order would not include "H\u00C5T" in [ "H\u00D8T", "MAND" ],
// but Danish collation does.
initializeIndex(words);
IndexSearcher searcher = new IndexSearcher(dir);
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
assertEquals("The index Term should be included.", 1, hits.length);
query = new TermRangeQuery("content", "H\u00C5T", "MAND", false, false, collator);
hits = searcher.search(query, null, 1000).scoreDocs;
assertEquals("The index Term should not be included.", 0, hits.length);
searcher.close();
}
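/**
 * Analyzer that emits a single token per field: the first character of the input,
 * or an empty term if the input is empty. It lets the empty string be indexed,
 * which the LUCENE-38 tests below rely on.
 */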
private static class SingleCharAnalyzer extends Analyzer {
private static class SingleCharTokenizer extends Tokenizer {
char[] buffer = new char[1];
boolean done;
TermAttribute termAtt;
public SingleCharTokenizer(Reader r) {
super(r);
termAtt = (TermAttribute) addAttribute(TermAttribute.class);
}
public boolean incrementToken() throws IOException {
int count = input.read(buffer);
if (done)
return false;
else {
clearAttributes();
done = true;
if (count == 1) {
termAtt.termBuffer()[0] = buffer[0];
termAtt.setTermLength(1);
} else
termAtt.setTermLength(0);
return true;
}
}
public final void reset(Reader reader) throws IOException {
super.reset(reader);
done = false;
}
}
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
Tokenizer tokenizer = (Tokenizer) getPreviousTokenStream();
if (tokenizer == null) {
tokenizer = new SingleCharTokenizer(reader);
setPreviousTokenStream(tokenizer);
} else
tokenizer.reset(reader);
return tokenizer;
}
public TokenStream tokenStream(String fieldName, Reader reader) {
return new SingleCharTokenizer(reader);
}
}
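/** Recreates the index from scratch, adding one document per value. */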
private void initializeIndex(String[] values) throws IOException {
initializeIndex(values, new WhitespaceAnalyzer());
}
private void initializeIndex(String[] values, Analyzer analyzer) throws IOException {
IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
for (int i = 0; i < values.length; i++) {
insertDoc(writer, values[i]);
}
writer.close();
}
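/** Appends a single document to the existing index. */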
private void addDoc(String content) throws IOException {
IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.LIMITED);
insertDoc(writer, content);
writer.close();
}
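/** Adds a document with a stored "id" field and an analyzed "content" field. */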
private void insertDoc(IndexWriter writer, String content) throws IOException {
Document doc = new Document();
doc.add(new Field("id", "id" + docCount, Field.Store.YES, Field.Index.NOT_ANALYZED));
doc.add(new Field("content", content, Field.Store.NO, Field.Index.ANALYZED));
writer.addDocument(doc);
docCount++;
}
// LUCENE-38
public void testExclusiveLowerNull() throws Exception {
// A null lower bound is open-ended. Since LUCENE-38 is fixed, the empty-string
// term indexed by SingleCharAnalyzer is also in range (before the fix it was
// skipped and one fewer document matched per assert).
// See http://issues.apache.org/jira/browse/LUCENE-38
Analyzer analyzer = new SingleCharAnalyzer();
Query query = new TermRangeQuery("content", null, "C", false, false);
initializeIndex(new String[] {"A", "B", "", "C", "D"}, analyzer);
IndexSearcher searcher = new IndexSearcher(dir);
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
assertEquals("A,B,<empty string>,C,D => A, B & <empty string> are in range", 3, hits.length);
searcher.close();
initializeIndex(new String[] {"A", "B", "", "D"}, analyzer);
searcher = new IndexSearcher(dir);
hits = searcher.search(query, null, 1000).scoreDocs;
assertEquals("A,B,<empty string>,D => A, B & <empty string> are in range", 3, hits.length);
searcher.close();
addDoc("C");
searcher = new IndexSearcher(dir);
hits = searcher.search(query, null, 1000).scoreDocs;
assertEquals("C added, still A, B & <empty string> are in range", 3, hits.length);
searcher.close();
}
// LUCENE-38
public void testInclusiveLowerNull() throws Exception {
// A null lower bound with an inclusive upper bound of "C". Since LUCENE-38 is
// fixed, the empty-string term is counted as well (before the fix it was skipped
// and one fewer document matched per assert).
// See http://issues.apache.org/jira/browse/LUCENE-38
Analyzer analyzer = new SingleCharAnalyzer();
Query query = new TermRangeQuery("content", null, "C", true, true);
initializeIndex(new String[]{"A", "B", "", "C", "D"}, analyzer);
IndexSearcher searcher = new IndexSearcher(dir);
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
assertEquals("A,B,<empty string>,C,D => A,B,<empty string>,C in range", 4, hits.length);
searcher.close();
initializeIndex(new String[]{"A", "B", "", "D"}, analyzer);
searcher = new IndexSearcher(dir);
hits = searcher.search(query, null, 1000).scoreDocs;
assertEquals("A,B,<empty string>,D => A, B and <empty string> in range", 3, hits.length);
searcher.close();
addDoc("C");
searcher = new IndexSearcher(dir);
hits = searcher.search(query, null, 1000).scoreDocs;
assertEquals("C added => A,B,<empty string>,C in range", 4, hits.length);
searcher.close();
}
}