package lucli;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.Map.Entry;
import jline.ConsoleReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.index.IndexReader.FieldOption;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
/**
 * Various methods that interact with Lucene and provide info about the
 * index, search, etc. Parts adapted from Lucene demo.
 */
class LuceneMethods {

  /** Number of documents in the index; populated by {@link #info()}. */
  private int numDocs;

  /** Directory of this index. */
  private String indexName;

  /** Scratch iterator used while collecting field names in getFieldInfo(). */
  private java.util.Iterator fieldIterator;

  /** All field names in the index; populated by getFieldInfo(). */
  private List fields;

  /** Indexed field names only; populated by getFieldInfo(). */
  private List indexedFields;

  /** All field names as an array, used for query parsing and hit display. */
  private String fieldsArray[];

  /** Searcher opened by initSearch()/explainQuery(); closed by their callers. */
  private Searcher searcher;

  /** Most recently parsed query. */
  private Query query;

  /** FQN of the Analyzer class to use; null means use StandardAnalyzer. */
  private String analyzerClassFQN = null;

  /**
   * Creates a helper bound to the given index directory and prints a banner.
   *
   * @param index path of the index directory to operate on
   */
  public LuceneMethods(String index) {
    indexName = index;
    message("Lucene CLI. Using directory '" + indexName + "'. Type 'help' for instructions.");
  }

  /**
   * Instantiates the user-selected Analyzer via reflection, falling back to
   * StandardAnalyzer when none is configured, the class is not an Analyzer,
   * or instantiation fails.
   */
  private Analyzer createAnalyzer() {
    if (analyzerClassFQN == null) return new StandardAnalyzer();
    try {
      Class aClass = Class.forName(analyzerClassFQN);
      Object obj = aClass.newInstance();
      if (!(obj instanceof Analyzer)) {
        message("Given class is not an Analyzer: " + analyzerClassFQN);
        return new StandardAnalyzer();
      }
      return (Analyzer) obj;
    } catch (Exception e) {
      message("Unable to use Analyzer " + analyzerClassFQN);
      return new StandardAnalyzer();
    }
  }

  /**
   * Prints the document count, all field names and the indexed field names,
   * and reports whether the index is locked.
   *
   * @throws java.io.IOException if the index cannot be read
   */
  public void info() throws java.io.IOException {
    IndexReader indexReader = IndexReader.open(indexName);

    getFieldInfo();
    numDocs = indexReader.numDocs();
    message("Index has " + numDocs + " documents ");
    message("All Fields:" + fields.toString());
    message("Indexed Fields:" + indexedFields.toString());

    if (IndexReader.isLocked(indexName)) {
      message("Index is locked");
    }
    //IndexReader.getCurrentVersion(indexName);
    //System.out.println("Version:" + version);

    indexReader.close();
  }

  /**
   * Runs a query and pages through the hits ten at a time, optionally
   * printing a score Explanation and/or the tokenized form of each hit.
   *
   * @param queryString the query to parse and execute
   * @param explain     if true, print a score explanation per hit
   * @param showTokens  if true, re-tokenize each hit and show its top terms
   * @param cr          console used to prompt for paging
   */
  public void search(String queryString, boolean explain, boolean showTokens, ConsoleReader cr)
      throws java.io.IOException, org.apache.lucene.queryParser.ParseException {
    Hits hits = initSearch(queryString);
    System.out.println(hits.length() + " total matching documents");
    if (explain) {
      // NOTE(review): explainQuery() opens a second searcher and replaces the
      // one opened by initSearch(). The first searcher must stay open because
      // 'hits' fetches documents from it lazily, so it is only reclaimed by GC.
      query = explainQuery(queryString);
    }

    final int HITS_PER_PAGE = 10;
    message("--------------------------------------");
    for (int start = 0; start < hits.length(); start += HITS_PER_PAGE) {
      int end = Math.min(hits.length(), start + HITS_PER_PAGE);
      for (int ii = start; ii < end; ii++) {
        Document doc = hits.doc(ii);
        message("---------------- " + (ii + 1) + " score:" + hits.score(ii) + "---------------------");
        printHit(doc);
        if (showTokens) {
          invertDocument(doc);
        }
        if (explain) {
          Explanation exp = searcher.explain(query, hits.id(ii));
          message("Explanation:" + exp.toString());
        }
      }
      message("#################################################");

      if (hits.length() > end) {
        // TODO: don't let the input end up in the command line history
        queryString = cr.readLine("more (y/n) ? ");
        // Stop paging on end-of-input (null), an empty answer, or 'n...'.
        if (queryString == null || queryString.length() == 0 || queryString.charAt(0) == 'n')
          break;
      }
    }
    searcher.close();
  }

  /**
   * Prints every stored value of every known field for one hit; fields with
   * no stored value are reported as not available.
   *
   * TODO: Allow user to specify what field(s) to display
   */
  private void printHit(Document doc) {
    for (int ii = 0; ii < fieldsArray.length; ii++) {
      String currField = fieldsArray[ii];
      String[] result = doc.getValues(currField);
      if (result != null) {
        for (int i = 0; i < result.length; i++) {
          message(currField + ":" + result[i]);
        }
      } else {
        message(currField + ": <not available>");
      }
    }
    //another option is to just do message(doc);
  }

  /**
   * Optimizes (merges) the index in place and reports the elapsed time.
   *
   * @throws IOException if the index cannot be written
   */
  public void optimize() throws IOException {
    //open the index writer. False: don't create a new one
    IndexWriter indexWriter = new IndexWriter(indexName, createAnalyzer(), false);
    try {
      message("Starting to optimize index.");
      long start = System.currentTimeMillis();
      indexWriter.optimize();
      message("Done optimizing index. Took " + (System.currentTimeMillis() - start) + " msecs");
    } finally {
      // Always close, even on failure, so the index write lock is released.
      indexWriter.close();
    }
  }

  /**
   * Re-parses the query against the indexed fields only (for explanations),
   * opening a fresh searcher and storing the parsed query in {@link #query}.
   *
   * @param queryString the query to parse
   * @return the parsed query
   */
  private Query explainQuery(String queryString) throws IOException, ParseException {
    searcher = new IndexSearcher(indexName);
    Analyzer analyzer = createAnalyzer();
    getFieldInfo();

    // Copy the indexed field names into an array for the multi-field parser.
    int arraySize = indexedFields.size();
    String indexedArray[] = new String[arraySize];
    for (int ii = 0; ii < arraySize; ii++) {
      indexedArray[ii] = (String) indexedFields.get(ii);
    }
    MultiFieldQueryParser parser = new MultiFieldQueryParser(indexedArray, analyzer);
    query = parser.parse(queryString);
    System.out.println("Searching for: " + query.toString());
    return (query);
  }

  /**
   * Parses the query against all fields, opens a searcher, and runs the
   * search. Side effects: sets {@link #searcher}, {@link #query} and
   * {@link #fieldsArray}; the caller is responsible for closing the searcher.
   *
   * TODO: Allow user to specify analyzer
   */
  private Hits initSearch(String queryString) throws IOException, ParseException {
    searcher = new IndexSearcher(indexName);
    Analyzer analyzer = createAnalyzer();
    getFieldInfo();

    // Copy all field names into an array for the multi-field parser.
    int arraySize = fields.size();
    fieldsArray = new String[arraySize];
    for (int ii = 0; ii < arraySize; ii++) {
      fieldsArray[ii] = (String) fields.get(ii);
    }
    MultiFieldQueryParser parser = new MultiFieldQueryParser(fieldsArray, analyzer);
    query = parser.parse(queryString);
    System.out.println("Searching for: " + query.toString());
    Hits hits = searcher.search(query);
    return (hits);
  }

  /**
   * Prints the number of documents matching the given query.
   *
   * @param queryString the query to parse and count matches for
   */
  public void count(String queryString) throws java.io.IOException, ParseException {
    Hits hits = initSearch(queryString);
    System.out.println(hits.length() + " total documents");
    searcher.close();
  }

  /** Prints a message to the console. */
  public static void message(String s) {
    System.out.println(s);
  }

  /**
   * Populates {@link #fields} with all field names and {@link #indexedFields}
   * with the indexed field names, skipping null/empty names.
   */
  private void getFieldInfo() throws IOException {
    IndexReader indexReader = IndexReader.open(indexName);
    fields = new ArrayList();
    indexedFields = new ArrayList();

    //get the list of all field names
    fieldIterator = indexReader.getFieldNames(FieldOption.ALL).iterator();
    while (fieldIterator.hasNext()) {
      Object field = fieldIterator.next();
      if (field != null && !field.equals(""))
        fields.add(field.toString());
    }
    //
    //get the list of indexed field names
    fieldIterator = indexReader.getFieldNames(FieldOption.INDEXED).iterator();
    while (fieldIterator.hasNext()) {
      Object field = fieldIterator.next();
      if (field != null && !field.equals(""))
        indexedFields.add(field.toString());
    }
    indexReader.close();
  }

  // Copied from DocumentWriter
  /**
   * Re-tokenizes the indexed, tokenized fields of a document and prints the
   * ten most frequent terms with their counts.
   */
  private void invertDocument(Document doc) throws IOException {
    Map tokenMap = new HashMap();
    final int maxFieldLength = 10000;

    Analyzer analyzer = createAnalyzer();
    Iterator fields = doc.getFields().iterator();
    while (fields.hasNext()) {
      Field field = (Field) fields.next();
      String fieldName = field.name();

      if (field.isIndexed()) {
        if (field.isTokenized()) { // tokenized field
          Reader reader; // find or make Reader
          if (field.readerValue() != null)
            reader = field.readerValue();
          else if (field.stringValue() != null)
            reader = new StringReader(field.stringValue());
          else
            throw new IllegalArgumentException
                ("field must have either String or Reader value");

          int position = 0;
          // Tokenize field and count term frequencies
          TokenStream stream = analyzer.tokenStream(fieldName, reader);
          TermAttribute termAtt = (TermAttribute) stream.addAttribute(TermAttribute.class);
          PositionIncrementAttribute posIncrAtt = (PositionIncrementAttribute) stream.addAttribute(PositionIncrementAttribute.class);

          try {
            while (stream.incrementToken()) {
              position += posIncrAtt.getPositionIncrement();
              String name = termAtt.term();
              Integer count = (Integer) tokenMap.get(name);
              if (count == null) { // not in there yet
                tokenMap.put(name, new Integer(1)); //first one
              } else {
                tokenMap.put(name, new Integer(count.intValue() + 1));
              }
              // Stop after the same field-length cap DocumentWriter used.
              if (position > maxFieldLength) break;
            }
          } finally {
            stream.close();
          }
        }
      }
    }
    Entry[] sortedHash = getSortedMapEntries(tokenMap);
    for (int ii = 0; ii < sortedHash.length && ii < 10; ii++) {
      Entry currentEntry = sortedHash[ii];
      message((ii + 1) + ":" + currentEntry.getKey() + " " + currentEntry.getValue());
    }
  }

  /** Provides a list of the top terms of the index.
   *
   * @param field the name of the field to restrict to, or null for all fields
   */
  public void terms(String field) throws IOException {
    TreeMap termMap = new TreeMap();
    IndexReader indexReader = IndexReader.open(indexName);
    TermEnum terms = indexReader.terms();
    try {
      while (terms.next()) {
        Term term = terms.term();
        //message(term.field() + ":" + term.text() + " freq:" + terms.docFreq());
        //if we're either not looking by field or we're matching the specific field
        if ((field == null) || field.equals(term.field()))
          termMap.put(term.field() + ":" + term.text(), new Integer((terms.docFreq())));
      }
    } finally {
      // Release the enumeration's resources even if iteration fails.
      terms.close();
    }

    // Print at most the first 100 terms (sorted by "field:text").
    Iterator termIterator = termMap.keySet().iterator();
    for (int ii = 0; termIterator.hasNext() && ii < 100; ii++) {
      String termDetails = (String) termIterator.next();
      Integer termFreq = (Integer) termMap.get(termDetails);
      message(termDetails + ": " + termFreq);
    }
    indexReader.close();
  }

  /** Sort Map entries into descending order of their (Comparable) values.
   * @param m the map we're sorting
   * from http://developer.java.sun.com/developer/qow/archive/170/index.jsp
   */
  public static Entry[]
      getSortedMapEntries(Map m) {
    Set set = m.entrySet();
    Entry[] entries =
        (Entry[]) set.toArray(
            new Entry[set.size()]);
    Arrays.sort(entries, new Comparator() {
      public int compare(Object o1, Object o2) {
        Object v1 = ((Entry) o1).getValue();
        Object v2 = ((Entry) o2).getValue();
        return ((Comparable) v2).compareTo(v1); //descending order
      }
    });
    return entries;
  }

  /**
   * Switches the Analyzer used for subsequent operations, or reports the
   * current one when the argument is the literal "current".
   *
   * @param word an Analyzer class FQN, or "current" to query the setting
   */
  public void analyzer(String word) {
    if ("current".equals(word)) {
      String current = analyzerClassFQN == null ? "StandardAnalyzer" : analyzerClassFQN;
      message("The currently used Analyzer class is: " + current);
      return;
    }
    analyzerClassFQN = word;
    message("Switched to Analyzer class " + analyzerClassFQN);
  }
}