/*
* Copyright (c) 2011 LinkedIn, Inc
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.flaptor.indextank.index.lsi;
import java.io.IOException;
import org.apache.log4j.Logger;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import com.flaptor.indextank.Indexer;
import com.flaptor.indextank.index.Document;
import com.flaptor.util.Execute;
import com.google.common.base.Preconditions;
/**
 * {@link Indexer} implementation that converts IndexTank documents into
 * Lucene documents and writes them through the {@link IndexWriter} obtained
 * from an {@link LsiIndex}.
 *
 * <p>All mutating operations are {@code synchronized} on this instance.
 * I/O failures reported by the writer are logged (with their stack trace)
 * and not propagated to the caller.
 *
 * @author Flaptor Development Team
 */
public final class LsiIndexer implements Indexer {
    private static final Logger logger = Logger.getLogger(Execute.whoAmI());

    /** Name of the non-analyzed Lucene field holding the document id; also used as the update/delete key. */
    private static final String DOC_ID_FIELD = "documentId";

    //Lucene related variables.
    private final LsiIndex workIndex;
    private final IndexWriter writer;

    /**
     * Creates an indexer that writes to the given index, using the writer
     * returned by {@link LsiIndex#getLuceneIndexWriter()}.
     *
     * @param index the index to write to; must not be null.
     * @throws NullPointerException if {@code index} is null.
     */
    public LsiIndexer(LsiIndex index) {
        workIndex = Preconditions.checkNotNull(index);
        writer = workIndex.getLuceneIndexWriter();
    }

    /**
     * {@inheritDoc}
     *
     * <p>Every field of {@code itdoc} is indexed as an analyzed, non-stored
     * field. The document id is additionally indexed under
     * {@link LsiIndex#PAYLOAD_TERM_FIELD} (analyzed) and under
     * {@code "documentId"} (not analyzed). Any previously indexed document
     * with the same id is replaced. A null {@code docId} or {@code itdoc} is
     * logged and the addition is ignored.
     */
    public synchronized void add(final String docId, final Document itdoc) {
        if (null == docId) {
            logger.error("No documentId specified. Ignoring addition.");
            return;
        }
        if (null == itdoc) {
            // Same best-effort policy as a missing id: report and skip instead of failing with an NPE.
            logger.error("No document specified for docId=" + docId + ". Ignoring addition.");
            return;
        }

        org.apache.lucene.document.Document doc = asLuceneDocument(itdoc);
        doc.add(new Field(LsiIndex.PAYLOAD_TERM_FIELD, docId, Field.Store.NO, Field.Index.ANALYZED));
        doc.add(new Field(DOC_ID_FIELD, docId, Field.Store.NO, Field.Index.NOT_ANALYZED));

        if (logger.isDebugEnabled()) {
            logger.debug("Adding document with docId=" + docId + ". Doc is " + itdoc.getFieldNames());
        }
        try {
            // updateDocument = delete-by-term followed by add, so re-adding an id replaces the old version.
            writer.updateDocument(docIdTerm(docId), doc);
        } catch (IOException e) {
            // Pass the exception as the throwable argument so log4j records the stack trace.
            logger.error("Failed to add document with docId=" + docId, e);
        }
    }

    /**
     * {@inheritDoc}
     *
     * <p>A null {@code docId} is logged and the deletion is ignored.
     */
    public synchronized void del(final String docId) {
        if (null == docId) {
            logger.error("No documentId specified. Ignoring deletion.");
            return;
        }
        try {
            writer.deleteDocuments(docIdTerm(docId));
        } catch (IOException e) {
            logger.error("Failed to delete document with docId=" + docId, e);
        }
    }

    /**
     * Builds the term that identifies a document by its id.
     *
     * @param docId the document id.
     * @return a term on the {@code "documentId"} field with the given id as text.
     */
    private static Term docIdTerm(final String docId) {
        return new Term(DOC_ID_FIELD, docId);
    }

    /**
     * Flushes the underlying index by delegating to {@link LsiIndex#flush()}.
     * NOTE(review): presumably this makes pending changes durable so the
     * directory is in a consistent state; confirm against LsiIndex.
     */
    public synchronized void makeDirectoryCheckpoint() {
        workIndex.flush();
    }

    /**
     * Transforms an IndexTank document into a Lucene document, mapping each
     * field to an analyzed, non-stored Lucene field of the same name.
     *
     * @param itd the IndexTank document to convert; must not be null.
     * @return a new Lucene document with one field per field name in {@code itd}.
     */
    private static org.apache.lucene.document.Document asLuceneDocument(Document itd) {
        org.apache.lucene.document.Document doc = new org.apache.lucene.document.Document();
        for (String field : itd.getFieldNames()) {
            doc.add(new Field(field, itd.getField(field), Field.Store.NO, Field.Index.ANALYZED));
        }
        return doc;
    }
}