/*
* Copyright (c) 2011 LinkedIn, Inc
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.flaptor.indextank.index.lsi;
import java.io.File;
import java.io.IOException;
import java.util.Map;
import java.util.concurrent.BlockingDeque;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.LinkedBlockingDeque;
import java.util.concurrent.atomic.AtomicReference;
import org.apache.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.MMapDirectory;
import com.flaptor.indextank.index.QueryMatcher;
import com.flaptor.indextank.index.lsi.term.IndexReaderTermMatcher;
import com.flaptor.indextank.index.scorer.FacetingManager;
import com.flaptor.indextank.index.scorer.Scorer;
import com.flaptor.indextank.index.term.TermMatcher;
import com.flaptor.indextank.index.term.query.TermBasedQueryMatcher;
import com.flaptor.indextank.query.IndexEngineParser;
import com.flaptor.util.Execute;
import com.flaptor.util.Pair;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
public class LsiIndex {
private static final Logger logger = Logger.getLogger(Execute.whoAmI());
public static final int SEARCHER_POOL_SIZE = 2;
public static final String PAYLOAD_TERM_FIELD = "docidpayload";
public static final String PAYLOAD_TERM_TEXT = "docidpayload";
public static final Term PAYLOAD_TERM = new Term(PAYLOAD_TERM_FIELD, PAYLOAD_TERM_TEXT);
private final File dirLocation;
private /*final*/ Directory directory;
private final Scorer scorer;
private final AtomicReference<Pair<BlockingDeque<IndexSearcher>, BlockingDeque<QueryMatcher>>> searchObjects;
private volatile IndexWriter indexWriter;
private final FacetingManager facetingManager;
private final IndexEngineParser parser;
private final Map<String, String> stats = new ConcurrentHashMap<String, String>();
public LsiIndex(IndexEngineParser parser, String directoryPath, Scorer scorer, FacetingManager facetingManager) throws IOException {
this.parser = parser;
this.scorer = scorer;
this.facetingManager = facetingManager;
Preconditions.checkNotNull(directoryPath);
dirLocation = new File(directoryPath);
if (!dirLocation.exists() || !dirLocation.isDirectory()) {
throw new IllegalArgumentException("Wrong directory path.");
}
directory = new MMapDirectory(dirLocation);
reopenWriter();
searchObjects = new AtomicReference<Pair<BlockingDeque<IndexSearcher>, BlockingDeque<QueryMatcher>>>();
reopenSearcher();
}
private void reopenSearcher() {
BlockingDeque<IndexSearcher> searcherPool= new LinkedBlockingDeque<IndexSearcher>();
BlockingDeque<QueryMatcher> matcherPool = new LinkedBlockingDeque<QueryMatcher>();
for (int i=0; i < SEARCHER_POOL_SIZE; i++) {
try {
IndexSearcher searcher = new IndexSearcher(directory, true); //read-only for better concurrent performance.
TermMatcher termMatcher = new IndexReaderTermMatcher(searcher.getIndexReader(), PAYLOAD_TERM);
QueryMatcher matcher = new TermBasedQueryMatcher(scorer, termMatcher, this.facetingManager);
searcherPool.addFirst(searcher); //no blocking, throws exception.
matcherPool.addFirst(matcher);
} catch (CorruptIndexException cie) {
logger.fatal("HORROR!!! corrupted index. unable to reopen", cie);
} catch (IOException ioe) {
logger.fatal("HORROR!!! IO exception. unable to reopen", ioe);
}
}
searchObjects.set(new Pair<BlockingDeque<IndexSearcher>, BlockingDeque<QueryMatcher>>(searcherPool, matcherPool));
}
private void reopenWriter() throws CorruptIndexException, LockObtainFailedException, IOException {
indexWriter = new IndexWriter(this.directory, getAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
}
private Analyzer getAnalyzer() {
Analyzer analyzer = parser.getAnalyzer();
Analyzer payloadAnalyzer = new PayloadAnalyzer();
return new PerFieldAnalyzerWrapper(analyzer, ImmutableMap.of(LsiIndex.PAYLOAD_TERM_FIELD, payloadAnalyzer));
}
public BlockingDeque<IndexSearcher> getLuceneIndexSearcherPool() {
return searchObjects.get().first();
}
public BlockingDeque<QueryMatcher> getQueryMatcherPool() {
return searchObjects.get().last();
}
public IndexWriter getLuceneIndexWriter() {
return indexWriter;
}
public void flush(){
try {
long t = System.currentTimeMillis();
int before = indexWriter.maxDoc();
indexWriter.commit();
int after = indexWriter.maxDoc();
int total = indexWriter.numDocs();
double commitTime = (System.currentTimeMillis() - t) / 1000.0;
stats.put("lucene_doc_count", String.valueOf(total));
stats.put("commit_time", String.valueOf(commitTime));
stats.put("lucene_max_doc", String.valueOf(after));
stats.put("lucene_previous_max_doc", String.valueOf(before));
logger.info(String.format("Commited index to disk in %.3fs. Document count is %d. MaxDoc from %d to %d", commitTime, total, before, after));
} catch (IOException e) {
logger.fatal("unexpected exception while commiting the index: ", e);
System.exit(1);
}
reopenSearcher();
}
public Map<String, String> getStats() {
return stats;
}
}