/*
* Copyright (c) 2011 LinkedIn, Inc
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.flaptor.indextank.api;
import java.io.File;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.nio.charset.Charset;
import java.util.Arrays;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;
import org.apache.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.json.simple.JSONValue;
import com.flaptor.indextank.BoostingIndexer;
import com.flaptor.indextank.DocumentStoringIndexer;
import com.flaptor.indextank.IndexRecoverer;
import com.flaptor.indextank.LogIndexRecoverer;
import com.flaptor.indextank.blender.Blender;
import com.flaptor.indextank.dealer.Dealer;
import com.flaptor.indextank.index.BasicPromoter;
import com.flaptor.indextank.index.lsi.LargeScaleIndex;
import com.flaptor.indextank.index.rti.RealTimeIndex;
import com.flaptor.indextank.index.scorer.BoostsScorer;
import com.flaptor.indextank.index.scorer.DynamicDataFacetingManager;
import com.flaptor.indextank.index.scorer.DynamicDataManager;
import com.flaptor.indextank.index.scorer.FacetingManager;
import com.flaptor.indextank.index.scorer.NoFacetingManager;
import com.flaptor.indextank.index.scorer.ScoreFunction;
import com.flaptor.indextank.index.scorer.UserFunctionsManager;
import com.flaptor.indextank.index.storage.InMemoryStorage;
import com.flaptor.indextank.query.IndexEngineParser;
import com.flaptor.indextank.query.analyzers.CompositeAnalyzer;
import com.flaptor.indextank.query.analyzers.FilteringAnalyzer;
import com.flaptor.indextank.rpc.IndexerServer;
import com.flaptor.indextank.rpc.IndexerStatus;
import com.flaptor.indextank.rpc.SearcherServer;
import com.flaptor.indextank.rpc.SuggestorServer;
import com.flaptor.indextank.search.DidYouMeanSearcher;
import com.flaptor.indextank.search.DocumentSearcher;
import com.flaptor.indextank.search.SnippetSearcher;
import com.flaptor.indextank.search.TrafficLimitingSearcher;
import com.flaptor.indextank.storage.alternatives.DocumentStorage;
import com.flaptor.indextank.suggest.DidYouMeanSuggestor;
import com.flaptor.indextank.suggest.NoSuggestor;
import com.flaptor.indextank.suggest.QuerySuggestor;
import com.flaptor.indextank.suggest.Suggestor;
import com.flaptor.indextank.suggest.TermSuggestor;
import com.flaptor.util.Execute;
import com.flaptor.util.FileUtil;
import com.google.common.base.Preconditions;
import com.google.common.collect.Maps;
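/**
* Builds and holds the components of a single index engine instance (indexer,
* searcher, scorer, suggestor, optional document storage and query parser),
* either from explicit constructor arguments or from command-line options via
* {@link #instantiate(String[])}.
*
* @author Flaptor Team
*/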
public class EmbeddedIndexEngine {
/** Class-wide log4j logger; its name is whatever {@code Execute.whoAmI()} returns. */
private static final Logger logger = Logger.getLogger(Execute.whoAmI());
// --- Engine components; all of them are assigned in the 13-argument constructor. ---
/** Indexer exposed by {@link #getIndexer()}; created as a Dealer and replaceable through setIndexer. */
private BoostingIndexer indexer;
/** Searcher exposed by {@link #getSearcher()}; created as a Blender and replaceable through setSearcher. */
private DocumentSearcher searcher;
/** Scorer shared by the large scale index and the real time index. */
private BoostsScorer scorer;
/** Dynamic data (boosts) manager, sized with the {@code boostsSize} constructor argument. */
private DynamicDataManager boostsManager;
private LargeScaleIndex lsi;
private RealTimeIndex rti;
/** Suggestor implementation chosen from the {@link SuggestValues} constructor argument. */
private Suggestor suggestor;
/** Document storage; an InMemoryStorage for {@code StorageValues.RAM}, otherwise left {@code null}. */
private DocumentStorage storage = null;
/** Query parser, built with the default field "text" and, if configured, a custom analyzer. */
private IndexEngineParser parser;
/** Registry of scoring functions, backed by {@link #scorer}. */
private UserFunctionsManager functionsManager = null;
private final BasicPromoter promoter;
/** Current status; set to {@code IndexerStatus.started} at the end of construction. */
private IndexerStatus status;
// --- Values copied verbatim from the constructor arguments. ---
private final String indexCode;
private final String environment;
private final int basePort;
private final File baseDir;
/** Value of the "encoding" configuration key, or the JVM default charset name when absent. */
private final String characterEncoding;
/** Recovery source: CASSANDRA when the "index_recovery" configuration value is "cassandra", SIMPLEDB otherwise. */
private IndexRecoverer.IndexStorageValue recoveryStorage;
/** Value of the "cassandra_cluster_hosts" configuration key; may be {@code null}. */
private String cassandraClusterHosts;
/*
* Log based storage settings, read from the index configuration map:
* - log_based_storage: true/false
* - log_server_host (only read when log_based_storage is true)
* - log_server_port (only read when log_based_storage is true)
*/
private boolean logBasedStorage = false;
private String logServerHost;
private int logServerPort;
// Defaults applied by instantiate() when the matching command-line option is absent.
/** Default for the "port" option. */
private static final int DEFAULT_BASE_PORT = 7910;
/** Default for the "rti-size" option. */
private static final int DEFAULT_RTI_SIZE = 1000;
/** Default for the "bdb-cache" option (described in the option help as megabytes). */
private static final int DEFAULT_BDB_CACHE = 100;
/** Kinds of suggest/autocomplete support the engine can be built with. */
public static enum SuggestValues { NO, QUERIES, DOCUMENTS};
/** Document storage backends that can be requested from the engine. */
public static enum StorageValues { NO, BDB, RAM, CASSANDRA };
/**
 * Creates an engine that uses an empty configuration map.
 *
 * <p>This is a convenience overload: every argument is forwarded unchanged to
 * the 13-argument constructor, together with a freshly created empty map as
 * the {@code configuration} argument.
 *
 * @throws IOException declared because the delegated-to constructor declares it
 */
public EmbeddedIndexEngine(File baseDir, int basePort, int rtiSize, boolean load, int boostsSize,
        SuggestValues suggest, StorageValues storageValue, int bdbCache, String functions,
        boolean facets, String indexCode, String environment) throws IOException {
    this(baseDir, basePort, rtiSize, load, boostsSize, suggest, storageValue, bdbCache, functions,
            facets, indexCode, environment, Maps.<Object, Object>newHashMap());
}
/**
 * Creates an engine and all of its components.
 *
 * <p>The following keys are read from {@code configuration}:
 * <ul>
 * <li>{@code encoding}: character encoding name; defaults to the JVM default charset.</li>
 * <li>{@code log_based_storage} (Boolean), and when it is true
 *     {@code log_server_host} (String) and {@code log_server_port} (number).</li>
 * <li>{@code analyzer_config}: analyzer definition, either a single
 *     factory/configuration map or a map with {@code perField} and
 *     {@code default} entries.</li>
 * <li>{@code cassandra_cluster_hosts} and {@code index_recovery}
 *     ("cassandra" selects cassandra recovery, anything else simpleDB).</li>
 * </ul>
 *
 * @param baseDir       directory handed to the components that persist state; must not be null
 * @param basePort      base port, must be greater than zero
 * @param rtiSize       size passed to the real time index and to the dealer
 * @param load          passed to the in-memory storage and to the promoter
 * @param boostsSize    size passed to the dynamic data manager
 * @param suggest       which suggestor implementation to create
 * @param storageValue  which document storage to create; only RAM and NO are handled here
 * @param bdbCache      not used by this constructor
 * @param functions     '|' separated list of {@code <id>:<definition>} scoring functions; may be null or empty
 * @param facets        whether faceting is backed by the dynamic data manager or disabled
 * @param indexCode     index code; must not be null
 * @param environment   environment prefix; must not be null
 * @param configuration index configuration map; must not be null
 * @throws IOException              if one of the components fails with an I/O error
 * @throws IllegalArgumentException if {@code basePort} is not positive, if log based storage is
 *                                  enabled without a numeric {@code log_server_port}, or if
 *                                  cassandra recovery is requested without cluster hosts
 */
public EmbeddedIndexEngine(File baseDir,
        int basePort,
        int rtiSize,
        boolean load,
        int boostsSize,
        SuggestValues suggest,
        StorageValues storageValue,
        int bdbCache,
        String functions,
        boolean facets,
        String indexCode,
        String environment,
        Map<Object, Object> configuration) throws IOException {
    Preconditions.checkNotNull(indexCode);
    Preconditions.checkNotNull(environment);
    Preconditions.checkArgument(basePort > 0);
    Preconditions.checkNotNull(baseDir);
    // Fail up front instead of with an NPE half way through the construction.
    Preconditions.checkNotNull(configuration);

    this.indexCode = indexCode;
    this.environment = environment;
    this.basePort = basePort;
    this.baseDir = baseDir;

    String defaultField = "text";

    if (configuration.containsKey("encoding")) {
        characterEncoding = (String) configuration.get("encoding");
    } else {
        characterEncoding = Charset.defaultCharset().name();
    }

    if (configuration.containsKey("log_based_storage")) {
        logBasedStorage = (Boolean) configuration.get("log_based_storage");
        if (logBasedStorage) {
            logServerHost = (String) configuration.get("log_server_host");
            // Accept any numeric type: JSON parsing yields Long, programmatic callers may pass Integer.
            Object port = configuration.get("log_server_port");
            if (!(port instanceof Number)) {
                throw new IllegalArgumentException(
                        "log_server_port must be a number when log_based_storage is enabled (found '" + port + "')");
            }
            logServerPort = ((Number) port).intValue();
        }
    }

    parser = createParser(configuration, defaultField);

    boostsManager = new DynamicDataManager(boostsSize, baseDir);
    scorer = new BoostsScorer(boostsManager, Maps.<Integer, ScoreFunction>newHashMap());
    functionsManager = new UserFunctionsManager(scorer);
    registerScoringFunctions(functions);

    FacetingManager facetingManager;
    if (facets) {
        facetingManager = new DynamicDataFacetingManager(boostsManager);
    } else {
        facetingManager = new NoFacetingManager();
    }

    lsi = new LargeScaleIndex(scorer, parser, baseDir, facetingManager);
    rti = new RealTimeIndex(scorer, parser, rtiSize, facetingManager);

    switch (suggest) {
        case NO:
            suggestor = new NoSuggestor();
            break;
        case DOCUMENTS:
            // The term suggestor gets its own parser without the custom analyzer.
            IndexEngineParser suggestorParser = new IndexEngineParser(defaultField);
            suggestor = new TermSuggestor(suggestorParser, baseDir);
            break;
        case QUERIES:
            suggestor = new QuerySuggestor(parser, baseDir);
            break;
    }

    this.cassandraClusterHosts = (String) configuration.get("cassandra_cluster_hosts");

    // index recovery configuration
    String recoveryConf = (String) configuration.get("index_recovery");
    if ("cassandra".equals(recoveryConf)) {
        if (this.cassandraClusterHosts == null || this.cassandraClusterHosts.trim().length() == 0) {
            throw new IllegalArgumentException("Invalid cassandra servers for index recovery");
        }
        this.recoveryStorage = IndexRecoverer.IndexStorageValue.CASSANDRA;
        logger.info("Index recovery configuration set to recover index from cassandra servers: " + this.cassandraClusterHosts);
    } else {
        logger.info("Index recovery configuration set to recover index from simpleDB");
        this.recoveryStorage = IndexRecoverer.IndexStorageValue.SIMPLEDB;
    }

    switch (storageValue) {
        case RAM:
            storage = new InMemoryStorage(baseDir, load);
            logger.info("Using in-memory storage");
            break;
        case NO:
            storage = null;
            logger.info("NOT Using storage");
            break;
        default:
            // BDB and CASSANDRA have no backend wired in here; make that visible
            // instead of silently running without storage.
            storage = null;
            logger.warn("Storage type " + storageValue + " is not handled by this engine. NOT Using storage");
            break;
    }

    promoter = new BasicPromoter(baseDir, load);
    searcher = new Blender(lsi, rti, suggestor, promoter, boostsManager);
    indexer = new Dealer(lsi, rti, suggestor, boostsManager, rtiSize, promoter, functionsManager);
    status = IndexerStatus.started;
}

/**
 * Creates the query parser, using the analyzer described by the
 * {@code analyzer_config} configuration entry when there is one.
 */
@SuppressWarnings("unchecked")
private IndexEngineParser createParser(Map<Object, Object> configuration, String defaultField) throws IOException {
    Map<Object, Object> analyzerConfiguration = (Map<Object, Object>) configuration.get("analyzer_config");
    if (analyzerConfiguration == null) {
        return new IndexEngineParser(defaultField);
    }

    Analyzer analyzer;
    if (analyzerConfiguration.containsKey("perField")) {
        Map<Object, Object> perfieldConfiguration = (Map<Object, Object>) analyzerConfiguration.get("perField");
        Map<String, Analyzer> perfieldAnalyzers = Maps.newHashMap();
        for (Entry<Object, Object> entry : perfieldConfiguration.entrySet()) {
            String field = (String) entry.getKey();
            Map<Object, Object> config = (Map<Object, Object>) entry.getValue();
            perfieldAnalyzers.put(field, buildAnalyzer(config));
        }
        Analyzer defaultAnalyzer = buildAnalyzer((Map<Object, Object>) analyzerConfiguration.get("default"));
        analyzer = new CompositeAnalyzer(defaultAnalyzer, perfieldAnalyzers);
    } else {
        analyzer = buildAnalyzer(analyzerConfiguration);
    }
    return new IndexEngineParser(defaultField, analyzer);
}

/**
 * Registers the default scoring function (id 0) followed by the user supplied
 * ones; a function that fails to register is logged and skipped.
 */
private void registerScoringFunctions(String functions) {
    String def0 = "0-A";
    try {
        functionsManager.addFunction(0, def0); // Default timestamp function
    } catch (Exception ex) {
        logger.error("Defining scoring function (spec '" + def0 + "')", ex);
    }

    if (null == functions || "".equals(functions)) {
        return;
    }
    for (String spec : functions.split("\\|")) {
        try {
            // Split on the first ':' only, the definition itself may contain colons.
            String[] parts = spec.split(":", 2);
            if (parts.length == 2) {
                int id = Integer.parseInt(parts[0].trim());
                String def = parts[1].trim();
                functionsManager.addFunction(id, def);
            } else {
                logger.error("Function should be defined as <id>:<definition> (found '" + spec + "').");
            }
        } catch (Exception ex) {
            logger.error("Defining scoring function (spec '" + spec + "')", ex);
        }
    }
}
/**
 * Builds an analyzer by reflectively calling a static factory method.
 *
 * <p>{@code configuration} is expected to contain:
 * <ul>
 * <li>{@code factory}: name of a class exposing
 *     {@code static Analyzer buildAnalyzer(Map)};</li>
 * <li>{@code configuration}: the map handed to that method (may be absent, in
 *     which case the factory receives {@code null}). When it contains a
 *     {@code filters} key the resulting analyzer is wrapped in a
 *     {@link FilteringAnalyzer}.</li>
 * </ul>
 *
 * @param configuration analyzer definition as described above
 * @return the analyzer produced by the factory, possibly wrapped with filters
 * @throws IllegalArgumentException if the {@code factory} entry is missing
 * @throws RuntimeException         if the factory class cannot be loaded or its
 *                                  {@code buildAnalyzer} method cannot be found,
 *                                  accessed or fails
 */
@SuppressWarnings("unchecked")
private Analyzer buildAnalyzer(Map<Object, Object> configuration) {
    String factoryClassString = (String) configuration.get("factory");
    if (factoryClassString == null) {
        // Class.forName(null) would otherwise fail with an uninformative NullPointerException.
        throw new IllegalArgumentException("Analyzer configuration does not define a 'factory' class");
    }
    Map<Object, Object> factoryConfig = (Map<Object, Object>) configuration.get("configuration");

    Analyzer analyzer;
    try {
        Class<?> factoryClass = Class.forName(factoryClassString);
        Method method = factoryClass.getMethod("buildAnalyzer", new Class[] {Map.class});
        // Static method: there is no receiver instance.
        analyzer = (Analyzer) method.invoke(null, factoryConfig);
        // The factory configuration is optional, so it has to be null-checked before looking for filters.
        if (factoryConfig != null && factoryConfig.containsKey("filters")) {
            analyzer = new FilteringAnalyzer(analyzer, factoryConfig);
        }
    } catch (ClassNotFoundException e) {
        throw new RuntimeException("Analyzer factory class not found", e);
    } catch (SecurityException e) {
        throw new RuntimeException("Analyzer factory class not instantiable", e);
    } catch (NoSuchMethodException e) {
        throw new RuntimeException("Analyzer factory class does not have the required static method buildAnalyzer", e);
    } catch (IllegalArgumentException e) {
        throw new RuntimeException("Analyzer factory class does not have the required static method buildAnalyzer", e);
    } catch (IllegalAccessException e) {
        throw new RuntimeException("Analyzer factory class does not have the required static method buildAnalyzer or it is not accessible", e);
    } catch (InvocationTargetException e) {
        throw new RuntimeException("Analyzer factory class threw an exception for the give configuration", e);
    }
    return analyzer;
}
/** @return the scorer created by the constructor */
public BoostsScorer getScorer() {
    return scorer;
}

/** @return the dynamic data (boosts) manager created by the constructor */
public DynamicDataManager getDynamicDataManager() {
    return boostsManager;
}

/** @return the indexer currently installed in this engine */
public BoostingIndexer getIndexer() {
    return indexer;
}

/** @return the searcher currently installed in this engine */
public DocumentSearcher getSearcher() {
    return searcher;
}

/** @return the suggestor created by the constructor */
public Suggestor getSuggestor() {
    return suggestor;
}

/** @return the document storage, or {@code null} when none was created */
public DocumentStorage getStorage() {
    return this.storage;
}

/** @return the query parser created by the constructor */
public IndexEngineParser getParser() {
    return this.parser;
}

/** Replaces the status reported by {@link #getStatus()}. */
public void setStatus(IndexerStatus status) {
    this.status = status;
}

/** @return the current status of this engine */
public IndexerStatus getStatus() {
    return this.status;
}

/** @return the character encoding name resolved at construction time */
public String getCharacterEncoding() {
    return this.characterEncoding;
}

/** Installs a different indexer, typically one wrapping the current one. */
private void setIndexer(BoostingIndexer indexer) {
    this.indexer = indexer;
}

/** Installs a different searcher, typically one wrapping the current one. */
private void setSearcher(DocumentSearcher searcher) {
    this.searcher = searcher;
}
/**
 * Declares the command-line options understood by {@link #instantiate(String[])}.
 *
 * <p>"dir", "index-code" and "environment-prefix" are required; every other
 * option is optional. A new {@link Options} instance is built on each call.
 *
 * @return the option definitions
 */
@SuppressWarnings("static-access")
private static Options getOptions() {
    Option baseDir = OptionBuilder.withArgName("base-dir")
            .hasArg()
            .isRequired()
            .withDescription("The base dir")
            .withLongOpt("dir")
            .create("d");
    Option basePort = OptionBuilder.withArgName("base-port")
            .hasArg()
            .withDescription("The base port")
            .withLongOpt("port")
            .create("p");
    Option boostSize = OptionBuilder.withArgName("boosts-size")
            .hasArg()
            .withDescription("Number of available boosts")
            .withLongOpt("boosts")
            .create("b");
    Option rtiSize = OptionBuilder.withArgName("rti-size")
            .hasArg()
            .withDescription("The size limit for the RTI")
            .withLongOpt("rti-size")
            .create("rs");
    Option help = OptionBuilder.withDescription("displays this help")
            .withLongOpt("help")
            .create("h");
    Option snippets = OptionBuilder.withDescription("Allow snippet generation and field fetching.")
            .withLongOpt("snippets")
            .create("sn");
    Option recover = OptionBuilder.withDescription("Recover documents from the storage.")
            .withLongOpt("recover")
            .create("r");
    Option indexCode = OptionBuilder.withArgName("code")
            .hasArg()
            .isRequired()
            .withDescription("the index code this indexengine has")
            .withLongOpt("index-code")
            .create("ic");
    Option environment = OptionBuilder.withArgName("environment")
            .hasArg()
            .isRequired()
            .withDescription("environment prefix")
            .withLongOpt("environment-prefix")
            .create("env");
    /*
     * Analyzer argument should receive a JSON string with the following root structure:
     * - factory: a java type that implements the following static method:
     *   org.apache.lucene.analysis.Analyzer buildAnalyzer(Map).
     * - configuration: a JSON object to be passed to the buildAnalyzer method.
     */
    Option analyzer = OptionBuilder.withArgName("analyzer")
            .hasArg()
            .withDescription("specific analyzer")
            .withLongOpt("analyzer")
            .create("an");
    Option configFile = OptionBuilder.withArgName("conf-file")
            .hasArg()
            .withDescription("configuration file")
            .withLongOpt("conf-file")
            .create("cf");
    // The two description fragments are concatenated, hence the explicit separating space.
    Option loadState = OptionBuilder.withArgName("load")
            .withDescription("if present, the index engine will try to restore its state "
                    + "from the serialized form.")
            .withLongOpt("load-state")
            .create("l");
    Option suggest = OptionBuilder.withArgName("suggest")
            .hasArg()
            .withDescription("if present, loads the suggest/autocomplete system.")
            .withLongOpt("suggest")
            .create("su");
    Option facets = OptionBuilder.withArgName("facets")
            .withDescription("if present, performs faceting queries.")
            .withLongOpt("facets")
            .create("fa");
    Option functions = OptionBuilder.withArgName("functions")
            .hasArg()
            .withDescription("list of '|' separated scoring functions, each of which has the form <id>:<definition>.")
            .withLongOpt("functions")
            .create("fn");
    Option didyoumean = OptionBuilder.withLongOpt("didyoumean")
            .withDescription("if present, performs 'did you mean?' suggestions on queries. Requires --suggest documents.")
            .create("dym");
    // Lists every value accepted by the command-line parsing code.
    Option storage = OptionBuilder.withLongOpt("storage")
            .hasArg()
            .withDescription("if present, specifies a storage backend. Options are 'cassandra', 'bdb' and 'ram'. Defaults to 'ram'.")
            .create("st");
    Option bdbCache = OptionBuilder.withLongOpt("bdb-cache")
            .hasArg()
            .withDescription("if present, specifies the size of the berkeleyDb cache per thread, in megabytes. Defaults to 100MB.")
            .create("bc");

    Options options = new Options();
    options.addOption(baseDir);
    options.addOption(basePort);
    options.addOption(boostSize);
    options.addOption(help);
    options.addOption(snippets);
    options.addOption(recover);
    options.addOption(indexCode);
    options.addOption(rtiSize);
    options.addOption(loadState);
    options.addOption(suggest);
    options.addOption(facets);
    options.addOption(functions);
    options.addOption(environment);
    options.addOption(analyzer);
    options.addOption(didyoumean);
    options.addOption(configFile);
    options.addOption(storage);
    options.addOption(bdbCache);
    return options;
}
/**
 * Prints the command-line usage to standard output.
 *
 * @param options option definitions to describe
 * @param error   parse failure reason printed before the usage, or
 *                {@code null} to print the usage only
 */
private static void printHelp(Options options, String error) {
    boolean hasError = error != null;
    if (hasError) {
        System.out.println("Parsing failed. Reason: " + error);
    }
    new HelpFormatter().printHelp("IndexEngine", options);
}
//--------------------------------------------------------------------------------
//PRIVATE CLASSES
private static class ShutdownThread extends Thread {
private final BoostingIndexer server;
public ShutdownThread(BoostingIndexer server) {
this.server = server;
setName("IndexEngine's ShutdownThread");
}
@Override
public void run() {
try {
logger.info("Shutdown hook started.");
server.dump();
logger.info("Shutdown hook ended.");
} catch (Exception e) {
logger.error("Exception caught while saving state to disk. This probably means that some data was lost.", e);
}
}
}
/**
 * Immutable holder grouping an indexer, a searcher and a suggestor.
 * The references are stored as given; no null checks are performed.
 */
public static class Components {
    // Never reassigned after construction, so they are declared final.
    private final BoostingIndexer indexer;
    private final DocumentSearcher searcher;
    private final Suggestor suggestor;

    /**
     * @param indexer   indexer to expose
     * @param searcher  searcher to expose
     * @param suggestor suggestor to expose
     */
    public Components(BoostingIndexer indexer, DocumentSearcher searcher,
            Suggestor suggestor) {
        this.indexer = indexer;
        this.searcher = searcher;
        this.suggestor = suggestor;
    }

    /** @return the indexer given at construction */
    public BoostingIndexer getIndexer() {
        return indexer;
    }

    /** @return the searcher given at construction */
    public DocumentSearcher getSearcher() {
        return searcher;
    }

    /** @return the suggestor given at construction */
    public Suggestor getSuggestor() {
        return suggestor;
    }
}
//--------------------------------------------------------------------------------
//STATIC METHODS
/**
 * Builds an engine from command-line style arguments.
 *
 * <p>Steps, in order: configures log4j from {@code log4j.properties} when it
 * is found on the classpath, parses {@code args}, reads the optional JSON
 * configuration file ("conf-file"), creates the engine, wraps its indexer and
 * searcher according to the "snippets" and "didyoumean" options, and registers
 * a JVM shutdown hook that dumps the indexer.
 *
 * <p>This method calls {@code System.exit(1)} after printing the usage when
 * the "help" option is present or when the arguments cannot be parsed.
 *
 * @param args command-line arguments, see the option definitions for details
 * @return the configured engine
 * @throws IOException              if reading the configuration file or creating the engine fails
 * @throws IllegalArgumentException if an option has an unsupported value, if "didyoumean" is used
 *                                  without "--suggest documents", or if the configuration file
 *                                  does not contain a JSON object
 * @throws NumberFormatException    if a numeric option is not a valid integer
 */
public static EmbeddedIndexEngine instantiate(String[] args) throws IOException {
    String log4jConfigPath = FileUtil.getFilePathFromClasspath("log4j.properties");
    if (null != log4jConfigPath) {
        org.apache.log4j.PropertyConfigurator.configureAndWatch(log4jConfigPath);
    } else {
        logger.warn("log4j.properties not found on classpath!");
    }

    // create the parser
    CommandLineParser parser = new PosixParser();
    try {
        // parse the command line arguments
        CommandLine line = parser.parse(getOptions(), args);
        if (line.hasOption("help")) {
            printHelp(getOptions(), null);
            System.exit(1);
        }

        File baseDir = new File(line.getOptionValue("dir"));
        int basePort = Integer.parseInt(line.getOptionValue("port", String.valueOf(DEFAULT_BASE_PORT)));
        int boostsSize = Integer.parseInt(line.getOptionValue("boosts", String.valueOf(1)));
        int rtiSize = Integer.parseInt(line.getOptionValue("rti-size", String.valueOf(DEFAULT_RTI_SIZE)));
        boolean loadState = line.hasOption("load-state");

        SuggestValues suggest = SuggestValues.NO;
        if (line.hasOption("suggest")) {
            String value = line.getOptionValue("suggest");
            if (value.equalsIgnoreCase("queries")) {
                suggest = SuggestValues.QUERIES;
            } else if (value.equalsIgnoreCase("documents")) {
                suggest = SuggestValues.DOCUMENTS;
            } else {
                throw new IllegalArgumentException("Invalid value for suggest: can only be \"queries\" or \"documents\".");
            }
        }

        StorageValues storageValue = StorageValues.RAM;
        int bdbCache = 0;
        if (line.hasOption("storage")) {
            String storageType = line.getOptionValue("storage");
            if ("bdb".equals(storageType)) {
                storageValue = StorageValues.BDB;
                bdbCache = Integer.parseInt(line.getOptionValue("bdb-cache", String.valueOf(DEFAULT_BDB_CACHE)));
            } else if ("cassandra".equals(storageType)) {
                storageValue = StorageValues.CASSANDRA;
            } else if ("ram".equals(storageType)) {
                storageValue = StorageValues.RAM;
            } else {
                throw new IllegalArgumentException("storage has to be 'cassandra', 'bdb' or 'ram'. '" + storageType + "' given.");
            }
        }

        // getOptionValue returns null when the option is absent.
        String functions = line.getOptionValue("functions");

        String environment = line.getOptionValue("environment-prefix", "");
        logger.info("Command line option 'environment-prefix' set to " + environment);

        boolean facets = line.hasOption("facets");
        logger.info("Command line option 'facets' set to " + facets);

        String indexCode = line.getOptionValue("index-code");
        logger.info("Command line option 'index-code' set to " + indexCode);

        Map<Object, Object> configuration = Maps.newHashMap();
        String configFile = line.getOptionValue("conf-file", null);
        logger.info("Command line option 'conf-file' set to " + configFile);
        if (configFile != null) {
            // JSONValue.parse yields null for malformed input and a non-Map value for a
            // non-object root; report both here rather than failing later inside the constructor.
            Object parsed = JSONValue.parse(FileUtil.readFile(new File(configFile)));
            if (!(parsed instanceof Map)) {
                throw new IllegalArgumentException("Configuration file '" + configFile + "' does not contain a valid JSON object.");
            }
            @SuppressWarnings("unchecked")
            Map<Object, Object> parsedConfiguration = (Map<Object, Object>) parsed;
            configuration = parsedConfiguration;
        }

        EmbeddedIndexEngine ie = new EmbeddedIndexEngine(
                baseDir,
                basePort,
                rtiSize,
                loadState,
                boostsSize,
                suggest,
                storageValue,
                bdbCache,
                functions,
                facets,
                indexCode,
                environment,
                configuration);

        BoostingIndexer indexer = ie.getIndexer();
        DocumentSearcher searcher = ie.getSearcher();
        DocumentStorage storage = ie.getStorage();

        if (line.hasOption("snippets")) {
            if (storage == null) {
                logger.warn("Option 'snippets' requested but the engine has no document storage (storage type " + storageValue + ").");
            }
            indexer = new DocumentStoringIndexer(indexer, storage);
            ie.setIndexer(indexer);
            searcher = new SnippetSearcher(searcher, storage, ie.getParser());
            ie.setSearcher(searcher);
        }

        if (line.hasOption("didyoumean")) {
            if (suggest != SuggestValues.DOCUMENTS) {
                throw new IllegalArgumentException("didyoumean requires --suggest documents");
            }
            // With --suggest documents the engine's suggestor is a TermSuggestor, so the cast is safe.
            DidYouMeanSuggestor dym = new DidYouMeanSuggestor((TermSuggestor) ie.getSuggestor());
            searcher = new DidYouMeanSearcher(searcher, dym);
            ie.setSearcher(searcher);
        }

        // NOTE(review): this traffic limiting wrapper is created but never installed with
        // ie.setSearcher(...), so the returned engine does not use it. Kept as is to preserve
        // the current runtime behaviour - confirm whether it should be installed or removed.
        searcher = new TrafficLimitingSearcher(searcher);

        Runtime.getRuntime().addShutdownHook(new ShutdownThread(indexer));
        return ie;
    } catch (ParseException exp) {
        printHelp(getOptions(), exp.getMessage());
    }
    // Only reached when the arguments could not be parsed.
    System.exit(1);
    return null;
}
}