package org.wikibrain.lucene; import com.typesafe.config.Config; import org.apache.commons.collections.CollectionUtils; import org.apache.lucene.util.Version; import org.wikibrain.conf.Configuration; import org.wikibrain.conf.ConfigurationException; import org.wikibrain.conf.Configurator; import org.wikibrain.core.model.NameSpace; import java.io.File; import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.Map; /** * * This class can be instantiated through a configurator or by default configuration. * It provides access to all configurable options relevant to Lucene, such as version, * directory, and namespaces to index. It also contains static final variables for * different field names. It should be passed to all classes in the lucene package. * * @author Ari Weiland * */ public class LuceneOptions { public static final String LOCAL_ID_FIELD_NAME = "local_id"; public static final String LANG_ID_FIELD_NAME = "lang_id"; public final String name; public final Configurator configurator; public final Version matchVersion; public final File luceneRoot; public final Collection<NameSpace> namespaces; public final TokenizerOptions options; public final TextFieldElements elements; /** * Used by provider only. */ private LuceneOptions(String name, Configurator configurator, String matchVersion, String luceneRoot, List<String> namespaces, TokenizerOptions options, TextFieldElements elements) { this.name = name; this.configurator = configurator; this.matchVersion = Version.parseLeniently(matchVersion); this.luceneRoot = new File(luceneRoot); this.namespaces = new ArrayList<NameSpace>(); for (String s : namespaces) { this.namespaces.add(NameSpace.getNameSpaceByName(s)); } this.options = options; this.elements = elements; } /** * Returns a default set of LuceneOptions. * * @return a default set of LuceneOptions */ public static LuceneOptions getDefaultOptions() { try { return new Configurator(new Configuration()).get(LuceneOptions.class, "plaintext"); } catch (ConfigurationException e) { throw new RuntimeException(e); } } private static TokenizerOptions buildOptions(boolean caseInsensitive, boolean useStopWords, boolean useStem) { TokenizerOptions options = new TokenizerOptions(); if (caseInsensitive) options.caseInsensitive(); if (useStopWords) options.useStopWords(); if (useStem) options.useStem(); return options; } private static TextFieldElements buildElements(int title, boolean redirects, boolean plainText) { TextFieldElements elements = new TextFieldElements(); elements.addTitle(title); if (redirects) elements.addRedirects(); if (plainText) elements.addPlainText(); return elements; } @Override public boolean equals(Object o) { if (!(o instanceof LuceneOptions)) return false; LuceneOptions opts = (LuceneOptions) o; return (this.name.equalsIgnoreCase(opts.name) && this.matchVersion == opts.matchVersion && this.luceneRoot.equals(opts.luceneRoot) && CollectionUtils.isEqualCollection(this.namespaces, opts.namespaces) && this.options.equals(opts.options) && this.elements.equals(opts.elements)); } public static class Provider extends org.wikibrain.conf.Provider<LuceneOptions> { public Provider(Configurator configurator, Configuration config) throws ConfigurationException { super(configurator, config); } @Override public Class getType() { return LuceneOptions.class; } @Override public String getPath() { return "lucene.options"; } @Override public LuceneOptions get(String name, Config config, Map<String, String> runtimeParams) throws ConfigurationException { if (!name.equalsIgnoreCase(config.getString("type"))) { throw new ConfigurationException("Could not find configuration " + name); } return new LuceneOptions( name, getConfigurator(), config.getString("version"), config.getString("directory"), config.getStringList("namespaces"), buildOptions( config.getBoolean("caseInsensitive"), config.getBoolean("useStopWords"), config.getBoolean("useStem")), buildElements( config.getInt("title"), config.getBoolean("redirects"), config.getBoolean("plaintext")) ); } } }