/*******************************************************************************
 * Trombone is a flexible text processing and analysis library used
 * primarily by Voyant Tools (voyant-tools.org).
 * 
 * Copyright (©) 2007-2012 Stéfan Sinclair & Geoffrey Rockwell
 * 
 * This file is part of Trombone.
 * 
 * Trombone is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * Trombone is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with Trombone.  If not, see <http://www.gnu.org/licenses/>.
 ******************************************************************************/
package org.voyanttools.trombone.lucene.analysis;

import java.util.EnumMap;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.AnalyzerWrapper;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.voyanttools.trombone.nlp.NlpFactory;
import org.voyanttools.trombone.storage.Storage;

/**
 * An {@link AnalyzerWrapper} that chooses the wrapped analyzer from the field
 * name. Field names matching one of the {@link AnalyzerName} constants get the
 * analyzer produced by that constant; every other field name gets a shared
 * {@link LexicalAnalyzer}.
 * <p>
 * Analyzers for known field names are created the first time the field is
 * requested and then cached for the lifetime of this wrapper instance.
 * <p>
 * NOTE(review): the lazy cache is a plain map with an unsynchronized
 * check-then-put. If a single instance is shared between threads, two threads
 * may each create an analyzer for the same field -- confirm against callers
 * whether that matters.
 *
 * @author sgs
 */
public class KitchenSinkPerFieldAnalyzerWrapper extends AnalyzerWrapper {

	/** Single keyword analyzer instance returned for the "id" and "version" fields. */
	private static final Analyzer KEYWORD_ANALYZER = new KeywordAnalyzer();

	/** Storage handed to {@link AnalyzerName#getAnalyzer(Storage)} when an analyzer is created. */
	private final Storage storage;

	/**
	 * The field names that receive a dedicated analyzer, each paired with the
	 * code that builds that analyzer.
	 */
	private enum AnalyzerName {
		ID("id") {
			@Override
			Analyzer getAnalyzer(Storage storage) {
				return KEYWORD_ANALYZER;
			}
		},
		VERSION("version") {
			@Override
			Analyzer getAnalyzer(Storage storage) {
				return KEYWORD_ANALYZER;
			}
		},
		STEMMED_EN("stemmed-en") {
			@Override
			Analyzer getAnalyzer(Storage storage) {
				return new MultiLingualStemAnalyzer("en");
			}
		},
		// TODO: re-enable lemmatization
//		LEMMATIZED_EN("lemmatized-en") {
//			@Override
//			Analyzer getAnalyzer() {return new EnglishMorphologicalAnalyzer();}
//		},
		LEXICAL("lexical"), // uses the default getAnalyzer() below
		LEMMA("lemma") {
			@Override
			Analyzer getAnalyzer(Storage storage) {
				NlpFactory factory = storage.getNlpAnnotatorFactory();
				return new LemmaAnalyzer(factory);
			}
		};
//		MORPH_EN("morph-en");

		/** The field name this constant is matched against. */
		private final String name;

		AnalyzerName(String name) {
			this.name = name;
		}

		/**
		 * Builds the analyzer for this field name. Constants that do not
		 * override this method get a new {@link LexicalAnalyzer}.
		 *
		 * @param storage the storage passed in by the enclosing wrapper; only
		 *        some constants read it
		 * @return the analyzer to use for this field name
		 */
		Analyzer getAnalyzer(Storage storage) {
			return new LexicalAnalyzer();
		}

		/**
		 * Finds the constant whose field name equals the given string.
		 *
		 * @param name the field name to look up
		 * @return the matching constant, or {@code null} if none matches
		 */
		public static AnalyzerName getName(String name) {
			for (AnalyzerName candidate : values()) {
				if (candidate.name.equals(name)) {
					return candidate;
				}
			}
			return null;
		}
	}

	/** Analyzer returned for field names that match no {@link AnalyzerName}. */
	private final Analyzer defaultAnalyzer;

	/**
	 * Analyzers created so far, keyed by field-name constant. A missing entry
	 * means the analyzer for that name has not been requested yet.
	 */
	private final Map<AnalyzerName, Analyzer> fieldAnalyzers;

	/**
	 * Creates a wrapper using the per-field reuse strategy. No field-specific
	 * analyzer is created here; each one is built on first use.
	 *
	 * @param storage the storage later passed to
	 *        {@link AnalyzerName#getAnalyzer(Storage)}
	 */
	public KitchenSinkPerFieldAnalyzerWrapper(Storage storage) {
		super(Analyzer.PER_FIELD_REUSE_STRATEGY);
		this.storage = storage;
		this.defaultAnalyzer = new LexicalAnalyzer();
		// EnumMap keeps entries in declaration order, so toString() output is
		// stable; absent keys replace the former explicit null placeholders.
		this.fieldAnalyzers = new EnumMap<AnalyzerName, Analyzer>(AnalyzerName.class);
	}

	/**
	 * Returns the analyzer for the given field, creating and caching it on the
	 * first request for a known field name.
	 *
	 * @param fieldName the name of the field being analyzed
	 * @return the cached or newly created analyzer for a known field name,
	 *         otherwise the default analyzer
	 */
	@Override
	protected Analyzer getWrappedAnalyzer(String fieldName) {
		AnalyzerName name = AnalyzerName.getName(fieldName);
		if (name == null) {
			return defaultAnalyzer;
		}
		Analyzer analyzer = this.fieldAnalyzers.get(name);
		if (analyzer == null) {
			analyzer = name.getAnalyzer(storage);
			this.fieldAnalyzers.put(name, analyzer);
		}
		return analyzer;
	}

	/**
	 * Returns the wrapped analyzer's components unchanged.
	 *
	 * @param fieldName the name of the field being analyzed (unused)
	 * @param components the components produced by the wrapped analyzer
	 * @return {@code components}, as given
	 */
	@Override
	protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
		return components;
	}

	/**
	 * Describes this wrapper by the analyzers created so far and the default
	 * analyzer.
	 */
	@Override
	public String toString() {
		return "KitchenSinkPerFieldAnalyzerWrapper(" + fieldAnalyzers + ", default=" + defaultAnalyzer + ")";
	}
}