package de.danielbasedow.prospecter.core.analysis;
import com.fasterxml.jackson.databind.JsonNode;
import com.google.inject.Guice;
import com.google.inject.Injector;
import de.danielbasedow.prospecter.core.TokenMapper;
import org.apache.lucene.analysis.de.GermanAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;
/**
 * {@link LuceneAnalyzer} flavour whose factory method wires in Lucene's
 * {@link GermanAnalyzer}.
 */
public class LuceneGermanAnalyzer extends LuceneAnalyzer {

    /**
     * Creates an analyzer around an already configured Lucene analyzer.
     *
     * @param mapper   token mapper handed on to the superclass constructor
     * @param analyzer Lucene analyzer stored in the {@code luceneAnalyzer} field
     *                 (declared outside this class, presumably in {@link LuceneAnalyzer})
     */
    public LuceneGermanAnalyzer(TokenMapper mapper, org.apache.lucene.analysis.Analyzer analyzer) {
        super(mapper);
        this.luceneAnalyzer = analyzer;
    }

    /**
     * Factory that assembles a German analyzer from JSON options.
     *
     * <p>The {@code "stopwords"} entry of {@code options} is passed to
     * {@code getStopWords} together with {@link GermanAnalyzer#getDefaultStopSet()};
     * how a missing entry is treated is decided by that helper, which is not
     * visible here. The same {@code options} node is also passed to
     * {@code getBloomFilter} to configure the token mapper.
     *
     * @param options JSON configuration node for this analyzer
     * @return a new {@code LuceneGermanAnalyzer} using a Lucene 4.9 {@link GermanAnalyzer}
     */
    public static Analyzer make(JsonNode options) {
        // A new injector is built on every call; the TokenMapper is obtained from it below.
        final Injector guice = Guice.createInjector(new AnalyzerModule());

        final CharArraySet stopWords =
                getStopWords(options.get("stopwords"), GermanAnalyzer.getDefaultStopSet());
        final org.apache.lucene.analysis.Analyzer german =
                new GermanAnalyzer(Version.LUCENE_4_9, stopWords);

        final TokenMapper tokenMapper = guice.getInstance(TokenMapper.class);
        tokenMapper.setBloomFilter(getBloomFilter(options));

        return new LuceneGermanAnalyzer(tokenMapper, german);
    }
}