/*******************************************************************************
 * Trombone is a flexible text processing and analysis library used
 * primarily by Voyant Tools (voyant-tools.org).
 *
 * Copyright (©) 2007-2012 Stéfan Sinclair & Geoffrey Rockwell
 *
 * This file is part of Trombone.
 *
 * Trombone is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Trombone is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Trombone. If not, see <http://www.gnu.org/licenses/>.
 ******************************************************************************/
package org.voyanttools.trombone.lucene.analysis;

import java.util.Locale;

import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.analysis.snowball.SnowballFilter;

/**
 * A {@link LexicalAnalyzer} that additionally wraps the parent's token stream
 * in a {@link SnowballFilter} chosen from a {@link StemmableLanguage}.
 *
 * @author sgs
 */
public class MultiLingualStemAnalyzer extends LexicalAnalyzer {

	/** Language whose name selects the Snowball stemmer; set once in the constructor. */
	private final StemmableLanguage sl;

	/**
	 * Creates an analyzer for the given language.
	 *
	 * @param lang a two-character string, which is resolved with
	 *        {@link StemmableLanguage#fromCode(String)}, or any other string,
	 *        which is resolved with {@code StemmableLanguage.valueOf(String)}
	 * @throws NullPointerException if {@code lang} is {@code null}
	 */
	public MultiLingualStemAnalyzer(String lang) {
		sl = lang.length() == 2 ? StemmableLanguage.fromCode(lang) : StemmableLanguage.valueOf(lang);
	}

	/**
	 * Builds the parent analyzer's components and appends a Snowball stemming
	 * filter to the end of its token stream.
	 *
	 * @param fieldName the field name, passed through unchanged to the parent
	 * @return components that reuse the parent's tokenizer and end in a {@link SnowballFilter}
	 * @see org.apache.lucene.analysis.Analyzer#createComponents(java.lang.String)
	 */
	@Override
	protected TokenStreamComponents createComponents(String fieldName) {
		TokenStreamComponents tsc = super.createComponents(fieldName);
		return new TokenStreamComponents(tsc.getTokenizer(),
				new SnowballFilter(tsc.getTokenStream(), snowballName()));
	}

	/**
	 * Derives the stemmer name handed to {@link SnowballFilter}: the language
	 * constant's name lower-cased and then capitalized (e.g. {@code ENGLISH}
	 * becomes {@code English}).
	 */
	private String snowballName() {
		// Locale.ROOT keeps the result independent of the JVM default locale;
		// under a Turkish locale a plain toLowerCase() maps 'I' to dotless 'ı',
		// which would corrupt names such as ITALIAN or FINNISH.
		return StringUtils.capitalize(sl.name().toLowerCase(Locale.ROOT));
	}
}