/*
 * Licensed to STRATIO (C) under one or more contributor license agreements.
 * See the NOTICE file distributed with this work for additional information
 * regarding copyright ownership. The STRATIO (C) licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package com.stratio.cassandra.lucene.schema.analysis;

import com.stratio.cassandra.lucene.IndexException;
import com.stratio.cassandra.lucene.util.JsonSerializer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.IOUtils;
import org.junit.Test;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertNotNull;

/**
 * Tests for {@link SnowballAnalyzerBuilder}: constructor validation, per-language analysis of a sample input, and
 * JSON parsing of the builder.
 *
 * @author Andres de la Pena {@literal <adelapena@stratio.com>}
 */
public class SnowballAnalyzerBuilderTest {

    /** A {@code null} language must be rejected by the constructor. */
    @Test(expected = IndexException.class)
    public void testBuildNullLanguage() {
        new SnowballAnalyzerBuilder(null, null);
    }

    /** A blank language must be rejected by the constructor. */
    @Test(expected = IndexException.class)
    public void testBuildBlankLanguage() {
        new SnowballAnalyzerBuilder(" ", null);
    }

    @Test
    public void testBuildEnglish() {
        AnalyzerBuilder builder = new SnowballAnalyzerBuilder("English", null);
        testAnalyzer(builder, "organization", "organ");
    }

    @Test
    public void testBuildFrench() {
        AnalyzerBuilder builder = new SnowballAnalyzerBuilder("French", null);
        testAnalyzer(builder, "contradictoirement", "contradictoir");
    }

    @Test
    public void testBuildSpanish() {
        AnalyzerBuilder builder = new SnowballAnalyzerBuilder("Spanish", null);
        testAnalyzer(builder, "perdido", "perd");
    }

    @Test
    public void testBuildPortuguese() {
        AnalyzerBuilder builder = new SnowballAnalyzerBuilder("Portuguese", null);
        testAnalyzer(builder, "boataria", "boat");
    }

    @Test
    public void testBuildItalian() {
        AnalyzerBuilder builder = new SnowballAnalyzerBuilder("Italian", null);
        testAnalyzer(builder, "abbandoneranno", "abbandon");
    }

    @Test
    public void testBuildRomanian() {
        AnalyzerBuilder builder = new SnowballAnalyzerBuilder("Romanian", null);
        testAnalyzer(builder, "absolutul", "absol");
    }

    @Test
    public void testBuildGerman() {
        AnalyzerBuilder builder = new SnowballAnalyzerBuilder("German", null);
        testAnalyzer(builder, "katers", "kat");
    }

    @Test
    public void testBuildDanish() {
        AnalyzerBuilder builder = new SnowballAnalyzerBuilder("Danish", null);
        testAnalyzer(builder, "indtager", "indtag");
    }

    @Test
    public void testBuildDutch() {
        AnalyzerBuilder builder = new SnowballAnalyzerBuilder("Dutch", null);
        testAnalyzer(builder, "opglimlachten", "opglimlacht");
    }

    @Test
    public void testBuildSwedish() {
        AnalyzerBuilder builder = new SnowballAnalyzerBuilder("Swedish", null);
        testAnalyzer(builder, "grejer", "grej");
    }

    /** The sample input is expected to come out of the analyzer unchanged. */
    @Test
    public void testBuildNorwegian() {
        AnalyzerBuilder builder = new SnowballAnalyzerBuilder("Norwegian", null);
        testAnalyzer(builder, "stuff", "stuff");
    }

    /** The sample input is expected to come out of the analyzer unchanged. */
    @Test
    public void testBuildRussian() {
        AnalyzerBuilder builder = new SnowballAnalyzerBuilder("Russian", null);
        testAnalyzer(builder, "kapta", "kapta");
    }

    @Test
    public void testBuildFinnish() {
        AnalyzerBuilder builder = new SnowballAnalyzerBuilder("Finnish", null);
        testAnalyzer(builder, "jutut", "jutu");
    }

    /** The sample input is expected to come out of the analyzer unchanged. */
    @Test
    public void testBuildIrish() {
        AnalyzerBuilder builder = new SnowballAnalyzerBuilder("Irish", null);
        testAnalyzer(builder, "stuif", "stuif");
    }

    /** The sample input is expected to come out of the analyzer unchanged. */
    @Test
    public void testBuildHungarian() {
        AnalyzerBuilder builder = new SnowballAnalyzerBuilder("Hungarian", null);
        testAnalyzer(builder, "dolog", "dolog");
    }

    @Test
    public void testBuildTurkish() {
        AnalyzerBuilder builder = new SnowballAnalyzerBuilder("Turkish", null);
        testAnalyzer(builder, "tekneler", "tekne");
    }

    /** The sample input is expected to come out of the analyzer unchanged. */
    @Test
    public void testBuildArmenian() {
        AnalyzerBuilder builder = new SnowballAnalyzerBuilder("Armenian", null);
        testAnalyzer(builder, "megy", "megy");
    }

    @Test
    public void testBuildBasque() {
        AnalyzerBuilder builder = new SnowballAnalyzerBuilder("Basque", null);
        testAnalyzer(builder, "harrizko", "harri");
    }

    @Test
    public void testBuildCatalan() {
        AnalyzerBuilder builder = new SnowballAnalyzerBuilder("Catalan", null);
        testAnalyzer(builder, "catalans", "catalan");
    }

    /** Building and using an analyzer for an unknown language name must fail with a runtime exception. */
    @Test(expected = RuntimeException.class)
    public void testBuildWithWrongLanguage() {
        AnalyzerBuilder builder = new SnowballAnalyzerBuilder("abc", null);
        testAnalyzer(builder, "organization", "organ");
    }

    /** Same scenario as {@link #testBuildNullLanguage()}, followed by an attempt to use the builder. */
    @Test(expected = IndexException.class)
    public void testBuildWithoutLanguage() {
        AnalyzerBuilder builder = new SnowballAnalyzerBuilder(null, null);
        testAnalyzer(builder, "organization", "organ");
    }

    /** Parses a builder from JSON that declares no stopwords and checks the tokens it produces. */
    @Test
    public void testParseJSONWithoutStopwords() throws IOException {
        String json = "{type:\"snowball\", language:\"English\"}";
        AnalyzerBuilder builder = JsonSerializer.fromString(json, AnalyzerBuilder.class);
        testAnalyzer(builder, "the dogs are hungry", "dog", "hungri");
    }

    /** Parses a builder from JSON that declares custom stopwords and checks the tokens it produces. */
    @Test
    public void testParseJSONWithStopwords() throws IOException {
        String json = "{type:\"snowball\", language:\"English\", stopwords:\"xx,yy\"}";
        AnalyzerBuilder builder = JsonSerializer.fromString(json, AnalyzerBuilder.class);
        testAnalyzer(builder, "the dogs xx are hungry yy", "the", "dog", "are", "hungri");
    }

    /** JSON that does not describe a valid analyzer builder must fail to parse. */
    @Test(expected = IOException.class)
    public void testParseJSONInvalid() throws IOException {
        String json = "{class:\"abc\"}";
        JsonSerializer.fromString(json, AnalyzerBuilder.class);
    }

    /**
     * Builds an analyzer from the specified builder, analyzes the specified value and asserts that the produced
     * tokens match the expected ones, in order.
     *
     * @param builder the analyzer builder under test
     * @param value the text to be analyzed
     * @param expected the expected tokens
     */
    private void testAnalyzer(AnalyzerBuilder builder, String value, String... expected) {
        // try-with-resources guarantees the analyzer is closed even when an assertion or the analysis itself fails
        try (Analyzer analyzer = builder.analyzer()) {
            assertNotNull("Expected not null analyzer", analyzer);
            List<String> tokens = analyze(value, analyzer);
            assertArrayEquals("Tokens are not the expected", expected, tokens.toArray());
        }
    }

    /**
     * Returns the terms produced by running the specified analyzer over the specified value.
     *
     * @param value the text to be analyzed
     * @param analyzer the analyzer to be used
     * @return the produced terms, in emission order
     * @throws RuntimeException if the token stream fails with an {@link IOException}
     */
    private List<String> analyze(String value, Analyzer analyzer) {
        List<String> result = new ArrayList<>();
        TokenStream stream = null;
        try {
            stream = analyzer.tokenStream(null, value);
            // The attribute instance is stable for the lifetime of the stream, so it is fetched only once
            CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
            stream.reset();
            while (stream.incrementToken()) {
                result.add(term.toString());
            }
            // Required end-of-stream step of the TokenStream consumer workflow
            stream.end();
        } catch (IOException e) {
            throw new RuntimeException(e);
        } finally {
            // Null-safe; close failures are deliberately ignored so they never mask the primary failure
            IOUtils.closeWhileHandlingException(stream);
        }
        return result;
    }
}