/* * Licensed to STRATIO (C) under one or more contributor license agreements. * See the NOTICE file distributed with this work for additional information * regarding copyright ownership. The STRATIO (C) licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package com.stratio.cassandra.lucene.search.condition; import com.stratio.cassandra.lucene.IndexException; import com.stratio.cassandra.lucene.schema.mapping.SingleColumnMapper; import org.apache.commons.lang3.StringUtils; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.index.Term; import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.Query; import org.apache.lucene.util.automaton.LevenshteinAutomata; /** * A {@link Condition} that implements the fuzzy search query. The similarity measurement is based on the * Damerau-Levenshtein (optimal string alignment) algorithm, though you can explicitly choose classic Levenshtein by * passing {@code false} to the {@code transpositions} parameter. * * @author Andres de la Pena {@literal <adelapena@stratio.com>} */ public class FuzzyCondition extends SingleColumnCondition { /** The default Damerau-Levenshtein max distance. */ public static final int DEFAULT_MAX_EDITS = LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE; /** The default length of common (non-fuzzy) prefix. */ public static final int DEFAULT_PREFIX_LENGTH = 0; /** The default max expansions. */ public static final int DEFAULT_MAX_EXPANSIONS = 50; /** If transpositions should be treated as a primitive edit operation by default. */ public static final boolean DEFAULT_TRANSPOSITIONS = true; /** The fuzzy expression to be matched. */ public final String value; /** The Damerau-Levenshtein max distance. */ public final int maxEdits; /** The length of common (non-fuzzy) prefix. */ public final int prefixLength; /** The length of common (non-fuzzy) prefix. */ public final int maxExpansions; /** If transpositions should be treated as a primitive edit operation. */ public final boolean transpositions; /** * Returns a new {@link FuzzyCondition}. * * @param boost The boost for this query clause. Documents matching this clause will (in addition to the normal * weightings) have their score multiplied by {@code boost}. If {@code null}, then {@link #DEFAULT_BOOST} is used as * default. * @param field the field name * @param value the field fuzzy value * @param maxEdits must be {@literal >=} 0 and {@literal <=} {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}. * @param prefixLength length of common (non-fuzzy) prefix * @param maxExpansions The maximum number of terms to match. If this number is greater than {@link * org.apache.lucene.search.BooleanQuery#getMaxClauseCount} when the query is rewritten, then the maxClauseCount * will be used instead. * @param transpositions {@code true} if transpositions should be treated as a primitive edit operation. If this is * {@code false}, comparisons will implement the classic Levenshtein algorithm. */ public FuzzyCondition(Float boost, String field, String value, Integer maxEdits, Integer prefixLength, Integer maxExpansions, Boolean transpositions) { super(boost, field); this.value = validateValue(value); this.maxEdits = validateMaxEdits(maxEdits); this.prefixLength = validatePrefixLength(prefixLength); this.maxExpansions = validateMaxExpansions(maxExpansions); this.transpositions = validateTranspositions(transpositions); } private static String validateValue(String value) { if (StringUtils.isBlank(value)) { throw new IndexException("Field value required"); } else { return value; } } private static Integer validateMaxEdits(Integer maxEdits) { if (maxEdits == null) { return DEFAULT_MAX_EDITS; } else if (maxEdits < 0 || maxEdits > 2) { throw new IndexException("max_edits must be between 0 and 2"); } else { return maxEdits; } } private static Integer validatePrefixLength(Integer prefixLength) { if (prefixLength == null) { return DEFAULT_PREFIX_LENGTH; } else if (prefixLength < 0) { throw new IndexException("prefix_length must be positive."); } else { return prefixLength; } } private static Integer validateMaxExpansions(Integer maxExpansions) { if (maxExpansions == null) { return DEFAULT_MAX_EXPANSIONS; } else if (maxExpansions < 0) { throw new IndexException("max_expansions must be positive."); } else { return maxExpansions; } } private static Boolean validateTranspositions(Boolean transpositions) { if (transpositions == null) { return DEFAULT_TRANSPOSITIONS; } else { return transpositions; } } /** {@inheritDoc} */ @Override public Query query(SingleColumnMapper<?> mapper, Analyzer analyzer) { if (mapper.base == String.class) { Term term = new Term(field, value); Query query = new FuzzyQuery(term, maxEdits, prefixLength, maxExpansions, transpositions); query.setBoost(boost); return query; } else { throw new IndexException("Fuzzy queries are not supported by mapper %s", mapper); } } /** {@inheritDoc} */ @Override public String toString() { return toStringHelper(this).add("value", value) .add("maxEdits", maxEdits) .add("prefixLength", prefixLength) .add("maxExpansions", maxExpansions) .add("transpositions", transpositions) .toString(); } }