package edu.gatech.oad.rocket.findmythings.server.util;

import com.googlecode.objectify.Objectify;
import com.googlecode.objectify.cmd.LoadType;
import com.googlecode.objectify.cmd.Query;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;

import java.io.IOException;
import java.io.StringReader;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;

/**
 * Utility class for indexing and searching items conforming to the
 * {@link Searchable} interface.
 *
 * <p>Free text is reduced to a set of tokens by Lucene's {@link EnglishAnalyzer};
 * the same tokenization is applied both when an item is indexed
 * ({@link #updateSearchTokens(Searchable)}) and when a query is built, so the
 * two sides produce comparable tokens.
 *
 * User: zw
 * Date: 4/8/13
 * Time: 2:35 AM
 */
public class SearchableHelper {

    private static final Logger log = Logger.getLogger(SearchableHelper.class.getName());

    /** Upper bound on the number of distinct tokens taken from a search string. */
    private static final int MAXIMUM_NUMBER_OF_WORDS_TO_SEARCH = 10;

    /** Upper bound on the number of distinct tokens stored in an item's index. */
    private static final int MAX_NUMBER_OF_WORDS_TO_PUT_IN_INDEX = 200;

    /** Filter condition applied to queries: match any of the supplied tokens. */
    private static final String SEARCH_TOKENS_FILTER = "searchTokens in";

    /**
     * Matches a single-line {@code <...>} span. Compiled once instead of on
     * every call; note that {@code .} does not match line terminators.
     */
    private static final Pattern HTML_TAG = Pattern.compile("<.*?>");

    /**
     * Builds a query over {@code clazz} restricted to entities matching the
     * tokens extracted from {@code searchString}.
     *
     * @param objectify    the Objectify instance used to start the load
     * @param clazz        the entity type to query
     * @param searchString free text to search for
     * @param <T>          the searchable entity type
     * @return {@code null} if any argument is {@code null} or the search string
     *         is empty; the unfiltered query if the search string yields no
     *         tokens; otherwise the query filtered on the search tokens
     */
    public static <T extends Searchable> Query<T> search(Objectify objectify, Class<T> clazz,
                                                         String searchString) {
        if (searchString == null || searchString.isEmpty() || objectify == null || clazz == null) {
            return null;
        }
        Set<String> queryTokens = getSearchTokens(searchString, MAXIMUM_NUMBER_OF_WORDS_TO_SEARCH);
        Query<T> query = objectify.load().type(clazz);
        if (queryTokens == null) {
            return query;
        }
        return query.filter(SEARCH_TOKENS_FILTER, queryTokens);
    }

    /**
     * Restricts an existing load to entities matching the tokens extracted from
     * {@code searchString}.
     *
     * @param query        the load to restrict
     * @param searchString free text to search for
     * @param <T>          the searchable entity type
     * @return {@code null} if either argument is {@code null} or the search
     *         string is empty; {@code query} itself if the search string yields
     *         no tokens; otherwise the query filtered on the search tokens
     */
    public static <T extends Searchable> Query<T> search(LoadType<T> query, String searchString) {
        if (searchString == null || searchString.isEmpty() || query == null) {
            return null;
        }
        Set<String> queryTokens = getSearchTokens(searchString, MAXIMUM_NUMBER_OF_WORDS_TO_SEARCH);
        if (queryTokens == null) {
            return query;
        }
        return query.filter(SEARCH_TOKENS_FILTER, queryTokens);
    }

    /**
     * Adds a search-token filter entry to a map of query filters.
     *
     * <p>The map is left untouched if either argument is {@code null}, the
     * search string is empty, or it yields no tokens.
     *
     * @param queryFilters map of filter condition to filter value, modified in place
     * @param searchString free text to search for
     */
    public static void addSearchFilter(Map<String, Object> queryFilters, String searchString) {
        if (searchString == null || searchString.isEmpty() || queryFilters == null) {
            return;
        }
        Set<String> queryTokens = getSearchTokens(searchString, MAXIMUM_NUMBER_OF_WORDS_TO_SEARCH);
        if (queryTokens == null) {
            return;
        }
        queryFilters.put(SEARCH_TOKENS_FILTER, queryTokens);
    }

    /**
     * Rebuilds the search-token set of {@code item} from its searchable content.
     *
     * <p>The set returned by {@code item.getSearchTokens()} is cleared and
     * refilled in place. If the item reports that it cannot provide searchable
     * content, or the content yields no tokens, the set is left empty.
     *
     * @param item the item to re-index; a {@code null} item is ignored
     */
    public static void updateSearchTokens(Searchable item) {
        if (item == null) {
            return;
        }
        Set<String> indexedTokens = item.getSearchTokens();
        indexedTokens.clear();
        if (!item.canGetSearchableContent()) {
            return;
        }
        Set<String> freshTokens =
                getSearchTokens(item.getSearchableContent(), MAX_NUMBER_OF_WORDS_TO_PUT_IN_INDEX);
        // getSearchTokens returns null when there is nothing to index or
        // tokenization failed; in both cases the index simply stays empty.
        if (freshTokens != null) {
            indexedTokens.addAll(freshTokens);
        }
    }

    /**
     * Uses Apache Lucene English stemming for indexing similar words.
     *
     * @param searchableContext     A string describing the object to be indexed;
     *                              may be {@code null}
     * @param maximumNumberOfTokens The maximum number of distinct tokens to return
     * @return A non-empty set containing at most {@code maximumNumberOfTokens}
     *         searchable tokens, or {@code null} if the input is {@code null},
     *         produces no tokens, or tokenization fails
     */
    private static Set<String> getSearchTokens(String searchableContext, int maximumNumberOfTokens) {
        if (searchableContext == null) {
            return null;
        }
        String indexCleanedOfHTMLTags = HTML_TAG.matcher(searchableContext).replaceAll(" ");
        Set<String> tokens = new HashSet<>();
        // Both the analyzer and the stream are closed even when tokenization throws.
        try (Analyzer analyzer = new EnglishAnalyzer(Version.LUCENE_42);
             TokenStream stream = analyzer.tokenStream(null, new StringReader(indexCleanedOfHTMLTags))) {
            CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
            stream.reset();
            // Stop as soon as the limit is reached so exactly
            // maximumNumberOfTokens distinct tokens can be collected.
            while (tokens.size() < maximumNumberOfTokens && stream.incrementToken()) {
                String token = term.toString();
                if (token != null && !token.isEmpty()) {
                    tokens.add(token);
                }
            }
            stream.end();
        } catch (IOException e) {
            log.log(Level.SEVERE, "Failed to tokenize searchable content", e);
            return null;
        }
        return tokens.isEmpty() ? null : tokens;
    }
}