/**
* Copyright (C) 2011 JTalks.org Team
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
package org.jtalks.jcommune.model.search;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import org.apache.commons.lang.StringUtils;
import org.apache.lucene.util.Version;
import org.apache.solr.analysis.StopFilterFactory;
import org.hibernate.search.util.HibernateSearchResourceLoader;
/**
 * Deletes stop words from the search text.
 * <p>
 * The stop words are taken from the configured stop words files, which are
 * applied one after another: the output of the filtration by one file is the
 * input of the filtration by the next one. The files are read anew on every
 * call of {@link #filter(String)}.
 * <p>
 * When {@code ignoreCase} is enabled the whole search text is converted to
 * lower case before the comparison, so the returned text is lower-cased too.
 *
 * @author Anuar_Nurmakanov
 *
 */
public class StopWordsFilter implements SearchRequestFilter {
    private final List<String> stopWordsFiles;
    private final boolean ignoreCase;

    /**
     * @param stopWordsFiles list of files that contain stop words
     * @param ignoreCase     {@code true} to compare the terms with the stop
     *                       words without regard to the case
     */
    public StopWordsFilter(List<String> stopWordsFiles, boolean ignoreCase) {
        this.stopWordsFiles = stopWordsFiles;
        this.ignoreCase = ignoreCase;
    }

    /**
     * {@inheritDoc}
     * <p>
     * A {@code null} or empty search text contains nothing to filter and is
     * returned as is.
     */
    @Override
    public String filter(String searchText) {
        // Guard against null: without it the result depended on whether any
        // stop words file was configured (NullPointerException vs. null).
        if (StringUtils.isEmpty(searchText)) {
            return searchText;
        }
        String result = searchText;
        for (String stopWordsFile : stopWordsFiles) {
            result = filter(result, stopWordsFile);
        }
        return result;
    }

    /**
     * This method performs a filtration of the search text using the stop
     * words from a single file.
     *
     * @param searchText    search text
     * @param stopWordsFile file that contains stop words
     * @return the search text without the terms found among the stop words
     */
    private String filter(String searchText, String stopWordsFile) {
        StopFilterFactory filterFactory = new StopFilterFactory();
        Map<String, String> arguments = new HashMap<>();
        arguments.put("words", stopWordsFile);
        arguments.put("luceneMatchVersion", String.valueOf(Version.LUCENE_31));
        arguments.put("ignoreCase", String.valueOf(ignoreCase));
        filterFactory.init(arguments);
        filterFactory.inform(new HibernateSearchResourceLoader());

        // The set is only queried for membership by removeAll, so it is kept
        // as Set<?> instead of being force-cast to Set<String>.
        Set<?> stopWords = filterFactory.getStopWords();
        List<String> searchTerms = splitSearchText(searchText);
        searchTerms.removeAll(stopWords);
        return joinSearchTerms(searchTerms);
    }

    /**
     * Splits the search text into terms on every single whitespace character.
     * If {@code ignoreCase} is enabled, the text is lower-cased first.
     *
     * @param searchText search text
     * @return modifiable list of terms
     */
    private List<String> splitSearchText(String searchText) {
        String text = searchText;
        if (ignoreCase) {
            // Locale.ROOT keeps the conversion independent of the JVM default
            // locale (e.g. Turkish would turn "I" into a dotless "i").
            text = text.toLowerCase(Locale.ROOT);
        }
        // Arrays.asList is a fixed-size view, so it is copied into an
        // ArrayList to allow removal of the stop words.
        return new ArrayList<>(Arrays.asList(text.split("\\s")));
    }

    /**
     * Creates the single string from list of terms, separating them with a
     * single space.
     *
     * @param searchTerms terms of the search text
     * @return the single string from list of terms
     */
    private String joinSearchTerms(List<String> searchTerms) {
        return StringUtils.join(searchTerms, " ");
    }
}