/**
* Copyright (c) 2000-present Liferay, Inc. All rights reserved.
*
* This library is free software; you can redistribute it and/or modify it under
* the terms of the GNU Lesser General Public License as published by the Free
* Software Foundation; either version 2.1 of the License, or (at your option)
* any later version.
*
* This library is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
* details.
*/
package com.liferay.portal.kernel.search.suggest;
import com.liferay.portal.kernel.log.Log;
import com.liferay.portal.kernel.log.LogFactoryUtil;
import com.liferay.portal.kernel.search.Document;
import com.liferay.portal.kernel.search.DocumentImpl;
import com.liferay.portal.kernel.search.Field;
import com.liferay.portal.kernel.search.SearchContext;
import com.liferay.portal.kernel.search.SearchException;
import com.liferay.portal.kernel.util.StringPool;
import java.io.InputStream;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
* @author Michael C. Han
*/
public abstract class BaseGenericSpellCheckIndexWriter
extends BaseSpellCheckIndexWriter {
public void setBatchSize(int batchSize) {
_batchSize = batchSize;
}
public void setDocumentPrototype(Document documentPrototype) {
_documentPrototype = documentPrototype;
}
protected abstract void addDocument(
String documentType, SearchContext searchContext, Document document)
throws SearchException;
protected abstract void addDocuments(
String documentType, SearchContext searchContext,
Collection<Document> documents)
throws SearchException;
protected void addNGramFields(
Document document, Map<String, String> nGrams) {
for (Map.Entry<String, String> nGramEntry : nGrams.entrySet()) {
document.addKeyword(nGramEntry.getKey(), nGramEntry.getValue());
}
}
protected Document createDocument() {
return (Document)_documentPrototype.clone();
}
protected Document createDocument(
long companyId, long groupId, String languageId, String keywords,
float weight, String keywordFieldName, String typeFieldValue,
int maxNGramLength)
throws SearchException {
Document document = createDocument();
document.addKeyword(Field.COMPANY_ID, companyId);
document.addKeyword(Field.GROUP_ID, groupId);
document.addKeyword(Field.LANGUAGE_ID, languageId);
document.addKeyword(Field.PRIORITY, String.valueOf(weight));
document.addKeyword(Field.SPELL_CHECK_WORD, true);
document.addKeyword(keywordFieldName, keywords);
document.addKeyword(Field.TYPE, typeFieldValue);
document.addKeyword(Field.UID, getUID(companyId, languageId, keywords));
NGramHolder nGramHolder = NGramHolderBuilderUtil.buildNGramHolder(
keywords, maxNGramLength);
addNGramFields(document, nGramHolder.getNGramEnds());
Map<String, List<String>> nGrams = nGramHolder.getNGrams();
for (Map.Entry<String, List<String>> entry : nGrams.entrySet()) {
String fieldName = entry.getKey();
for (String nGram : entry.getValue()) {
document.addKeyword(fieldName, nGram);
}
}
addNGramFields(document, nGramHolder.getNGramStarts());
return document;
}
@Override
protected void indexKeyword(
SearchContext searchContext, long groupId, String languageId,
String keyword, float weight, String keywordFieldName,
String typeFieldValue, int maxNGramLength)
throws Exception {
Document document = createDocument(
searchContext.getCompanyId(), groupId, languageId, keyword, weight,
keywordFieldName, typeFieldValue, maxNGramLength);
addDocument(typeFieldValue, searchContext, document);
}
@Override
protected void indexKeywords(
SearchContext searchContext, long groupId, String languageId,
InputStream inputStream, String keywordFieldName,
String typeFieldValue, int maxNGramLength)
throws Exception {
Set<Document> documents = new HashSet<>();
try {
DictionaryReader dictionaryReader = new DictionaryReader(
inputStream, StringPool.UTF8);
Iterator<DictionaryEntry> iterator =
dictionaryReader.getDictionaryEntriesIterator();
int counter = 0;
while (iterator.hasNext()) {
counter++;
DictionaryEntry dictionaryEntry = iterator.next();
Document document = createDocument(
searchContext.getCompanyId(), groupId, languageId,
dictionaryEntry.getWord(), dictionaryEntry.getWeight(),
keywordFieldName, typeFieldValue, maxNGramLength);
documents.add(document);
if ((counter == _batchSize) || !iterator.hasNext()) {
addDocuments(typeFieldValue, searchContext, documents);
documents.clear();
counter = 0;
}
}
}
catch (Exception e) {
if (_log.isWarnEnabled()) {
_log.warn("Unable to index dictionaries", e);
}
throw new SearchException(e.getMessage(), e);
}
}
private static final int _DEFAULT_BATCH_SIZE = 1000;
private static final Log _log = LogFactoryUtil.getLog(
BaseGenericSpellCheckIndexWriter.class);
private int _batchSize = _DEFAULT_BATCH_SIZE;
private Document _documentPrototype = new DocumentImpl();
}