/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.meltwater.elasticsearch.index;

import com.meltwater.elasticsearch.action.BatchPercolateResponseItem;
import com.meltwater.elasticsearch.action.BatchPercolateShardRequest;
import com.meltwater.elasticsearch.action.BatchPercolateShardResponse;
import com.meltwater.elasticsearch.shard.BatchPercolatorQueriesRegistry;
import com.meltwater.elasticsearch.shard.QueryAndSource;
import com.meltwater.elasticsearch.shard.QueryMatch;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Counter;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.cache.recycler.CacheRecycler;
import org.elasticsearch.cache.recycler.PageCacheRecycler;
import org.elasticsearch.cluster.ClusterService;
import org.elasticsearch.cluster.action.index.MappingUpdatedAction;
import org.elasticsearch.common.base.Optional;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.collect.ImmutableMap;
import org.elasticsearch.common.collect.Maps;
import org.elasticsearch.common.collect.Tuple;
import org.elasticsearch.common.component.AbstractComponent;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.common.xcontent.*;
import org.elasticsearch.index.IndexService;
import org.elasticsearch.index.engine.Engine;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.ParsedDocument;
import org.elasticsearch.index.query.ParsedQuery;
import org.elasticsearch.index.shard.IndexShard;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.indices.IndicesService;
import org.elasticsearch.script.ScriptService;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchParseElement;
import org.elasticsearch.search.SearchParseException;
import org.elasticsearch.search.SearchShardTarget;
import org.elasticsearch.search.fetch.FetchPhase;
import org.elasticsearch.search.highlight.HighlightPhase;
import org.elasticsearch.search.internal.DefaultSearchContext;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.search.internal.ShardSearchLocalRequest;
import org.elasticsearch.search.query.QueryPhase;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentMap;

/**
 * For each request, the {@link BatchPercolatorService#percolate} function is
 * called on each shard that contains percolation queries.
 *
 * The percolation is then executed in the following way:
 * 1. Create a RAMDirectory and index all documents in the request into it.
 * 2. For each query:
 *    2.1 Parse the highlighting options and set them on the search context (a class which is extremely stateful).
 *    2.2 Execute the query and collect the results.
 * 3. Send back the matches for each document in a {@link BatchPercolateShardResponse}.
 */
public class BatchPercolatorService extends AbstractComponent {
    public static final String TYPE_NAME = ".batchpercolator";

private final IndicesService indicesService;
private final CacheRecycler cacheRecycler;
private final PageCacheRecycler pageCacheRecycler;
private final BigArrays bigArrays;
private final ClusterService clusterService;
private final HighlightPhase highlightPhase;
private final ScriptService scriptService;
private final MappingUpdatedAction mappingUpdatedAction;
    private final QueryPhase queryPhase;
    private final FetchPhase fetchPhase;

@Inject
public BatchPercolatorService(Settings settings, IndicesService indicesService, CacheRecycler cacheRecycler,
PageCacheRecycler pageCacheRecycler, BigArrays bigArrays,
HighlightPhase highlightPhase, ClusterService clusterService,
ScriptService scriptService,
MappingUpdatedAction mappingUpdatedAction,
QueryPhase queryPhase,
FetchPhase fetchPhase) {
super(settings);
this.indicesService = indicesService;
this.cacheRecycler = cacheRecycler;
this.pageCacheRecycler = pageCacheRecycler;
this.bigArrays = bigArrays;
this.clusterService = clusterService;
this.highlightPhase = highlightPhase;
this.scriptService = scriptService;
this.mappingUpdatedAction = mappingUpdatedAction;
this.queryPhase = queryPhase;
this.fetchPhase = fetchPhase;
}
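
    /**
     * Runs all percolation queries registered on this shard against the documents
     * of the given request, and returns the matches per document.
     */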
public BatchPercolateShardResponse percolate(BatchPercolateShardRequest request) throws IOException {
long requestId = request.hashCode();
IndexService percolateIndexService = indicesService.indexServiceSafe(request.shardId().getIndex());
IndexShard indexShard = percolateIndexService.shardSafe(request.shardId().getId());
indexShard.readAllowed(); // check if we can read the shard...
ConcurrentMap<String, QueryAndSource> percolateQueries = percolateIndexService.shardInjectorSafe(indexShard.shardId().id())
.getInstance(BatchPercolatorQueriesRegistry.class)
.percolateQueries();
        List<ParsedDocument> parsedDocuments = parseRequest(percolateIndexService, request);
        if (parsedDocuments == null) {
            throw new ElasticsearchIllegalArgumentException("Nothing to percolate");
        } else if (percolateQueries.isEmpty()) {
            return new BatchPercolateShardResponse(emptyPercolateResponses(parsedDocuments),
                    request.shardId().getIndex(), request.shardId().id());
        }
        // We use a RAMDirectory here instead of a MemoryIndex.
        // In our tests, MemoryIndex had worse indexing performance for normal-sized documents.
RamDirectoryPercolatorIndex index = new RamDirectoryPercolatorIndex(indexShard.mapperService());
        Directory directory = index.indexDocuments(parsedDocuments);
        SearchContext context = createSearchContext(request, percolateIndexService, indexShard, directory);
        try {
            long filteringStart = System.currentTimeMillis();
            Map<String, QueryAndSource> filteredQueries = filterQueriesToSearchWith(percolateQueries, directory);
            logger.debug("{}-{} Percolation queries filtered down to '{}' items in '{}' ms.",
                    request.shardId(),
                    requestId,
                    filteredQueries.size(),
                    System.currentTimeMillis() - filteringStart
            );

            // Perform the actual matching
            Map<String, BatchPercolateResponseItem> responses = percolateResponses(
                    context, filteredQueries, parsedDocuments);
            return new BatchPercolateShardResponse(responses, request.shardId().getIndex(), request.shardId().id());
        } finally {
            // Close the context (and with it the reader) before the directory, and
            // release the caches even if the percolation fails.
            context.close();
            directory.close();
            percolateIndexService.fixedBitSetFilterCache().clear("Done percolating " + requestId);
            percolateIndexService.fieldData().clear();
            percolateIndexService.cache().clear("Done percolating " + requestId);
        }
}
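
    /**
     * Narrows down the candidate queries by running each query's limiting filter
     * against the indexed documents. Queries whose filter matches nothing are dropped;
     * if a filter fails to execute, the query is kept to be on the safe side.
     */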
private Map<String, QueryAndSource> filterQueriesToSearchWith(ConcurrentMap<String, QueryAndSource> percolateQueries, Directory directory) throws IOException {
Map<String, QueryAndSource> filteredQueries = new HashMap<>();
try(DirectoryReader reader = DirectoryReader.open(directory)){
for(Map.Entry<String, QueryAndSource> entry:percolateQueries.entrySet()){
try{
if(hasDocumentMatchingFilter(reader, entry.getValue().getLimitingFilter())){
filteredQueries.put(entry.getKey(), entry.getValue());
}
} catch (Exception e){
logger.warn(
"Failed to pre-filter query. Assuming that it should be matched anyway. Query ID: {}, Filter: {}",
e, entry.getKey(), entry.getValue().getLimitingFilter());
filteredQueries.put(entry.getKey(), entry.getValue());
}
}
}
return filteredQueries;
}
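
    /**
     * Returns true if any document in the reader matches the given filter,
     * or trivially if no limiting filter is present.
     */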
private boolean hasDocumentMatchingFilter(IndexReader reader, Optional<Filter> optionalFilter) throws IOException {
if(optionalFilter.isPresent()){
Filter filter = optionalFilter.get();
boolean found = false;
            // If you are not familiar with Lucene, this basically means that we try to
            // create an iterator of matching doc ids for the filter, for the given reader.
            // Both the filter and the DocIdSet may return null to enable optimisations,
            // hence the null checks. Null means that there were no matching docs, and
            // the same is true if the iterator hits NO_MORE_DOCS immediately.
for(AtomicReaderContext leaf:reader.leaves()) {
DocIdSet idSet = filter.getDocIdSet(leaf, leaf.reader().getLiveDocs());
if (idSet != null) {
DocIdSetIterator iter = idSet.iterator();
if (iter != null && iter.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
found = true;
break;
}
}
}
return found;
}
else{
return true;
}
}
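
    /**
     * Adapts an {@link IndexSearcher} over the in-memory index to the
     * {@link Engine.Searcher} the search context expects; closing it
     * closes the underlying reader.
     */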
    private static class DocSearcher extends Engine.Searcher {
private DocSearcher(IndexSearcher searcher) {
super("percolate", searcher);
}
@Override
public void close() throws ElasticsearchException {
try {
this.reader().close();
} catch (IOException e) {
throw new ElasticsearchException("failed to close IndexReader in batch percolator", e);
}
}
}
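
    /**
     * Builds a fresh {@link DefaultSearchContext} over the in-memory index. The
     * {@link ShardSearchLocalRequest} passed in is a dummy pointing at a local
     * pseudo-shard, since the search runs against the RAMDirectory rather than
     * against the real shard.
     */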
private SearchContext createSearchContext(BatchPercolateShardRequest request,
IndexService percolateIndexService,
IndexShard indexShard,
Directory directory) throws IOException {
SearchShardTarget searchShardTarget = new SearchShardTarget(clusterService.localNode().id(),
request.shardId().getIndex(), request.shardId().id());
return new DefaultSearchContext(
0,
                new ShardSearchLocalRequest(new ShardId("local_index", 0), 0, SearchType.QUERY_AND_FETCH, null, null, false),
searchShardTarget,
new DocSearcher(new IndexSearcher(DirectoryReader.open(directory))),
percolateIndexService,
indexShard,
scriptService,
cacheRecycler,
pageCacheRecycler,
bigArrays,
Counter.newCounter()
);
}
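
    /**
     * Builds a response item with an empty match set for each parsed document.
     */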
    private Map<String, BatchPercolateResponseItem> emptyPercolateResponses(List<ParsedDocument> parsedDocuments) {
Map<String, BatchPercolateResponseItem> items = Maps.newHashMap();
for(ParsedDocument document : parsedDocuments){
items.put(document.id(),
new BatchPercolateResponseItem(Maps.<String, QueryMatch>newHashMap(), document.id()));
}
return items;
}
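
    /**
     * Parses the documents out of the request source. A sketch of the expected
     * shape, as inferred from the parsing below (top-level fields other than
     * "docs" are ignored; the document fields shown are placeholders):
     *
     * <pre>{@code
     * {
     *   "docs": [
     *     { "title": "first document", ... },
     *     { "title": "second document", ... }
     *   ]
     * }
     * }</pre>
     *
     * Returns null when the request has no source to parse.
     */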
private List<ParsedDocument> parseRequest(IndexService documentIndexService, BatchPercolateShardRequest request) throws ElasticsearchException {
BytesReference source = request.source();
if (source == null || source.length() == 0) {
return null;
}
List<ParsedDocument> docs = new ArrayList<>();
XContentParser parser = null;
try {
parser = XContentFactory.xContent(source).createParser(source);
XContentParser.Token token;
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
if (token == XContentParser.Token.FIELD_NAME) {
if("docs".equals(parser.currentName())){
docs.addAll(parsedDocuments(documentIndexService, request, parser));
}
}
}
} catch (Throwable e) {
throw new ElasticsearchParseException("failed to parse request", e);
} finally {
if (parser != null) {
parser.close();
}
}
return docs;
}
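
    /**
     * Reads each entry of the "docs" array, copies it into a fresh JSON stream,
     * and parses it into a {@link ParsedDocument} using the mapper for the
     * request's document type.
     */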
private List<ParsedDocument> parsedDocuments(IndexService documentIndexService,
BatchPercolateShardRequest request,
XContentParser parser) throws IOException {
List<ParsedDocument> docs = new ArrayList<>();
parser.nextToken();
while ((parser.nextToken()) != XContentParser.Token.END_ARRAY) {
MapperService mapperService = documentIndexService.mapperService();
Tuple<DocumentMapper, Boolean> docMapperTuple = mapperService.documentMapperWithAutoCreate(request.documentType());
BytesStreamOutput bStream = new BytesStreamOutput();
XContentBuilder builder = XContentFactory.contentBuilder(XContentType.JSON, bStream);
builder.copyCurrentStructure(parser);
builder.close();
docs.add(getParsedDocument(documentIndexService, request, docMapperTuple.v1(), bStream));
bStream.close();
}
return docs;
}
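
    /**
     * Parses a single document and, if parsing modified the mappings,
     * pushes the updated mapping to the master.
     */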
private ParsedDocument getParsedDocument(IndexService documentIndexService,
BatchPercolateShardRequest request,
DocumentMapper docMapper,
BytesStreamOutput bStream) {
ParsedDocument doc = docMapper.parse(bStream.bytes());
if (doc.mappingsModified()) {
mappingUpdatedAction.updateMappingOnMaster(
request.shardId().getIndex(), docMapper,
documentIndexService.indexUUID()
);
}
return doc;
}
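
    /**
     * Runs every pre-filtered query against the in-memory index and records, per
     * document id, which queries matched. A query that fails to execute is logged
     * and left out of the matches.
     */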
private Map<String, BatchPercolateResponseItem> percolateResponses(SearchContext context, Map<String, QueryAndSource> percolateQueries, List<ParsedDocument> parsedDocuments) {
Map<String, BatchPercolateResponseItem> responses = Maps.newHashMap();
for(ParsedDocument document : parsedDocuments){
responses.put(document.id(), new BatchPercolateResponseItem(document.id()));
}
for (Map.Entry<String, QueryAndSource> entry : percolateQueries.entrySet()) {
try{
executeSearch(context, entry.getValue());
for (SearchHit searchHit : context.fetchResult().hits()) {
String id = searchHit.getId();
BatchPercolateResponseItem batchPercolateResponseItem = responses.get(id);
QueryMatch queryMatch = getQueryMatch(entry, searchHit);
batchPercolateResponseItem.getMatches().put(queryMatch.getQueryId(), queryMatch);
}
}
catch (Exception e){
logger.warn(
"Failed to execute query. Will not add it to matches. Query ID: {}, Query: {}",
e, entry.getKey(), entry.getValue().getQuery());
}
}
return responses;
}
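
    /**
     * Executes a single percolation query through the standard query and fetch
     * phases, defaulting from to 0 and size to Integer.MAX_VALUE when unset, so
     * that all matching documents are returned.
     */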
private void executeSearch(SearchContext context, QueryAndSource queryAndSource) {
parseHighlighting(context, queryAndSource.getSource());
context.parsedQuery(new ParsedQuery(queryAndSource.getQuery(), ImmutableMap.<String, Filter>of()));
if (context.from() == -1) {
context.from(0);
}
if (context.size() == -1) {
context.size(Integer.MAX_VALUE);
}
queryPhase.preProcess(context);
fetchPhase.preProcess(context);
queryPhase.execute(context);
setDocIdsToLoad(context);
fetchPhase.execute(context);
}
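
    /**
     * Converts a search hit for a query into a {@link QueryMatch}, carrying over
     * the query id and any highlighted fragments.
     */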
private QueryMatch getQueryMatch(Map.Entry<String, QueryAndSource> entry, SearchHit searchHit) {
QueryMatch queryMatch = new QueryMatch();
queryMatch.setQueryId(entry.getKey());
queryMatch.setHighlighs(searchHit.highlightFields());
return queryMatch;
}
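
    /**
     * Copies the doc ids from the query phase's top docs into the context, so
     * that the fetch phase knows which documents to load.
     */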
    private void setDocIdsToLoad(SearchContext context) {
        TopDocs topDocs = context.queryResult().topDocs();
        // Size the array by the number of docs actually returned; totalHits can be
        // much larger than scoreDocs.length, which would waste memory.
        int[] docIdsToLoad = new int[topDocs.scoreDocs.length];
        int counter = 0;
        // Use a long to avoid int overflow: size defaults to Integer.MAX_VALUE.
        long totalSize = (long) context.from() + context.size();
        for (int i = context.from(); i < totalSize && i < topDocs.scoreDocs.length; i++) {
            docIdsToLoad[counter++] = topDocs.scoreDocs[i].doc;
        }
        context.docIdsToLoad(docIdsToLoad, 0, counter);
    }
    // TODO: do this when the query is loaded into memory instead!
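
    /**
     * Pulls the highlighting element out of the query source and applies it to
     * the search context; other fields in the source are skipped.
     */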
private void parseHighlighting(SearchContext context, BytesReference source){
XContentParser parser = null;
Map<String, ? extends SearchParseElement> hlElements = highlightPhase.parseElements();
try {
parser = XContentFactory.xContent(source).createParser(source);
XContentParser.Token token;
while ((token = parser.nextToken()) != null) {
if (token == XContentParser.Token.FIELD_NAME) {
String fieldName = parser.currentName();
parser.nextToken();
SearchParseElement element = hlElements.get(fieldName);
if (element != null) {
element.parse(parser, context);
break;
}
}
}
} catch (Exception e) {
String sSource = "_na_";
try {
sSource = XContentHelper.convertToJson(source, false);
} catch (Throwable ignore) {}
throw new SearchParseException(context, "Failed to parse source [" + sSource + "]", e);
} finally {
if (parser != null) {
parser.close();
}
}
}
}