/*
* Copyright 2014, Tuplejump Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.tuplejump.stargate.lucene;
import com.google.common.collect.Ordering;
import com.google.common.collect.TreeMultimap;
import com.tuplejump.stargate.cassandra.TableMapper;
import com.tuplejump.stargate.lucene.query.Search;
import com.tuplejump.stargate.lucene.query.function.AggregateFunction;
import com.tuplejump.stargate.lucene.query.function.Function;
import org.apache.cassandra.db.DecoratedKey;
import org.apache.cassandra.db.composites.CellName;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.search.*;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.*;
/**
 * User: satya
 * A custom Lucene collector to retrieve index entries.
 * <p>
 * Hits are kept in a bounded {@link FieldValueHitQueue} that is ordered by the search's sort fields,
 * or by score when the search does not sort. Each {@link IndexEntry} is built from doc values: the
 * row key, the primary key name and the primary key bytes, plus any numeric or string doc values
 * that an aggregate function asked for.
 */
public class IndexEntryCollector extends SimpleCollector {
    /** Bounded queue of the hits collected so far; its top is the least competitive entry. */
    public final FieldValueHitQueue<IndexEntry> hitQueue;
    /** Per-segment comparators obtained from {@link #hitQueue}; replaced in {@link #doSetNextReader}. */
    LeafFieldComparator[] comparators;
    /** Doc id base of the segment currently being collected. */
    int docBase;
    /** Number of documents passed to {@link #collect(int)}. */
    int totalHits;
    /** Becomes true once {@link #totalHits} reaches {@link #numHits}. */
    boolean queueFull;
    /** The queue's current top (least competitive) entry. */
    IndexEntry bottom;
    Scorer scorer;
    /** Maximum number of entries to keep (the constructor's {@code maxResults}). */
    int numHits;
    final int[] reverseMul;
    SortedDocValues pkNames;
    SortedDocValues primaryKeys;
    SortedDocValues rowKeys;
    /** Names of numeric doc value fields to read for every collected hit. */
    List<String> numericDocValueNamesToFetch;
    /** Names of non-numeric doc value fields to read for every collected hit. */
    List<String> binaryDocValueNamesToFetch;
    /** Numeric doc values of the current segment, keyed by field name. */
    Map<String, NumericDocValues> numericDocValuesMap = new HashMap<>();
    /** Sorted (string) doc values of the current segment, keyed by field name. */
    Map<String, SortedDocValues> stringDocValues = new HashMap<>();
    Options options;
    /** Cached result of {@link #docs()}; null until that method is first called. */
    List<IndexEntry> indexEntries;
    /** Cached result of {@link #docsByRowKey()}; null until that method is first called. */
    TreeMultimap<DecoratedKey, IndexEntry> indexEntryTreeMultiMap;
    TableMapper tableMapper;
    /** True when the search supplied its own sort fields, false when hits are ordered by score. */
    public final boolean isSorted;
    /** True when every group-by and aggregate field of an aggregate function has a doc value type. */
    boolean canByPassRowFetch;

    /**
     * @return true when the search function is an aggregate whose group-by and aggregate fields
     * all resolved to a doc value type, so their values are read from doc values by this collector
     */
    public boolean canByPassRowFetch() {
        return canByPassRowFetch;
    }

    /**
     * @return the number of documents passed to {@link #collect(int)} so far
     */
    public int getTotalHits() {
        return totalHits;
    }

    /**
     * Creates a collector that keeps at most {@code maxResults} entries.
     *
     * @param tableMapper used to build the clustering key and decorated key of every entry
     * @param search      supplies the optional sort and the function
     * @param options     supplies the doc value types of the indexed fields
     * @param maxResults  capacity of the hit queue
     * @throws IOException if the hit queue cannot be created
     */
    public IndexEntryCollector(TableMapper tableMapper, Search search, Options options, int maxResults) throws IOException {
        Function function = search.function();
        this.tableMapper = tableMapper;
        this.options = options;
        org.apache.lucene.search.SortField[] sortFields = search.usesSorting() ? search.sort(options) : null;
        if (sortFields == null) {
            // No explicit sort: order the queue by score.
            hitQueue = FieldValueHitQueue.create(new org.apache.lucene.search.SortField[]{org.apache.lucene.search.SortField.FIELD_SCORE}, maxResults);
            isSorted = false;
        } else {
            hitQueue = FieldValueHitQueue.create(sortFields, maxResults);
            isSorted = true;
        }
        numHits = maxResults;
        reverseMul = hitQueue.getReverseMul();
        numericDocValueNamesToFetch = new ArrayList<>();
        binaryDocValueNamesToFetch = new ArrayList<>();
        if (function instanceof AggregateFunction) {
            AggregateFunction aggregateFunction = (AggregateFunction) function;
            List<String> groupByFields = aggregateFunction.getGroupByFields();
            List<String> aggregateFields = aggregateFunction.getAggregateFields();
            // Set as soon as one field has no doc value type; the row fetch then cannot be bypassed.
            boolean abort = false;
            FieldType[] groupDocValueTypes = null;
            if (groupByFields != null) {
                groupDocValueTypes = new FieldType[groupByFields.size()];
                for (int i = 0; i < groupByFields.size(); i++) {
                    // NOTE(review): the type is looked up by the lower-cased name, but addToFetch()
                    // below registers the field under its original spelling - confirm this is intended.
                    String field = groupByFields.get(i).toLowerCase(Locale.ROOT);
                    FieldType docValType = getDocValueType(options, field);
                    if (docValType == null) {
                        abort = true;
                        break;
                    }
                    groupDocValueTypes[i] = docValType;
                }
            }
            FieldType[] aggDocValueTypes = new FieldType[aggregateFields.size()];
            if (!abort) {
                for (int i = 0; i < aggregateFields.size(); i++) {
                    String field = aggregateFields.get(i);
                    FieldType docValType = getDocValueType(options, field);
                    if (docValType == null) {
                        abort = true;
                        break;
                    }
                    aggDocValueTypes[i] = docValType;
                }
            }
            canByPassRowFetch = !abort;
            if (canByPassRowFetch) {
                if (groupByFields != null) {
                    addToFetch(groupByFields.iterator(), groupDocValueTypes);
                }
                addToFetch(aggregateFields.iterator(), aggDocValueTypes);
            }
        }
    }

    /**
     * Looks up the doc value type of a field, first among the plain fields and then, using the part
     * of the name before the first dot, among the collection fields.
     *
     * @return the doc value type, or null when {@code field} is null or no type is registered
     */
    private FieldType getDocValueType(Options options, String field) {
        if (field == null) {
            return null;
        }
        FieldType docValType = options.fieldDocValueTypes.get(field);
        if (docValType == null) {
            String collectionName = Constants.dotSplitter.split(field).iterator().next();
            docValType = options.collectionFieldDocValueTypes.get(collectionName);
        }
        return docValType;
    }

    /**
     * Registers each field for per-hit doc value retrieval: as numeric when its type has a numeric
     * type, otherwise as binary. Fields whose type is null are skipped.
     *
     * @param groupByFields      the field names, iterated in step with {@code groupDocValueTypes}
     * @param groupDocValueTypes the doc value type of each field, by position
     */
    private void addToFetch(Iterator<String> groupByFields, FieldType[] groupDocValueTypes) {
        int i = 0;
        while (groupByFields.hasNext()) {
            String field = groupByFields.next();
            FieldType docValType = groupDocValueTypes[i++];
            if (docValType == null) {
                continue;
            }
            if (docValType.numericType() != null) {
                numericDocValueNamesToFetch.add(field);
            } else {
                binaryDocValueNamesToFetch.add(field);
            }
        }
    }

    /**
     * Drains the hit queue into a list, in the order the queue pops its entries, and caches the list.
     * Later calls return the same list. If {@link #docsByRowKey()} has already drained the queue,
     * the list is empty.
     *
     * @return the collected entries
     */
    public List<IndexEntry> docs() {
        if (indexEntries == null) {
            indexEntries = new ArrayList<>();
            IndexEntry entry;
            while ((entry = hitQueue.pop()) != null) {
                indexEntries.add(entry);
            }
        }
        return indexEntries;
    }

    /**
     * Drains the hit queue into a multimap keyed by decorated row key, with the entries of each key
     * ordered by clustering key, and caches the multimap. Later calls return the same multimap.
     *
     * @return the collected entries grouped by row key
     * @throws IllegalStateException if {@link #docs()} has already drained the queue
     */
    public TreeMultimap<DecoratedKey, IndexEntry> docsByRowKey() {
        if (indexEntries != null) throw new IllegalStateException("Hit queue already traversed");
        if (indexEntryTreeMultiMap == null) {
            indexEntryTreeMultiMap = TreeMultimap.create(Ordering.natural(), new Comparator<IndexEntry>() {
                @Override
                public int compare(IndexEntry o1, IndexEntry o2) {
                    return tableMapper.clusteringCType.compare(o1.clusteringKey, o2.clusteringKey);
                }
            });
            IndexEntry entry;
            while ((entry = hitQueue.pop()) != null) {
                indexEntryTreeMultiMap.put(entry.decoratedKey, entry);
            }
        }
        return indexEntryTreeMultiMap;
    }

    /**
     * Switches to a new segment: records its doc base and reloads the comparators and every doc
     * values source from that segment's reader.
     */
    @Override
    protected void doSetNextReader(LeafReaderContext context) throws IOException {
        docBase = context.docBase;
        comparators = hitQueue.getComparators(context);
        pkNames = LuceneUtils.getPKNameDocValues(context.reader());
        primaryKeys = LuceneUtils.getPKBytesDocValues(context.reader());
        rowKeys = LuceneUtils.getRKBytesDocValues(context.reader());
        for (String docValName : numericDocValueNamesToFetch) {
            numericDocValuesMap.put(docValName, context.reader().getNumericDocValues(docValName));
        }
        for (String docValName : binaryDocValueNamesToFetch) {
            stringDocValues.put(docValName, context.reader().getSortedDocValues(docValName));
        }
    }

    /**
     * Remembers the scorer and hands it to every comparator.
     */
    @Override
    public void setScorer(Scorer scorer) throws IOException {
        // set the scorer on all comparators
        for (LeafFieldComparator comparator : comparators) {
            comparator.setScorer(scorer);
        }
        this.scorer = scorer;
    }

    /**
     * Collects one document of the current segment. While the queue is filling every hit is added;
     * once it is full a hit is kept only if it compares as more competitive than the current bottom,
     * which it then replaces.
     *
     * @param doc segment-relative document id
     */
    @Override
    public void collect(int doc) throws IOException {
        ++totalHits;
        if (queueFull) {
            // Fastmatch: return if this hit is not competitive
            for (int i = 0; ; i++) {
                final int c = reverseMul[i] * comparators[i].compareBottom(doc);
                if (c < 0) {
                    // Definitely not competitive.
                    return;
                } else if (c > 0) {
                    // Definitely competitive.
                    break;
                } else if (i == comparators.length - 1) {
                    // Here c=0. If we're at the last comparator, this doc is not
                    // competitive, since docs are visited in doc Id order, which means
                    // this doc cannot compete with any other document in the queue.
                    return;
                }
            }
            // This hit is competitive - it takes over the slot of the bottom element.
            int slot = bottom.slot;
            copyIntoSlot(slot, doc);
            // Compute score only if it is competitive.
            final float score = scorer.score();
            updateBottom(slot, doc, score);
            publishBottom();
        } else {
            // Startup transient: queue hasn't gathered numHits yet
            final int slot = totalHits - 1;
            // Copy hit into queue
            copyIntoSlot(slot, doc);
            final float score = scorer.score();
            add(slot, doc, score);
            if (queueFull) {
                publishBottom();
            }
        }
    }

    /** Copies the comparator values of {@code doc} into {@code slot} on every comparator. */
    private void copyIntoSlot(int slot, int doc) throws IOException {
        for (LeafFieldComparator comparator : comparators) {
            comparator.copy(slot, doc);
        }
    }

    /** Tells every comparator which slot currently holds the bottom entry. */
    private void publishBottom() throws IOException {
        for (LeafFieldComparator comparator : comparators) {
            comparator.setBottom(bottom.slot);
        }
    }

    /**
     * Replaces the queue's top entry with a new entry for {@code doc} that reuses {@code slot}, and
     * updates {@link #bottom} to the queue's new top.
     */
    final void updateBottom(int slot, int doc, float score) throws IOException {
        hitQueue.pop();
        // add() returns the queue's new top. The inserted entry is usually NOT the least competitive
        // one, so it must not be treated as the bottom.
        bottom = hitQueue.add(getIndexEntry(slot, doc, score));
    }

    /**
     * Adds a new entry for {@code doc} while the queue is still filling, updates {@link #bottom} to
     * the queue's top and marks the queue full once {@link #numHits} hits have been collected.
     */
    final void add(int slot, int doc, float score) throws IOException {
        IndexEntry entry = getIndexEntry(slot, doc, score);
        bottom = hitQueue.add(entry);
        queueFull = (totalHits == numHits);
    }

    /**
     * Builds the entry for a hit by reading its primary key name, primary key bytes, row key and
     * every registered numeric/string doc value from the current segment.
     *
     * @param slot  comparator slot assigned to the hit
     * @param doc   segment-relative document id; the entry stores {@code docBase + doc}
     * @param score score of the hit
     */
    IndexEntry getIndexEntry(int slot, int doc, float score) throws IOException {
        String pkName = LuceneUtils.primaryKeyName(pkNames, doc);
        ByteBuffer primaryKey = LuceneUtils.byteBufferDocValue(primaryKeys, doc);
        ByteBuffer rowKey = LuceneUtils.byteBufferDocValue(rowKeys, doc);
        Map<String, Number> numericDocValues = new HashMap<>();
        Map<String, String> binaryDocValues = new HashMap<>();
        for (Map.Entry<String, NumericDocValues> entry : numericDocValuesMap.entrySet()) {
            Type type = AggregateFunction.getLuceneType(options, entry.getKey());
            Number number = LuceneUtils.numericDocValue(entry.getValue(), doc, type);
            numericDocValues.put(entry.getKey(), number);
        }
        for (Map.Entry<String, SortedDocValues> entry : stringDocValues.entrySet()) {
            binaryDocValues.put(entry.getKey(), LuceneUtils.stringDocValue(entry.getValue(), doc));
        }
        return new IndexEntry(rowKey, pkName, primaryKey, slot, docBase + doc, score, numericDocValues, binaryDocValues);
    }

    /**
     * Scores are always required: {@link #collect(int)} calls {@code scorer.score()} for every hit
     * it queues, and the unsorted case orders the queue by score.
     */
    @Override
    public boolean needsScores() {
        return true;
    }

    /**
     * A collected hit: the queue entry (slot, doc, score) together with the keys and doc values read
     * for the document.
     */
    public class IndexEntry extends FieldValueHitQueue.Entry {
        public final String pkName;
        public final ByteBuffer primaryKey;
        public final ByteBuffer rowKey;
        public float score;
        Map<String, Number> numericDocValuesMap;
        Map<String, String> binaryDocValuesMap;
        /** Clustering key built from {@link #primaryKey} by the enclosing collector's table mapper. */
        public final CellName clusteringKey;
        /** Decorated key built from {@link #rowKey} by the enclosing collector's table mapper. */
        public final DecoratedKey decoratedKey;

        public IndexEntry(ByteBuffer rowKey, String pkName, ByteBuffer primaryKey,
                          int slot, int doc, float score,
                          Map<String, Number> numericDocValuesMap,
                          Map<String, String> binaryDocValuesMap) {
            super(slot, doc, score);
            this.rowKey = rowKey;
            this.pkName = pkName;
            this.primaryKey = primaryKey;
            this.clusteringKey = tableMapper.makeClusteringKey(primaryKey);
            this.decoratedKey = tableMapper.decorateKey(rowKey);
            this.score = score;
            this.binaryDocValuesMap = binaryDocValuesMap;
            this.numericDocValuesMap = numericDocValuesMap;
        }

        /**
         * @return the numeric doc value read for {@code field}, or null if none was stored
         */
        public Number getNumber(String field) {
            return numericDocValuesMap.get(field);
        }

        /**
         * @return the string doc value read for {@code field}, or null if none was stored
         */
        public String getString(String field) {
            return binaryDocValuesMap.get(field);
        }

        @Override
        public String toString() {
            return super.toString() + "pkName[" + pkName + "]";
        }
    }
}