/*
* Copyright 2014, Tuplejump Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.tuplejump.stargate.lucene;
import org.apache.lucene.document.*;
import org.apache.lucene.index.*;
import org.apache.lucene.queryparser.flexible.standard.config.NumericConfig;
import org.apache.lucene.search.*;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.NumericUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.text.NumberFormat;
import java.util.Locale;
/**
 * Static helpers for building Lucene fields, terms and queries and for reading
 * doc values back, using the reserved field names declared below.
 * <p>
 * User: satya
 */
public class LuceneUtils {
    /** Field name used by {@link #rowkeyTerm(String)} and {@link #rowKeyIndexed(String)}. */
    public static final String RK_INDEXED = "_rk_idx";
    /** Doc-values field name used by {@link #rkBytesDocValue(ByteBuffer)}. */
    public static final String RK_BYTES = "_rk_bytes";
    /** Doc-values field name used by {@link #pkNameDocValue(String)}. */
    public static final String PK_NAME_DOC_VAL = "_pk_name_val";
    /** Field name used by {@link #primaryKeyTerm(String)} and {@link #primaryKeyField(String)}. */
    public static final String PK_INDEXED = "_pk_idx";
    /** Doc-values field name used by {@link #pkBytesDocValue(ByteBuffer)}. */
    public static final String PK_BYTES = "_pk_bytes";
    /** Field name used by {@link #tsField(long, FieldType)} and {@link #tsTerm(long)}. */
    public static final String CF_TS_INDEXED = "_cf_ts";

    private static final Logger logger = LoggerFactory.getLogger(LuceneUtils.class);

    // NumberFormat instances are not thread safe, so every thread gets its own
    // instance (no grouping separators, at least four integer digits).
    public static final ThreadLocal<NumberFormat> numberFormatThreadLocal =
            new ThreadLocal<NumberFormat>() {
                @Override
                public NumberFormat initialValue() {
                    NumberFormat fmt = NumberFormat.getInstance();
                    fmt.setGroupingUsed(false);
                    fmt.setMinimumIntegerDigits(4);
                    return fmt;
                }
            };

    /**
     * Builds the numeric configuration matching a field type.
     *
     * @param fieldType the field type to inspect
     * @return a {@link NumericConfigTL} carrying the field type's precision step and
     * numeric type, or {@code null} when the field type has no numeric type
     */
    public static NumericConfig numericConfig(FieldType fieldType) {
        if (fieldType.numericType() == null) {
            return null;
        }
        return new NumericConfigTL(fieldType.numericPrecisionStep(), fieldType.numericType());
    }

    /**
     * Computes the location of an index as
     * {@code <Options.defaultIndexesDir>/<ksName>/<cfName>/<vNodeName>/<indexName>}.
     * This method only builds the path; it does not create any directory.
     *
     * @param ksName    first path component below the default indexes directory
     * @param cfName    second path component
     * @param indexName name of the last path component
     * @param vNodeName third path component
     * @return the {@link File} denoting the index location
     * @throws IOException declared for callers; the current implementation performs no I/O
     */
    public static File getDirectory(String ksName, String cfName, String indexName, String vNodeName) throws IOException {
        String dirName = Options.defaultIndexesDir
                + File.separator + ksName
                + File.separator + cfName
                + File.separator + vNodeName;
        // Parameterized logging makes an explicit isDebugEnabled() guard unnecessary.
        logger.debug("SGIndex - INDEX_FILE_NAME - {}", indexName);
        logger.debug("SGIndex - INDEX_DIR_NAME - {}", dirName);
        return new File(dirName, indexName);
    }

    /**
     * Copies a field type and enables doc values on the copy.
     *
     * @param fieldType the field type to copy
     * @return a new field type with {@link DocValuesType#NUMERIC} doc values when the
     * source has a numeric type, {@link DocValuesType#BINARY} otherwise
     */
    public static FieldType docValueTypeFrom(FieldType fieldType) {
        FieldType docValType = new FieldType(fieldType);
        DocValuesType docValuesType = fieldType.numericType() != null
                ? DocValuesType.NUMERIC
                : DocValuesType.BINARY;
        docValType.setDocValuesType(docValuesType);
        return docValType;
    }

    /**
     * Creates a field type from the given properties.
     *
     * @param properties source of the tokenized/stored/term-vector/norms/index-option flags
     *                   and, for numeric types, of the numeric type and precision step
     * @return a newly configured field type
     */
    public static FieldType dynamicFieldType(Properties properties) {
        FieldType fieldType = new FieldType();
        fieldType.setTokenized(properties.isTokenized());
        fieldType.setStored(properties.isStored());
        fieldType.setStoreTermVectors(properties.isStoreTermVectors());
        fieldType.setStoreTermVectorOffsets(properties.isStoreTermVectorOffsets());
        fieldType.setStoreTermVectorPayloads(properties.isStoreTermVectorPayloads());
        fieldType.setStoreTermVectorPositions(properties.isStoreTermVectorPositions());
        fieldType.setOmitNorms(properties.isOmitNorms());
        fieldType.setIndexOptions(properties.getIndexOptions());
        if (properties.getType().isNumeric()) {
            switch (properties.getType()) {
                case integer:
                    fieldType.setNumericType(FieldType.NumericType.INT);
                    break;
                case bigint:
                    fieldType.setNumericType(FieldType.NumericType.LONG);
                    break;
                case decimal:
                    fieldType.setNumericType(FieldType.NumericType.FLOAT);
                    break;
                default:
                    // every other numeric type is indexed as a double
                    fieldType.setNumericType(FieldType.NumericType.DOUBLE);
                    break;
            }
            fieldType.setNumericPrecisionStep(properties.getNumericPrecisionStep());
        }
        return fieldType;
    }

    /**
     * Deletes "dir" together with all files and subdirectories under it.
     *
     * @param dir file or directory to be deleted
     * @throws java.io.IOException if a directory cannot be listed or any part of the
     *                             tree cannot be deleted
     */
    public static void deleteRecursive(File dir) throws IOException {
        if (dir.isDirectory()) {
            String[] children = dir.list();
            // File.list() returns null on an I/O error or when the directory vanished
            // in the meantime; report that instead of failing with a NullPointerException.
            if (children == null) {
                throw new IOException("Unable to list contents of directory " + dir);
            }
            for (String child : children) {
                deleteRecursive(new File(dir, child));
            }
        }
        // The directory is now empty so now it can be smoked
        deleteWithConfirm(dir);
    }

    /**
     * Deletes a single file or empty directory.
     *
     * @param file the file to delete
     * @throws IOException if {@link Files#delete(java.nio.file.Path)} fails
     */
    public static void deleteWithConfirm(File file) throws IOException {
        assert file.exists() : "attempted to delete non-existing file " + file.getName();
        logger.debug("Deleting {}", file.getName());
        Files.delete(file.toPath());
    }

    /**
     * Reads a numeric doc value and converts it according to the given type.
     *
     * @param rowKeyValues the doc values to read from; when {@code null} the raw value is 0
     * @param docId        the document to read
     * @param type         how to interpret the raw long value
     * @return an {@code Integer} for {@code integer}, a {@code Long} for {@code bigint},
     * a {@code Float} (from the int bits) for {@code decimal}, or a {@code Double}
     * (from the long bits) for {@code bigdecimal}
     * @throws IOException              declared for callers
     * @throws IllegalArgumentException for any other type
     */
    public static Number numericDocValue(NumericDocValues rowKeyValues, int docId, Type type) throws IOException {
        long raw = rowKeyValues == null ? 0L : rowKeyValues.get(docId);
        if (type == Type.integer) {
            return (int) raw;
        } else if (type == Type.bigint) {
            return raw;
        } else if (type == Type.decimal) {
            return Float.intBitsToFloat((int) raw);
        } else if (type == Type.bigdecimal) {
            return Double.longBitsToDouble(raw);
        }
        throw new IllegalArgumentException(String.format("Invalid type for numeric doc values <%s>", type));
    }

    /**
     * @param atomicReader the reader to query
     * @return the sorted doc values of the {@link #PK_BYTES} field
     * @throws IOException if the reader fails
     */
    public static SortedDocValues getPKBytesDocValues(LeafReader atomicReader) throws IOException {
        return atomicReader.getSortedDocValues(LuceneUtils.PK_BYTES);
    }

    /**
     * @param atomicReader the reader to query
     * @return the sorted doc values of the {@link #PK_NAME_DOC_VAL} field
     * @throws IOException if the reader fails
     */
    public static SortedDocValues getPKNameDocValues(LeafReader atomicReader) throws IOException {
        return atomicReader.getSortedDocValues(LuceneUtils.PK_NAME_DOC_VAL);
    }

    /**
     * @param atomicReader the reader to query
     * @return the sorted doc values of the {@link #RK_BYTES} field
     * @throws IOException if the reader fails
     */
    public static SortedDocValues getRKBytesDocValues(LeafReader atomicReader) throws IOException {
        return atomicReader.getSortedDocValues(LuceneUtils.RK_BYTES);
    }

    /**
     * Reads a binary doc value into a buffer backed by its own copy of the bytes.
     *
     * @param docValues the doc values to read from
     * @param docId     the document to read
     * @return a buffer wrapping a deep copy of the document's value
     * @throws IOException declared for callers
     */
    public static ByteBuffer byteBufferDocValue(BinaryDocValues docValues, int docId) throws IOException {
        BytesRef ref = BytesRef.deepCopyOf(docValues.get(docId));
        return ByteBuffer.wrap(ref.bytes, ref.offset, ref.length);
    }

    /**
     * Reads a binary doc value and decodes it as UTF-8.
     *
     * @param rowKeyValues the doc values to read from
     * @param docId        the document to read
     * @return the decoded string
     * @throws IOException declared for callers
     */
    public static String stringDocValue(BinaryDocValues rowKeyValues, int docId) throws IOException {
        return rowKeyValues.get(docId).utf8ToString();
    }

    /**
     * Reads a binary doc value and decodes it as UTF-8.
     *
     * @param primaryKeyNames the doc values to read from
     * @param docId           the document to read
     * @return the decoded string
     * @throws IOException declared for callers
     */
    public static String primaryKeyName(BinaryDocValues primaryKeyNames, int docId) throws IOException {
        BytesRef ref = primaryKeyNames.get(docId);
        return new String(ref.bytes, ref.offset, ref.length, StandardCharsets.UTF_8);
    }

    /**
     * Creates the {@link #PK_BYTES} doc-values field from the readable bytes
     * (position to limit) of the given buffer. The buffer's position is not changed.
     *
     * @param byteBufferValue the value to store
     * @return a sorted doc-values field
     */
    public static Field pkBytesDocValue(final ByteBuffer byteBufferValue) {
        return new SortedDocValuesField(PK_BYTES, toBytesRef(byteBufferValue));
    }

    /**
     * Creates the {@link #RK_BYTES} doc-values field from the readable bytes
     * (position to limit) of the given buffer. The buffer's position is not changed.
     *
     * @param byteBufferValue the value to store
     * @return a sorted doc-values field
     */
    public static Field rkBytesDocValue(final ByteBuffer byteBufferValue) {
        return new SortedDocValuesField(RK_BYTES, toBytesRef(byteBufferValue));
    }

    /**
     * Exposes the bytes between the buffer's position and limit as a BytesRef,
     * copying them only when the buffer has no accessible backing array.
     */
    private static BytesRef toBytesRef(ByteBuffer buffer) {
        if (buffer.hasArray()) {
            // The readable region starts at arrayOffset() + position(), not at arrayOffset().
            return new BytesRef(buffer.array(), buffer.arrayOffset() + buffer.position(), buffer.remaining());
        }
        // Direct and read-only buffers do not expose array(); copy through a duplicate
        // so the caller's position stays untouched.
        byte[] copy = new byte[buffer.remaining()];
        buffer.duplicate().get(copy);
        return new BytesRef(copy);
    }

    /**
     * Creates the {@link #PK_NAME_DOC_VAL} doc-values field from the UTF-8 bytes of a name.
     *
     * @param pkName the name to store
     * @return a sorted doc-values field whose {@code toString()} includes the name
     */
    public static Field pkNameDocValue(final String pkName) {
        BytesRef bytesRef = new BytesRef(pkName.getBytes(StandardCharsets.UTF_8));
        return new SortedDocValuesField(PK_NAME_DOC_VAL, bytesRef) {
            @Override
            public String toString() {
                return String.format("PK Name String->BinaryDocValuesField<%s>", pkName);
            }
        };
    }

    /**
     * @param name  field name
     * @param value field value
     * @return a non-stored {@link TextField}
     */
    public static Field textField(String name, String value) {
        return new TextField(name, value, Field.Store.NO);
    }

    /**
     * @param name  field name
     * @param value field value
     * @return a non-stored {@link StringField}
     */
    public static Field stringField(String name, String value) {
        return new StringField(name, value, Field.Store.NO);
    }

    /**
     * @param name  field name
     * @param value text parsed with {@link Double#parseDouble(String)}
     * @return a non-stored {@link DoubleField}
     * @throws NumberFormatException if the value is not a parsable double
     */
    public static Field doubleField(String name, String value) {
        return new DoubleField(name, Double.parseDouble(value), Field.Store.NO);
    }

    /**
     * @param name  field name
     * @param value text parsed with {@link Long#parseLong(String)}
     * @return a non-stored {@link LongField}
     * @throws NumberFormatException if the value is not a parsable long
     */
    public static Field longField(String name, String value) {
        return new LongField(name, Long.parseLong(value), Field.Store.NO);
    }

    /**
     * @param timestamp the value to index
     * @param fieldType the field type to index it with
     * @return a {@link LongField} named {@link #CF_TS_INDEXED}
     */
    public static Field tsField(long timestamp, FieldType fieldType) {
        return new LongField(CF_TS_INDEXED, timestamp, fieldType);
    }

    /**
     * @param pkString the term text
     * @return a term on the {@link #PK_INDEXED} field
     */
    public static Term primaryKeyTerm(String pkString) {
        return new Term(PK_INDEXED, pkString);
    }

    /**
     * @param pkString the field value
     * @return a non-stored {@link StringField} named {@link #PK_INDEXED}
     */
    public static Field primaryKeyField(String pkString) {
        return new StringField(PK_INDEXED, pkString, Field.Store.NO);
    }

    /**
     * @param rkString the term text
     * @return a term on the {@link #RK_INDEXED} field
     */
    public static Term rowkeyTerm(String rkString) {
        return new Term(RK_INDEXED, rkString);
    }

    /**
     * @param rkValue the field value
     * @return a non-stored {@link StringField} named {@link #RK_INDEXED}
     */
    public static Field rowKeyIndexed(String rkValue) {
        return new StringField(RK_INDEXED, rkValue, Field.Store.NO);
    }

    /**
     * Builds the full-precision term for a timestamp on the {@link #CF_TS_INDEXED} field.
     *
     * @param ts the timestamp
     * @return the prefix-coded term for exactly this value
     */
    public static Term tsTerm(long ts) {
        BytesRefBuilder tsBytes = new BytesRefBuilder();
        // The second argument of longToPrefixCoded is a shift (number of low bits to
        // drop), not a precision step. Shift 0 encodes the exact value; a non-zero
        // shift would produce a reduced-precision term covering a range of timestamps.
        NumericUtils.longToPrefixCoded(ts, 0, tsBytes);
        return new Term(CF_TS_INDEXED, tsBytes);
    }

    /**
     * Creates a field whose concrete class depends on the type in the properties:
     * {@code integer}, {@code bigint}, {@code bigdecimal} and {@code decimal} parse the
     * value into an int, long, double or float field; {@code date} parses it to
     * milliseconds in a long field; {@code bool} stores the result of
     * {@link Boolean#parseBoolean(String)} as text; anything else stores the value as is.
     *
     * @param name       field name
     * @param properties supplies the type
     * @param value      textual value
     * @param fieldType  field type passed to the created field
     * @return the created field
     * @throws NumberFormatException if a numeric value cannot be parsed
     */
    public static Field field(String name, Properties properties, String value, FieldType fieldType) {
        Type type = properties.getType();
        if (type == Type.integer) {
            return new IntField(name, Integer.parseInt(value), fieldType);
        } else if (type == Type.bigint) {
            return new LongField(name, Long.parseLong(value), fieldType);
        } else if (type == Type.bigdecimal) {
            return new DoubleField(name, Double.parseDouble(value), fieldType);
        } else if (type == Type.decimal) {
            return new FloatField(name, Float.parseFloat(value), fieldType);
        } else if (type == Type.date) {
            //TODO - set correct locale
            // NOTE(review): the raw value is passed as the pattern argument of
            // Dates.forPattern and then parsed with the resulting formatter - confirm
            // whether a configured date pattern was intended here.
            FormatDateTimeFormatter formatter = Dates.forPattern(value, Locale.US);
            return new LongField(name, formatter.parser().parseMillis(value), fieldType);
        } else if (type == Type.bool) {
            Boolean val = Boolean.parseBoolean(value);
            return new Field(name, val.toString(), fieldType);
        } else {
            return new Field(name, value, fieldType);
        }
    }

    /**
     * @param startPK lower bound (inclusive)
     * @param endPK   upper bound (inclusive)
     * @return a string range query on the {@link #PK_INDEXED} field
     */
    public static Query getPKRangeDeleteQuery(String startPK, String endPK) {
        return TermRangeQuery.newStringRange(PK_INDEXED, startPK, endPK, true, true);
    }

    /**
     * Restricts a query to one partition key.
     *
     * @param query              the query to restrict
     * @param partitionKeyString the key to require on the {@link #RK_INDEXED} field,
     *                           or {@code null} for no restriction
     * @return the query itself when the key is {@code null}; otherwise a boolean query
     * requiring both the original query and the key term
     */
    public static Query getQueryUpdatedWithPKCondition(Query query, String partitionKeyString) {
        if (partitionKeyString == null) {
            return query;
        }
        BooleanQuery.Builder finalQuery = new BooleanQuery.Builder();
        finalQuery.add(query, BooleanClause.Occur.MUST);
        finalQuery.add(new TermQuery(LuceneUtils.rowkeyTerm(partitionKeyString)), BooleanClause.Occur.MUST);
        return finalQuery.build();
    }

    /**
     * A {@link NumericConfig} whose number format comes from
     * {@link #numberFormatThreadLocal} instead of one shared instance.
     */
    public static class NumericConfigTL extends NumericConfig {
        // Only handed to the super constructor; getNumberFormat() never returns it.
        static final NumberFormat dummyInstance = NumberFormat.getInstance();

        /**
         * @param precisionStep precision step passed to the super class
         * @param type          numeric type passed to the super class
         */
        public NumericConfigTL(int precisionStep, FieldType.NumericType type) {
            super(precisionStep, dummyInstance, type);
        }

        /**
         * @return the calling thread's format from {@link #numberFormatThreadLocal}
         */
        @Override
        public NumberFormat getNumberFormat() {
            return numberFormatThreadLocal.get();
        }
    }
}