package services.repository;
import com.typesafe.config.Config;
import helpers.JsonLdConstants;
import models.Record;
import models.Resource;
import models.ResourceList;
import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest;
import org.elasticsearch.action.admin.indices.refresh.RefreshRequest;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.delete.DeleteResponse;
import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.client.Client;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.geo.GeoPoint;
import org.elasticsearch.common.lucene.search.function.CombineFunction;
import org.elasticsearch.common.lucene.search.function.FiltersFunctionScoreQuery;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.elasticsearch.common.unit.Fuzziness;
import org.elasticsearch.index.IndexNotFoundException;
import org.elasticsearch.index.query.*;
import org.elasticsearch.index.query.functionscore.FunctionScoreQueryBuilder;
import org.elasticsearch.index.query.functionscore.ScoreFunctionBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.sort.SortOrder;
import play.Logger;
import services.ElasticsearchConfig;
import services.QueryContext;
import javax.annotation.Nonnull;
import java.io.IOException;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.*;
public class ElasticsearchRepository extends Repository implements Readable, Writable, Queryable, Aggregatable {
// Elasticsearch settings (index name, server, port, ...). NOTE(review): the field is static but
// is reassigned by every constructor call, so all instances share the most recently built config.
private static ElasticsearchConfig mConfig;
// Client through which all index, get, delete, search and admin requests of this class are sent.
private Client mClient;
// Fuzziness read from the configuration and applied to query string queries in esQuery.
private Fuzziness mFuzziness;
/**
 * Creates a repository that talks to the Elasticsearch server named in the configuration
 * through a transport client.
 *
 * @param aConfiguration
 *          the application configuration from which the Elasticsearch settings are derived
 * @throws RuntimeException
 *           if the configured server name cannot be resolved
 */
public ElasticsearchRepository(Config aConfiguration) {
  super(aConfiguration);
  mConfig = new ElasticsearchConfig(aConfiguration);
  Settings settings = Settings.settingsBuilder().put(mConfig.getClientSettings()).build();
  // Resolve the server address BEFORE building the client: building first and failing on the
  // lookup afterwards would leave an already constructed, never closed client behind.
  final InetSocketTransportAddress serverAddress;
  try {
    serverAddress = new InetSocketTransportAddress(InetAddress.getByName(mConfig.getServer()),
      mConfig.getJavaPort());
  } catch (UnknownHostException ex) {
    throw new RuntimeException("Unknown Elasticsearch host: " + mConfig.getServer(), ex);
  }
  mClient = TransportClient.builder().settings(settings).build().addTransportAddress(serverAddress);
  mFuzziness = mConfig.getFuzziness();
}
/**
 * Wraps the resource in a {@link Record}, copies all metadata entries onto that record, indexes
 * it under the record's id and refreshes the configured index.
 *
 * @param aResource
 *          the resource to index
 * @param aMetadata
 *          key/value pairs that are put onto the record next to the resource; must not be null
 */
@Override
public void addResource(@Nonnull final Resource aResource, Map<String, String> aMetadata) throws IOException {
  final Record record = new Record(aResource);
  for (Map.Entry<String, String> metadata : aMetadata.entrySet()) {
    record.put(metadata.getKey(), metadata.getValue());
  }
  addJson(record.toString(), record.getId(), Record.TYPE);
  refreshIndex(mConfig.getIndex());
}
/**
 * Wraps every resource in a {@link Record}, copies all metadata entries onto each record,
 * indexes the records with one bulk request and refreshes the configured index.
 *
 * @param aResources
 *          the resources to index
 * @param aMetadata
 *          key/value pairs that are put onto every record; must not be null
 */
@Override
public void addResources(@Nonnull List<Resource> aResources, Map<String, String> aMetadata) throws IOException {
  // Maps record id -> serialized record, the shape expected by the bulk variant of addJson.
  final Map<String, String> jsonById = new HashMap<>();
  for (Resource resource : aResources) {
    final Record record = new Record(resource);
    for (Map.Entry<String, String> metadata : aMetadata.entrySet()) {
      record.put(metadata.getKey(), metadata.getValue());
    }
    jsonById.put(record.getId(), record.toString());
  }
  addJson(jsonById, Record.TYPE);
  refreshIndex(mConfig.getIndex());
}
/**
 * Fetches the document of type {@link Record#TYPE} with the given id and converts its source
 * into a resource.
 *
 * @param aId
 *          the document id to look up
 * @return the result of {@code Resource.fromMap} for the stored source
 */
@Override
public Resource getResource(@Nonnull String aId) {
  final Map<String, Object> document = getDocument(Record.TYPE, aId);
  return Resource.fromMap(document);
}
/**
 * Returns all documents whose given field matches the given value, converted to resources.
 *
 * @param aField
 *          the field to search in
 * @param aValue
 *          the value to search for; its string form is escaped before querying
 * @return the matching documents as resources, possibly empty
 */
public List<Resource> getResources(@Nonnull String aField, @Nonnull Object aValue) {
  final List<Map<String, Object>> documents = getDocuments(aField, aValue);
  final List<Resource> result = new ArrayList<>(documents.size());
  for (Map<String, Object> document : documents) {
    result.add(Resource.fromMap(document));
  }
  return result;
}
/**
 * Returns all records whose wrapped resource has the given type.
 *
 * @param aType
 *          the value to match against the type field of the wrapped resource
 * @return the matching documents as resources, possibly empty
 */
@Override
public List<Resource> getAll(@Nonnull String aType) throws IOException {
  // The type lives inside the wrapped resource: "<resource key>.<JSON-LD type key>".
  final String typeField = Record.RESOURCE_KEY.concat(".").concat(JsonLdConstants.TYPE);
  return getResources(typeField, aType);
}
/**
 * Deletes the record belonging to the given resource id and refreshes the configured index.
 *
 * @param aId
 *          the resource id; the record is looked up under {@code aId + "." + Record.RESOURCE_KEY}
 * @param aMetadata
 *          not used by this implementation
 * @return the resource that was deleted, or null if no record was found or nothing was deleted
 */
@Override
public Resource deleteResource(@Nonnull String aId, Map<String, String> aMetadata) {
  final Resource resource = getResource(aId.concat(".").concat(Record.RESOURCE_KEY));
  if (null == resource) {
    return null;
  }
  final boolean found = deleteDocument(Record.TYPE, resource.getId());
  refreshIndex(mConfig.getIndex());
  if (!found) {
    // Do not claim a deletion that did not happen.
    Logger.trace("Could not delete " + aId + " from Elasticsearch: document not found");
    return null;
  }
  Logger.trace("Deleted " + aId + " from Elasticsearch");
  return resource;
}
/**
 * Runs a single aggregation over the configured index without any query context.
 *
 * @param aAggregationBuilder
 *          the aggregation to run
 * @return the "aggregations" part of the search response as a resource
 */
@Override
public Resource aggregate(@Nonnull AggregationBuilder<?> aAggregationBuilder) throws IOException {
  final QueryContext noQueryContext = null;
  return aggregate(aAggregationBuilder, noQueryContext);
}
/**
 * Runs a single aggregation, restricted by the filters of the given query context.
 *
 * @param aAggregationBuilder
 *          the aggregation to run
 * @param aQueryContext
 *          supplies the filters to apply; may be null for no filtering
 * @return the "aggregations" part of the search response as a resource, or null if the response
 *         could not be turned into a resource
 */
public Resource aggregate(@Nonnull AggregationBuilder<?> aAggregationBuilder, QueryContext aQueryContext) {
  final String responseJson = getAggregation(aAggregationBuilder, aQueryContext).toString();
  final Resource responseResource = Resource.fromJson(responseJson);
  return null == responseResource ? null : (Resource) responseResource.get("aggregations");
}
/**
 * Runs several aggregations in one search request, restricted by the filters of the given
 * query context.
 *
 * @param aAggregationBuilders
 *          the aggregations to run
 * @param aQueryContext
 *          supplies the filters to apply; may be null for no filtering
 * @return the "aggregations" part of the search response as a resource, or null if the response
 *         could not be turned into a resource
 */
public Resource aggregate(@Nonnull List<AggregationBuilder<?>> aAggregationBuilders, QueryContext aQueryContext) {
  final String responseJson = getAggregations(aAggregationBuilders, aQueryContext).toString();
  final Resource responseResource = Resource.fromJson(responseJson);
  return null == responseResource ? null : (Resource) responseResource.get("aggregations");
}
/**
 * This search method is designed to be able to make use of the complete
 * Elasticsearch query syntax, as described in
 * http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/search-uri-request.html .
 * It runs the query without a query context.
 *
 * @param aQueryString
 *          A string describing the query
 * @param aFrom
 *          offset of the first hit to return
 * @param aSize
 *          maximum number of hits to return
 * @param aSortOrder
 *          sort instruction of the form {@code field:ASC} or {@code field:DESC}; may be empty
 * @param aFilters
 *          field names mapped to the values accepted for that field; may be null
 * @return A resource list resembling the result set of resources matching the
 *         criteria given in the query string
 * @throws IOException
 *           declared by the interface
 */
@Override
public ResourceList query(@Nonnull String aQueryString, int aFrom, int aSize, String aSortOrder,
                          Map<String, List<String>> aFilters) throws IOException {
  final QueryContext noQueryContext = null;
  return query(aQueryString, aFrom, aSize, aSortOrder, aFilters, noQueryContext);
}
/**
 * Runs a query against the configured index and wraps hits and aggregations in a resource list.
 *
 * @param aQueryString
 *          the query in Elasticsearch query string syntax; empty matches everything
 * @param aFrom
 *          offset of the first hit to return
 * @param aSize
 *          maximum number of hits to return
 * @param aSortOrder
 *          sort instruction of the form {@code field:ASC} or {@code field:DESC}; may be empty
 * @param aFilters
 *          field names mapped to the values accepted for that field; may be null
 * @param aQueryContext
 *          additional filters, aggregations, boosts and geo restrictions; may be null
 * @return the hits of the requested page together with total hit count and aggregations
 */
public ResourceList query(@Nonnull String aQueryString, int aFrom, int aSize, String aSortOrder,
                          Map<String, List<String>> aFilters, QueryContext aQueryContext) throws IOException {
  final SearchResponse response = esQuery(aQueryString, aFrom, aSize, aSortOrder, aFilters, aQueryContext);
  final List<Resource> matches = new ArrayList<>();
  for (SearchHit hit : response.getHits()) {
    matches.add(Resource.fromMap(hit.sourceAsMap()));
  }
  // FIXME: response.toString returns string serializations of scripted keys
  final Resource responseResource = Resource.fromJson(response.toString());
  // Same guard as in the aggregate methods: fromJson may yield null.
  final Resource aggregations = null == responseResource
    ? null : (Resource) responseResource.get("aggregations");
  return new ResourceList(matches, response.getHits().getTotalHits(), aQueryString, aFrom, aSize, aSortOrder,
    aFilters, aggregations);
}
/**
 * Indexes one JSON document in the configured index and waits for the request to complete.
 *
 * @param aJsonString
 *          the document source as JSON
 * @param aUuid
 *          the id under which the document is stored
 * @param aType
 *          the document type
 */
public void addJson(final String aJsonString, final String aUuid, final String aType) {
  final String index = mConfig.getIndex();
  mClient.prepareIndex(index, aType, aUuid)
    .setSource(aJsonString)
    .execute()
    .actionGet();
}
/**
 * Indexes several JSON documents of one type with a single bulk request. Failures of individual
 * items are logged, not thrown.
 *
 * @param aJsonStringIdMap
 *          document ids mapped to the JSON source to store under that id
 * @param aType
 *          the document type shared by all documents
 */
public void addJson(final Map<String, String> aJsonStringIdMap, final String aType) {
  if (aJsonStringIdMap.isEmpty()) {
    // A bulk request without any action is rejected by Elasticsearch's request validation.
    return;
  }
  final String index = mConfig.getIndex();
  BulkRequestBuilder bulkRequest = mClient.prepareBulk();
  for (Map.Entry<String, String> entry : aJsonStringIdMap.entrySet()) {
    bulkRequest.add(mClient.prepareIndex(index, aType, entry.getKey()).setSource(entry.getValue()));
  }
  BulkResponse bulkResponse = bulkRequest.execute().actionGet();
  if (bulkResponse.hasFailures()) {
    Logger.error(bulkResponse.buildFailureMessage());
  }
}
/**
 * Collects the sources of all documents matching {@code aField:aValue} by paging through the
 * search results.
 *
 * @param aField
 *          the field to search in
 * @param aValue
 *          the value to search for; its string form is escaped for the query string syntax
 * @return the sources of all matching documents, possibly empty
 */
private List<Map<String, Object>> getDocuments(final String aField, final Object aValue) {
  final int docsPerPage = 1024;
  // The query is the same for every page, so build it once.
  final QueryBuilder query = QueryBuilders.queryStringQuery(
    aField.concat(":").concat(QueryParser.escape(aValue.toString())));
  final List<Map<String, Object>> docs = new ArrayList<>();
  int page = 0;
  int hitsOnPage;
  do {
    final SearchResponse response = mClient.prepareSearch(mConfig.getIndex()).setQuery(query)
      .setSize(docsPerPage).setFrom(page * docsPerPage).execute().actionGet();
    hitsOnPage = response.getHits().hits().length;
    for (SearchHit hit : response.getHits()) {
      docs.add(hit.getSource());
    }
    page++;
    // A page that is not full is the last one. Stopping here saves the request for a page that
    // is known to be empty. NOTE(review): that extra request could also be the first one to
    // exceed the index's from+size result window - confirm against the server version in use.
  } while (hitsOnPage == docsPerPage);
  return docs;
}
/**
 * Get a document of a specified type specified by an identifier.
 *
 * @param aType
 *          the document type
 * @param aIdentifier
 *          the document id
 * @return the document source as Map of String/Object, as reported by the get response
 */
private Map<String, Object> getDocument(@Nonnull final String aType,
                                        @Nonnull final String aIdentifier) {
  return mClient.prepareGet(mConfig.getIndex(), aType, aIdentifier)
    .execute()
    .actionGet()
    .getSource();
}
/**
 * Deletes a document of a specified type specified by an identifier.
 *
 * @param aType
 *          the document type
 * @param aIdentifier
 *          the document id
 * @return the "found" flag of the delete response
 */
private boolean deleteDocument(@Nonnull final String aType, @Nonnull final String aIdentifier) {
  return mClient.prepareDelete(mConfig.getIndex(), aType, aIdentifier)
    .execute()
    .actionGet()
    .isFound();
}
/**
 * Executes a search that returns no hits (size 0) but computes the given aggregation over all
 * documents passing the filters of the query context.
 *
 * @param aAggregationBuilder
 *          the aggregation to compute
 * @param aQueryContext
 *          supplies the filters, all of which must match; may be null for no filtering
 * @return the raw search response
 */
private SearchResponse getAggregation(final AggregationBuilder<?> aAggregationBuilder, QueryContext aQueryContext) {
  final SearchRequestBuilder request = mClient.prepareSearch(mConfig.getIndex());
  final BoolQueryBuilder contextFilters = QueryBuilders.boolQuery();
  if (aQueryContext != null) {
    for (QueryBuilder filter : aQueryContext.getFilters()) {
      contextFilters.must(filter);
    }
  }
  request.addAggregation(aAggregationBuilder);
  request.setQuery(QueryBuilders.boolQuery().filter(contextFilters));
  request.setSize(0);
  return request.execute().actionGet();
}
/**
 * Executes a search that returns no hits (size 0) but computes all given aggregations over the
 * documents passing the filters of the query context.
 *
 * @param aAggregationBuilders
 *          the aggregations to compute
 * @param aQueryContext
 *          supplies the filters, all of which must match; may be null for no filtering
 * @return the raw search response
 */
private SearchResponse getAggregations(final List<AggregationBuilder<?>> aAggregationBuilders, QueryContext
  aQueryContext) {
  SearchRequestBuilder searchRequestBuilder = mClient.prepareSearch(mConfig.getIndex());
  BoolQueryBuilder globalAndFilter = QueryBuilders.boolQuery();
  if (null != aQueryContext) {
    for (QueryBuilder contextFilter : aQueryContext.getFilters()) {
      globalAndFilter.must(contextFilter);
    }
  }
  for (AggregationBuilder<?> aggregationBuilder : aAggregationBuilders) {
    searchRequestBuilder.addAggregation(aggregationBuilder);
  }
  // Use a bool query with a filter clause, like getAggregation and esQuery do, instead of the
  // deprecated filtered query; both select the documents passing the filter.
  return searchRequestBuilder.setQuery(QueryBuilders.boolQuery().filter(globalAndFilter))
    .setSize(0).execute().actionGet();
}
/**
 * Builds and executes the search request behind {@code query}.
 * <p>
 * The request combines a scored part and a non-scored filter part:
 * <ul>
 * <li>scored: the query string (fuzzy, terms combined with AND, optional per-field boosts) or
 * match-all if the string is empty, multiplied by a field value factor on
 * {@link Record#LINK_COUNT};</li>
 * <li>filter: the filters, bounding box and polygon of the query context plus the filters given
 * in {@code aFilters}; all of them must match.</li>
 * </ul>
 *
 * @param aQueryString
 *          the query in query string syntax; a trailing '!' is escaped; empty matches everything
 * @param aFrom
 *          offset of the first hit to return
 * @param aSize
 *          maximum number of hits to return
 * @param aSortOrder
 *          {@code field:ASC} or {@code field:DESC}; any direction other than ASC (ignoring case)
 *          sorts descending; strings not made of exactly two parts are ignored
 * @param aFilters
 *          field names mapped to accepted values; values of one field are alternatives, different
 *          fields must all match; a name ending in ".GTE" filters the field before that suffix
 *          for values greater than or equal to the given ones; may be null
 * @param aQueryContext
 *          fetch source, filters, aggregations, field boosts and geo restrictions; may be null
 * @return the raw search response
 */
private SearchResponse esQuery(@Nonnull String aQueryString, int aFrom, int aSize, String aSortOrder,
                               Map<String, List<String>> aFilters, QueryContext aQueryContext) {
  SearchRequestBuilder searchRequestBuilder = mClient.prepareSearch(mConfig.getIndex());
  BoolQueryBuilder globalAndFilter = QueryBuilders.boolQuery();
  String[] fieldBoosts = null;

  if (null != aQueryContext) {
    searchRequestBuilder.setFetchSource(aQueryContext.getFetchSource(), null);
    for (QueryBuilder contextFilter : aQueryContext.getFilters()) {
      globalAndFilter.must(contextFilter);
    }
    for (AggregationBuilder<?> contextAggregation : aQueryContext.getAggregations()) {
      searchRequestBuilder.addAggregation(contextAggregation);
    }
    if (aQueryContext.hasFieldBoosts()) {
      fieldBoosts = aQueryContext.getElasticsearchFieldBoosts();
    }
    if (null != aQueryContext.getZoomTopLeft() && null != aQueryContext.getZoomBottomRight()) {
      GeoBoundingBoxQueryBuilder zoomFilter = QueryBuilders.geoBoundingBoxQuery("about.location.geo")
        .topLeft(aQueryContext.getZoomTopLeft())
        .bottomRight(aQueryContext.getZoomBottomRight());
      globalAndFilter.must(zoomFilter);
    }
    if (null != aQueryContext.getPolygonFilter() && !aQueryContext.getPolygonFilter().isEmpty()) {
      GeoPolygonQueryBuilder polygonFilter = QueryBuilders.geoPolygonQuery("about.location.geo");
      for (GeoPoint geoPoint : aQueryContext.getPolygonFilter()) {
        polygonFilter.addPoint(geoPoint);
      }
      globalAndFilter.must(polygonFilter);
    }
  }

  if (!StringUtils.isEmpty(aSortOrder)) {
    String[] sort = aSortOrder.split(":");
    if (2 == sort.length) {
      searchRequestBuilder.addSort(sort[0], "ASC".equalsIgnoreCase(sort[1]) ? SortOrder.ASC : SortOrder.DESC);
    } else {
      Logger.trace("Invalid sort string: " + aSortOrder);
    }
  }

  if (null != aFilters) {
    globalAndFilter.must(buildAggregationFilter(aFilters));
  }

  QueryBuilder queryBuilder;
  if (!StringUtils.isEmpty(aQueryString)) {
    if (aQueryString.endsWith("!")) {
      aQueryString = aQueryString.substring(0, aQueryString.lastIndexOf('!')).concat("\\!");
      Logger.trace("Modify query: insert escape '\\' in front of '!': ".concat(aQueryString));
    }
    QueryStringQueryBuilder queryStringQuery = QueryBuilders.queryStringQuery(aQueryString)
      .fuzziness(mFuzziness).defaultOperator(QueryStringQueryBuilder.Operator.AND);
    if (fieldBoosts != null) {
      addFieldBoosts(queryStringQuery, fieldBoosts);
    }
    queryBuilder = queryStringQuery;
  } else {
    queryBuilder = QueryBuilders.matchAllQuery();
  }

  FunctionScoreQueryBuilder fqBuilder = QueryBuilders.functionScoreQuery(queryBuilder);
  fqBuilder.boostMode(CombineFunction.MULT);
  fqBuilder.scoreMode(FiltersFunctionScoreQuery.ScoreMode.Sum.name().toLowerCase());
  fqBuilder.add(ScoreFunctionBuilders.fieldValueFactorFunction(Record.LINK_COUNT));

  searchRequestBuilder.setSearchType(SearchType.DFS_QUERY_THEN_FETCH)
    .setQuery(QueryBuilders.boolQuery().must(fqBuilder).filter(globalAndFilter));
  return searchRequestBuilder.setFrom(aFrom).setSize(aSize).execute().actionGet();
}

/**
 * Translates the filter map into a bool query: one OR group per field, all groups ANDed.
 */
private static BoolQueryBuilder buildAggregationFilter(Map<String, List<String>> aFilters) {
  final String rangeSuffix = ".GTE";
  BoolQueryBuilder aggregationAndFilter = QueryBuilders.boolQuery();
  for (Map.Entry<String, List<String>> entry : aFilters.entrySet()) {
    // Decide once per filter whether it is a range filter. Stripping the suffix inside the value
    // loop would turn every value after the first into a term query on the stripped name.
    final String filterName = entry.getKey();
    final boolean isRangeFilter = filterName.endsWith(rangeSuffix);
    final String fieldName = isRangeFilter
      ? filterName.substring(0, filterName.length() - rangeSuffix.length()) : filterName;
    BoolQueryBuilder orFilterBuilder = QueryBuilders.boolQuery();
    for (String filterValue : entry.getValue()) {
      if (isRangeFilter) {
        orFilterBuilder.should(QueryBuilders.rangeQuery(fieldName).gte(filterValue));
      } else {
        // This could also be 'must' queries, allowing to narrow down the result list
        orFilterBuilder.should(QueryBuilders.termQuery(fieldName, filterValue));
      }
    }
    aggregationAndFilter.must(orFilterBuilder);
  }
  return aggregationAndFilter;
}

/**
 * Registers every {@code field^boost} entry on the query; malformed entries are logged and skipped.
 */
private static void addFieldBoosts(QueryStringQueryBuilder aQuery, String[] aFieldBoosts) {
  for (String fieldBoost : aFieldBoosts) {
    final String[] parts = fieldBoost.split("\\^");
    if (parts.length < 2) {
      Logger.trace("Invalid field boost: " + fieldBoost);
      continue;
    }
    try {
      aQuery.field(parts[0], Float.parseFloat(parts[1]));
    } catch (NumberFormatException e) {
      // A non-numeric boost must not abort the whole search.
      Logger.trace("Invalid field boost: " + fieldBoost);
    }
  }
}
/**
 * Asks the cluster whether an index exists.
 *
 * @param aIndex
 *          the name of the index
 * @return the "exists" flag of the response
 */
public boolean hasIndex(String aIndex) {
  final boolean exists = mClient.admin().indices()
    .prepareExists(aIndex)
    .execute()
    .actionGet()
    .isExists();
  return exists;
}
/**
 * Refreshes the given index. A missing index is logged and otherwise ignored.
 *
 * @param aIndex
 *          the name of the index to refresh
 */
private void refreshIndex(String aIndex) {
  try {
    mClient.admin().indices().refresh(new RefreshRequest(aIndex)).actionGet();
  } catch (IndexNotFoundException e) {
    // Hand the exception to the logger instead of printing the stack trace to stderr.
    Logger.error("Trying to refresh index \"" + aIndex + "\" in Elasticsearch.", e);
  }
}
/**
 * Deletes the given index. A missing index is logged and otherwise ignored.
 *
 * @param aIndex
 *          the name of the index to delete
 */
public void deleteIndex(String aIndex) {
  try {
    mClient.admin().indices().delete(new DeleteIndexRequest(aIndex)).actionGet();
  } catch (IndexNotFoundException e) {
    // Hand the exception to the logger instead of printing the stack trace to stderr.
    Logger.error("Trying to delete index \"" + aIndex + "\" from Elasticsearch.", e);
  }
}
/**
 * Creates the given index from the configured index settings and then waits until the cluster
 * health is at least yellow. Failures are logged, not thrown.
 *
 * @param aIndex
 *          the name of the index to create
 */
public void createIndex(String aIndex) {
  try {
    mClient.admin().indices().prepareCreate(aIndex).setSource(mConfig.getIndexConfigString()).execute().actionGet();
    mClient.admin().cluster().prepareHealth().setWaitForYellowStatus().execute().actionGet();
  } catch (ElasticsearchException indexAlreadyExists) {
    // NOTE(review): this catches every Elasticsearch failure, not only "already exists"; the
    // logged exception shows the actual cause.
    Logger.error("Trying to create index \"" + aIndex + "\" in Elasticsearch. Index already exists.",
      indexAlreadyExists);
  } catch (IOException ioException) {
    Logger.error("Trying to create index \"" + aIndex + "\" in Elasticsearch. Couldn't read index config file.",
      ioException);
  }
}
/**
 * Returns the Elasticsearch configuration held in the static {@code mConfig} field.
 *
 * @return the configuration created by the most recent constructor call
 */
public ElasticsearchConfig getConfig() {
return mConfig;
}
}