package com.oculusinfo.binning.io.impl;

import com.oculusinfo.binning.BinIndex;
import com.oculusinfo.binning.TileData;
import com.oculusinfo.binning.TileIndex;
import com.oculusinfo.binning.TilePyramid;
import com.oculusinfo.binning.impl.SparseTileData;
import com.oculusinfo.binning.io.PyramidIO;
import com.oculusinfo.binning.io.serialization.TileSerializer;
import com.oculusinfo.binning.util.JsonUtilities;
import org.apache.commons.lang.StringEscapeUtils;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.client.Client;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.elasticsearch.index.query.BoolFilterBuilder;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.FilterBuilder;
import org.elasticsearch.index.query.FilterBuilders;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.RangeFilterBuilder;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.Aggregations;
import org.elasticsearch.search.aggregations.bucket.histogram.Histogram;
import org.elasticsearch.search.aggregations.metrics.max.Max;
import org.elasticsearch.search.aggregations.metrics.min.Min;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.awt.geom.Rectangle2D;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
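/*
 * A minimal construction sketch, for illustration only. The cluster name,
 * index, field names, host/port, and the WebMercatorTilePyramid choice are
 * assumptions, not values taken from any real deployment:
 *
 *   TilePyramid pyramid = new WebMercatorTilePyramid();
 *   ElasticsearchPyramidIO io = new ElasticsearchPyramidIO(
 *       "elasticsearch",      // cluster name (hypothetical)
 *       "twitter-example",    // index to query (hypothetical)
 *       "locationX",          // x field (hypothetical)
 *       "locationY",          // y field (hypothetical)
 *       "localhost", 9300,    // transport address and port
 *       pyramid,
 *       4);                   // zoom levels to precompute maxima for
 *   String metadata = io.readMetaData("unused-pyramid-id");
 *   io.shutdown();
 */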
/**
 * A {@link PyramidIO} implementation that builds tiles on the fly from an
 * Elasticsearch index, binning documents with nested histogram aggregations
 * over the configured x and y fields.
 *
 * Created by cmenezes on 2015-06-04.
 */
public class ElasticsearchPyramidIO implements PyramidIO {

    private static final Logger LOGGER = LoggerFactory.getLogger(ElasticsearchPyramidIO.class);

    public static final int BINS = 256;

    private Client client;
    private String index;
    private String xField;
    private String yField;
    private TilePyramid tilePyramid;
    private final int numZoomlevels;
    private List<Double> maxValues;

    public ElasticsearchPyramidIO(
        String esClusterName,
        String esIndex,
        String xField,
        String yField,
        String esTransportAddress,
        int esTransportPort,
        TilePyramid tilePyramid,
        int zoomLevelPrecompute) {

        this.index = esIndex;
        this.xField = xField;
        this.yField = yField;
        this.tilePyramid = tilePyramid;
        this.numZoomlevels = zoomLevelPrecompute;

        if (this.client == null) {
            LOGGER.debug("Existing Elasticsearch client not found; creating a transport client.");
            try {
                Settings settings = ImmutableSettings.settingsBuilder()
                    .put("cluster.name", esClusterName)
                    .put("client.transport.sniff", false)
                    .put("sniffOnConnection", true)
                    .build();
                this.client = new TransportClient(settings)
                    .addTransportAddress(new InetSocketTransportAddress(esTransportAddress, esTransportPort));
            } catch (IllegalArgumentException e) {
                LOGGER.debug("Illegal arguments to Elasticsearch transport client builder.", e);
            }
        } else {
            LOGGER.debug("An Elasticsearch client already exists.");
        }
    }

    private SearchRequestBuilder baseQuery(FilterBuilder filter) {
        return this.client.prepareSearch(this.index)
            .setTypes("datum")
            .setSearchType(SearchType.COUNT)
            .setQuery(QueryBuilders.filteredQuery(
                QueryBuilders.matchAllQuery(),
                filter
            ));
    }

    private SearchResponse timeFilteredRequest(double startX, double endX, double startY, double endY, JSONObject filterJSON) {
        // The first filter excludes everything outside of the tile boundary
        // on both the xField and the yField.
        BoolFilterBuilder boundaryFilter = FilterBuilders.boolFilter();
        boundaryFilter.must(
            FilterBuilders.rangeFilter(this.xField)
                .gte(startX) // startX is the minimum x value
                .lte(endX),
            FilterBuilders.rangeFilter(this.yField)
                .gte(endY)   // endY is the minimum y value
                .lte(startY)
        );

        // Transform the filter-list JSON into a map; ignore it if it cannot be parsed.
        Map<String, Object> filterMap;
        try {
            filterMap = JsonUtilities.jsonObjToMap(filterJSON);
        } catch (Exception e) {
            filterMap = null;
        }

        if (filterMap != null) {
            Set<String> keys = filterMap.keySet();
            List<Map> filterList = new ArrayList<>();
            for (String key : keys) {
                filterList.add((Map) filterMap.get(key));
            }
            for (Map filter : filterList) {
                String type = (String) filter.get("type");
                String filterPath = (String) filter.get("path");
                switch (type) {
                    case "terms":
                        Map termsMap = (Map) filter.get("terms");
                        List<String> termsList = new ArrayList<>();
                        for (Object key : termsMap.keySet()) {
                            termsList.add((String) termsMap.get(key));
                        }
                        boundaryFilter.must(FilterBuilders.termsFilter(filterPath, termsList).execution("or"));
                        break;
                    case "range":
                        // Note: the range filter requires a numeric value to filter on;
                        // it does not work with formatted date strings like "2015-03-01".
                        // Check for the existence of "from", "to", or both.
                        RangeFilterBuilder rangeFilterBuilder = FilterBuilders.rangeFilter(filterPath);
                        if (filter.containsKey("from") && filter.get("from") != null) {
                            rangeFilterBuilder.from(filter.get("from"));
                        }
                        if (filter.containsKey("to") && filter.get("to") != null) {
                            rangeFilterBuilder.to(filter.get("to"));
                        }
                        boundaryFilter.must(rangeFilterBuilder);
                        break;
                    case "UDF":
                        // Build a user-defined facet: a query-string query wrapped in a filter.
                        BoolQueryBuilder boolQuery = new BoolQueryBuilder();
                        boolQuery.must(QueryBuilders.queryStringQuery(
                            StringEscapeUtils.escapeJavaScript((String) filter.get("query")))
                            .field("body.en"));
                        boundaryFilter.must(FilterBuilders.queryFilter(boolQuery));
                        break;
                    default:
                        LOGGER.error("Unsupported filter type: {}", type);
                }
            }
        }

        SearchRequestBuilder searchRequest = baseQuery(boundaryFilter)
            .addAggregation(
                AggregationBuilders.histogram("xField")
                    .field(this.xField)
                    .interval(getHistogramIntervalFromBounds(startX, endX))
                    .minDocCount(1)
                    .subAggregation(
                        AggregationBuilders.histogram("yField")
                            .field(this.yField)
                            .interval(getHistogramIntervalFromBounds(endY, startY))
                            .minDocCount(1)
                    )
            );

        return searchRequest
            .execute()
            .actionGet();
    }
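    /*
     * The filterJSON argument above is parsed as a map of named filter
     * descriptors. The shape below is inferred from the parsing code; the
     * names and values are purely illustrative:
     *
     *   {
     *     "timeFilter":  { "type": "range", "path": "timestamp", "from": 1420070400000, "to": 1430438400000 },
     *     "termFilter":  { "type": "terms", "path": "keywords",  "terms": { "0": "apple", "1": "banana" } },
     *     "queryFilter": { "type": "UDF",   "query": "some query string" }
     *   }
     *
     * "terms" filters OR their values together, "range" filters take numeric
     * "from"/"to" endpoints, and "UDF" filters wrap a query-string query
     * against the hard-coded "body.en" field.
     */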
    private Long getHistogramIntervalFromBounds(double start, double end) {
        long interval = (long) Math.floor((end - start) / BINS);
        // The calculated interval can be less than 1 if the data is sparse.
        // Elasticsearch rejects histogram intervals of less than 1, so clamp it.
        if (interval < 1) {
            interval = 1;
        }
        return interval;
    }

    public void shutdown() {
        try {
            LOGGER.debug("Shutting down the Elasticsearch client.");
            this.client.close();
        } catch (Exception e) {
            LOGGER.error("Couldn't close the Elasticsearch connection.", e);
        }
    }

    @Override
    public void initializeForWrite(String pyramidId) throws IOException {
        // This PyramidIO is read-only; writing is not supported.
    }

    @Override
    public <T> void writeTiles(String pyramidId, TileSerializer<T> serializer, Iterable<TileData<T>> data) throws IOException {
        // This PyramidIO is read-only; writing is not supported.
    }

    @Override
    public void writeMetaData(String pyramidId, String metaData) throws IOException {
        // This PyramidIO is read-only; writing is not supported.
    }

    @Override
    public void initializeForRead(String pyramidId, int width, int height, Properties dataDescription) {
        // No initialization is required; tiles are built per request.
    }
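    /*
     * A worked example of the bucket-to-bin mapping used below, with invented
     * numbers: for a tile whose x extent is [0, 25600) and BINS = 256, the
     * histogram interval is floor(25600 / 256) = 100, so a bucket keyed at
     * 300 falls in bin column 300 / 100 = 3. The actual mapping is delegated
     * to TilePyramid.rootToBin, which also accounts for the axis orientation
     * between root coordinates and bin rows.
     */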
    private Map<Integer, Map> parseAggregations(Histogram date_agg, TileIndex tileIndex) {
        List<? extends Histogram.Bucket> dateBuckets = date_agg.getBuckets();

        Map<Integer, Map> result = new HashMap<>();
        long maxval = 0;

        for (Histogram.Bucket dateBucket : dateBuckets) {
            Histogram cluster_agg = dateBucket.getAggregations().get("yField");
            List<? extends Histogram.Bucket> clusterBuckets = cluster_agg.getBuckets();

            BinIndex xBinIndex = tilePyramid.rootToBin(dateBucket.getKeyAsNumber().doubleValue(), 0, tileIndex);
            int xBin = xBinIndex.getX();

            Map<Integer, Long> intermediate = new HashMap<>();
            result.put(xBin, intermediate);

            for (Histogram.Bucket clusterBucket : clusterBuckets) {
                // Given the bin coordinates, see if there's any data in those
                // bins, and add values to existing bins.
                BinIndex binIndex = tilePyramid.rootToBin(
                    dateBucket.getKeyAsNumber().doubleValue(),
                    clusterBucket.getKeyAsNumber().doubleValue(),
                    tileIndex);
                int yBin = binIndex.getY();

                if (result.containsKey(xBin) && result.get(xBin).containsKey(yBin)) {
                    intermediate.put(yBin, (long) intermediate.get(yBin) + clusterBucket.getDocCount());
                } else if (result.containsKey(xBin) && !intermediate.containsKey(yBin)) {
                    intermediate.put(yBin, clusterBucket.getDocCount());
                }
                // Track the largest bucket count seen (currently unused).
                if (maxval < clusterBucket.getDocCount()) {
                    maxval = clusterBucket.getDocCount();
                }
            }
        }
        return result;
    }

    @Override
    public <T> List<TileData<T>> readTiles(String pyramidId, TileSerializer<T> serializer, Iterable<TileIndex> tiles, JSONObject properties) throws IOException {
        List<TileData<T>> results = new LinkedList<TileData<T>>();

        // Iterate over the tile indices.
        for (TileIndex tileIndex : tiles) {
            Rectangle2D rect = tilePyramid.getTileBounds(tileIndex);

            // Get the minimum/start and maximum/end values on each axis.
            double startX = rect.getX();
            double endX = rect.getMaxX();
            double startY = rect.getMaxY();
            double endY = rect.getY();

            SearchResponse sr = timeFilteredRequest(startX, endX, startY, endY, properties);

            if (responseHasData(sr)) {
                Histogram date_agg = sr.getAggregations().get("xField");
                Map<Integer, Map> tileMap = parseAggregations(date_agg, tileIndex);
                SparseTileData tileData = new SparseTileData(tileIndex, tileMap, 0);
                results.add(tileData);
            }
        }
        return results;
    }

    protected boolean responseHasData(SearchResponse sr) {
        SearchHits hits = sr.getHits();
        long totalHits = hits.getTotalHits();
        return totalHits > 0;
    }

    @Override
    public <T> List<TileData<T>> readTiles(String pyramidId, TileSerializer<T> serializer, Iterable<TileIndex> tiles) throws IOException {
        // Delegate to the JSON-aware overload with no additional filter properties.
        return readTiles(pyramidId, serializer, tiles, null);
    }

    @Override
    public <T> InputStream getTileStream(String pyramidId, TileSerializer<T> serializer, TileIndex tile) throws IOException {
        // Tile streams are not supported by this implementation.
        return null;
    }

    /*
     * Get the boundaries of the X and Y fields configured for this PyramidIO.
     * Could be used to derive the bounds of an area-of-interest tile pyramid.
     */
    private Map<String, Double> getFieldBoundaries() {
        SearchRequestBuilder boundsRequest = this.client.prepareSearch(this.index)
            .setTypes("datum")
            .setSearchType(SearchType.COUNT)
            .addAggregation(AggregationBuilders.min("minX").field(this.xField))
            .addAggregation(AggregationBuilders.min("minY").field(this.yField))
            .addAggregation(AggregationBuilders.max("maxX").field(this.xField))
            .addAggregation(AggregationBuilders.max("maxY").field(this.yField));

        SearchResponse searchResponse = boundsRequest.execute().actionGet();

        Aggregations aggregations = searchResponse.getAggregations();
        Min minX = aggregations.get("minX");
        Min minY = aggregations.get("minY");
        Max maxX = aggregations.get("maxX");
        Max maxY = aggregations.get("maxY");

        Map<String, Double> boundsMap = new HashMap<>();
        boundsMap.put("minX", minX.getValue());
        boundsMap.put("minY", minY.getValue());
        boundsMap.put("maxX", maxX.getValue());
        boundsMap.put("maxY", maxY.getValue());

        return boundsMap;
    }
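    /*
     * A sketch of how getFieldBoundaries could seed an area-of-interest
     * pyramid. AOITilePyramid and its (minX, minY, maxX, maxY) constructor
     * are assumed to come from the binning library; this is illustrative,
     * not a method of this class:
     *
     *   Map<String, Double> b = getFieldBoundaries();
     *   TilePyramid aoiPyramid = new AOITilePyramid(
     *       b.get("minX"), b.get("minY"), b.get("maxX"), b.get("maxY"));
     */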
    private double searchForMaxBucketValue(double intervalX, double intervalY) {
        // Build a query with a 2D aggregation on the xField and yField,
        // using the supplied intervals.
        SearchRequestBuilder metaDataQuery = this.client.prepareSearch(this.index)
            .setTypes("datum")
            .setSearchType(SearchType.COUNT)
            .addAggregation(
                AggregationBuilders.histogram("xAgg")
                    .field(this.xField)
                    .interval((long) intervalX)
                    .order(Histogram.Order.COUNT_DESC)
                    .subAggregation(
                        AggregationBuilders.histogram("yAgg")
                            .field(this.yField)
                            .interval((long) intervalY)
                            .order(Histogram.Order.COUNT_DESC)
                    )
            );

        SearchResponse searchResponse = metaDataQuery.execute().actionGet();
        Histogram agg = searchResponse.getAggregations().get("xAgg");
        return getMaxValueFrom2DHistogram(agg);
    }

    // Iterate through the aggregation and find the maximum document count.
    private double getMaxValueFrom2DHistogram(Histogram agg) {
        double maxValue = 1;
        for (Histogram.Bucket bucket : agg.getBuckets()) {
            Histogram yHistogram = bucket.getAggregations().get("yAgg");
            // There may be no sub-buckets, in which case this x bucket has
            // no effect on the maximum value.
            if (yHistogram.getBuckets().size() < 1) {
                continue;
            }
            // There is no need to iterate over the yAgg buckets because the
            // query orders aggregations by descending count; take the first
            // one if it is greater than maxValue.
            if (maxValue < yHistogram.getBuckets().get(0).getDocCount()) {
                maxValue = yHistogram.getBuckets().get(0).getDocCount();
            }
        }
        return maxValue;
    }

    private List<Double> readMetaMaxFromElasticsearch() {
        Rectangle2D bounds = tilePyramid.getBounds();
        List<Double> maxCountList = new ArrayList<>();
        double pixelsPerTile = 256.0;

        for (int i = 0; i < this.numZoomlevels; i++) {
            // The interval is calculated by dividing the extent of the dataset
            // bounds by the number of pixels in a tile and the number of tiles
            // along one axis at this zoom level.
            double tilesAtZoomLevel = Math.pow(2, i);
            double xInterval = bounds.getWidth() / (pixelsPerTile * tilesAtZoomLevel);
            double yInterval = bounds.getHeight() / (pixelsPerTile * tilesAtZoomLevel);
            if (xInterval < 1) {
                xInterval = 1;
            }
            if (yInterval < 1) {
                yInterval = 1;
            }
            // Search Elasticsearch using the calculated interval.
            double maxDocCount = searchForMaxBucketValue(xInterval, yInterval);
            maxCountList.add(i, maxDocCount);
        }
        // Estimate maxima for the remaining levels by decaying the last
        // computed value.
        while (maxCountList.size() < 15) {
            maxCountList.add(maxCountList.get(maxCountList.size() - 1) * 0.75);
        }
        return maxCountList;
    }

    private JSONObject formatMaxValuesAsMetadataJSONString(List<Double> maxValues) throws JSONException {
        JSONObject metaDataJSON = new JSONObject();
        JSONObject levelMinMap = new JSONObject();
        JSONObject levelMaxMap = new JSONObject();
        for (int i = 0; i < maxValues.size(); i++) {
            levelMinMap.put("" + i, String.valueOf(0));
            levelMaxMap.put("" + i, String.valueOf(maxValues.get(i)));
        }
        metaDataJSON.put("levelMinimums", levelMinMap);
        metaDataJSON.put("levelMaximums", levelMaxMap);
        return metaDataJSON;
    }
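    /*
     * For illustration, with four precomputed zoom levels the method above
     * produces a structure of the following shape (the counts are invented):
     *
     *   {
     *     "levelMinimums": { "0": "0", "1": "0", "2": "0", "3": "0", ... },
     *     "levelMaximums": { "0": "2560.0", "1": "890.0", "2": "312.0", "3": "74.0", ... }
     *   }
     *
     * Levels beyond the precomputed ones are filled in by the 0.75 decay in
     * readMetaMaxFromElasticsearch, up to 15 entries.
     */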
    @Override
    public String readMetaData(String pyramidId) throws IOException {
        Rectangle2D bounds = tilePyramid.getBounds();
        try {
            // Read the per-level maximum values from Elasticsearch.
            List<Double> maxValues = readMetaMaxFromElasticsearch();
            this.maxValues = maxValues;

            JSONObject metaDataJSON = new JSONObject();

            JSONArray boundsJSON = new JSONArray();
            boundsJSON.put(0, bounds.getMinX());
            boundsJSON.put(1, bounds.getMinY());
            boundsJSON.put(2, bounds.getMaxX());
            boundsJSON.put(3, bounds.getMaxY());

            metaDataJSON.put("bounds", boundsJSON);
            metaDataJSON.put("maxzoom", 1);
            metaDataJSON.put("description", "Elasticsearch rendered layer");
            metaDataJSON.put("scheme", "TMS");
            metaDataJSON.put("projection", "EPSG:4326");
            metaDataJSON.put("name", "ES_PLOT");
            metaDataJSON.put("minzoom", 1);
            metaDataJSON.put("tilesize", 256);
            metaDataJSON.put("meta", formatMaxValuesAsMetadataJSONString(maxValues));

            return metaDataJSON.toString();
        } catch (JSONException e) {
            LOGGER.error("Couldn't build JSON metadata; falling back to arbitrary minimums/maximums.", e);
            return "{\"bounds\":["
                + bounds.getMinX() + "," + bounds.getMinY() + ","
                + bounds.getMaxX() + "," + bounds.getMaxY() + "],"
                + "\"maxzoom\":1,\"scheme\":\"TMS\","
                + "\"description\":\"Elasticsearch test layer\","
                + "\"projection\":\"EPSG:4326\","
                + "\"name\":\"ES_SIFT_CROSSPLOT\","
                + "\"minzoom\":1,\"tilesize\":256,"
                + "\"meta\":{\"levelMinimums\":{\"1\":\"0.0\", \"0\":\"0\"},\"levelMaximums\":{\"1\":\"174\", \"0\":\"2560\"}}}";
        }
    }

    @Override
    public void removeTiles(String id, Iterable<TileIndex> tiles) throws IOException {
        // This PyramidIO is read-only; tile removal is not supported.
    }
}