package org.rakam.clickhouse.analysis;
import com.facebook.presto.sql.parser.SqlParser;
import com.facebook.presto.sql.tree.Expression;
import com.google.common.collect.ImmutableMap;
import org.rakam.analysis.EventExplorer;
import org.rakam.config.ProjectConfig;
import org.rakam.report.DelegateQueryExecution;
import org.rakam.report.QueryExecution;
import org.rakam.report.QueryExecutor;
import org.rakam.report.QueryExecutorService;
import org.rakam.report.QueryResult;
import org.rakam.report.realtime.AggregationType;
import org.rakam.util.RakamException;
import org.rakam.util.ValidationUtil;
import javax.inject.Inject;
import java.time.Instant;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.stream.Collectors;
import static com.facebook.presto.sql.RakamExpressionFormatter.formatIdentifier;
import static io.netty.handler.codec.http.HttpResponseStatus.BAD_REQUEST;
import static java.lang.String.format;
import static java.time.temporal.ChronoUnit.DAYS;
import static org.rakam.analysis.EventExplorer.ReferenceType.COLUMN;
import static org.rakam.analysis.EventExplorer.ReferenceType.REFERENCE;
import static org.rakam.analysis.EventExplorer.TimestampTransformation.DAY;
import static org.rakam.analysis.EventExplorer.TimestampTransformation.DAY_OF_MONTH;
import static org.rakam.analysis.EventExplorer.TimestampTransformation.DAY_OF_WEEK;
import static org.rakam.analysis.EventExplorer.TimestampTransformation.HOUR;
import static org.rakam.analysis.EventExplorer.TimestampTransformation.HOUR_OF_DAY;
import static org.rakam.analysis.EventExplorer.TimestampTransformation.MONTH;
import static org.rakam.analysis.EventExplorer.TimestampTransformation.MONTH_OF_YEAR;
import static org.rakam.analysis.EventExplorer.TimestampTransformation.YEAR;
import static org.rakam.analysis.EventExplorer.TimestampTransformation.fromPrettyName;
import static org.rakam.analysis.EventExplorer.TimestampTransformation.fromString;
import static org.rakam.clickhouse.analysis.ClickHouseQueryExecution.DATE_TIME_FORMATTER;
import static org.rakam.collection.SchemaField.stripName;
import static org.rakam.report.eventexplorer.AbstractEventExplorer.checkReference;
import static org.rakam.report.realtime.AggregationType.COUNT;
import static org.rakam.util.ValidationUtil.checkCollection;
import static org.rakam.util.ValidationUtil.checkLiteral;
import static org.rakam.util.ValidationUtil.checkProject;
import static org.rakam.util.ValidationUtil.checkTableColumn;
public class ClickHouseEventExplorer
        implements EventExplorer
{
    // Fallback segment used when the caller supplies none: segments results by collection name.
    protected final Reference DEFAULT_SEGMENT = new Reference(COLUMN, "_collection");

    // ClickHouse expression template for each supported timestamp transformation; the single
    // %s placeholder receives the project's time column.
    // WEEK_OF_YEAR and QUARTER_OF_YEAR are not supported by this backend.
    private static final Map<TimestampTransformation, String> timestampMapping = ImmutableMap.
            <TimestampTransformation, String>builder()
            .put(HOUR_OF_DAY, "toHour(%s)")
            .put(DAY_OF_MONTH, "toDayOfMonth(%s)")
            .put(MONTH_OF_YEAR, "toMonth(%s)")
            .put(DAY_OF_WEEK, "toDayOfWeek(%s)")
            .put(HOUR, "toStartOfHour(%s)")
            .put(DAY, "cast(%s as Date)")
            .put(MONTH, "toStartOfMonth(%s)")
            .put(YEAR, "toStartOfYear(%s)")
            .build();

    // SqlParser is shared; all access is guarded by synchronized(sqlParser) below because it
    // is not documented as thread-safe.
    private static final SqlParser sqlParser = new SqlParser();

    private final QueryExecutor executor;
    private final QueryExecutorService service;
    private final ProjectConfig projectConfig;

    @Inject
    public ClickHouseEventExplorer(QueryExecutor executor, ProjectConfig projectConfig, QueryExecutorService service)
    {
        this.executor = executor;
        this.service = service;
        this.projectConfig = projectConfig;
    }

    /**
     * Builds and executes the event-explorer aggregation query.
     *
     * @param grouping optional grouping dimension (plain column or timestamp reference); may be null
     * @param segmentValue2 optional segment dimension; {@link #DEFAULT_SEGMENT} is used when null
     * @param filterExpression optional Presto-syntax filter, reformatted for ClickHouse before use
     * @param startDate inclusive start of the time window
     * @param endDate end of the window; one day is added so the whole end day is included
     */
    @Override
    public QueryExecution analyze(String project, List<String> collections, Measure measure, Reference grouping,
            Reference segmentValue2, String filterExpression, Instant startDate, Instant endDate)
    {
        Reference segment = segmentValue2 == null ? DEFAULT_SEGMENT : segmentValue2;

        if (grouping != null && grouping.type == REFERENCE) {
            checkReference(grouping.value, startDate, endDate, collections.size());
        }
        if (segment != null && segment.type == REFERENCE) {
            checkReference(segment.value, startDate, endDate, collections.size());
        }

        // BUG FIX: the filter previously tested the entry itself (`e -> e != null`), which is
        // always true because the SimpleEntry objects are never null. A null `grouping` (which
        // the rest of this method explicitly supports) therefore reached getColumnReference()
        // and threw a NullPointerException. Filter on the wrapped Reference instead.
        String groups = Arrays.asList(
                new AbstractMap.SimpleEntry<>("segment", segment),
                new AbstractMap.SimpleEntry<>("group", grouping)).stream()
                .filter(e -> e.getValue() != null)
                .map(e -> getColumnReference(e.getValue()) + "_" + e.getKey())
                .collect(Collectors.joining(", "));
        String groupBy = groups.isEmpty() ? "" : ("GROUP BY " + groups + " WITH TOTALS");

        String timeFilter = format(" %s between toDateTime('%s') and toDateTime('%s')",
                checkTableColumn(projectConfig.getTimeColumn()),
                DATE_TIME_FORMATTER.format(startDate), DATE_TIME_FORMATTER.format(endDate.plus(1, DAYS)));

        if (filterExpression != null) {
            synchronized (sqlParser) {
                Expression expression = sqlParser.createExpression(filterExpression);
                filterExpression = formatExpression(expression);
            }
        }

        String where = timeFilter + (filterExpression == null ? "" : (" AND " + filterExpression));

        String measureAgg = convertSqlFunction(measure != null &&
                measure.aggregation != null ? measure.aggregation : COUNT);
        String measureColumn = measure != null &&
                measure.column != null ? checkTableColumn(measure.column, '`') : "";

        String computeQuery;
        if (collections.size() == 1) {
            String select = generateComputeQuery(grouping, segment, collections.get(0));
            computeQuery = format(" select %s %s as value from %s.%s where %s %s",
                    select.isEmpty() ? select : select + ",",
                    format(measureAgg, measureColumn),
                    project, checkCollection(collections.get(0), '`'),
                    where, groupBy);
        }
        else {
            String selectPart = (grouping == null ? "" : checkTableColumn(getColumnReference(grouping) + "_group", '`')) +
                    (grouping == null ? "" : ", ") + checkTableColumn(getColumnReference(segment) + "_segment", '`');
            // NOTE(review): when a measure column is set, each inner select emits that column
            // twice and never uses `select` (the grouping/segment expressions) — this looks
            // inconsistent with the single-collection branch above; confirm intent before
            // changing. (The original `collections.size() == 1` ternary here was dead code —
            // this branch only runs when size != 1 — and has been removed.)
            String queries = collections.stream()
                    .map(collection -> {
                        String select = generateComputeQuery(grouping, segment, collection);
                        return format("select '%s' as _collection, %s %s from %s.%s where %s",
                                checkLiteral(collection),
                                measureColumn.isEmpty() ? select : measureColumn + ",",
                                measureColumn,
                                project, checkCollection(collection, '`'), where);
                    })
                    .collect(Collectors.joining("\n union all "));
            computeQuery = format("select %s %s as value from (\n" +
                            "%s\n" +
                            ") %s",
                    selectPart.isEmpty() ? "" : selectPart + ",",
                    format(measureAgg, measureColumn),
                    queries,
                    groupBy);
        }

        // Outer query orders by the value column's ordinal position (3 when both dimension
        // columns precede it, otherwise 2) and caps the result at 500 rows. The original had
        // an identical-branch ternary on grouping.type/segment.type here; both branches
        // produced the same string, so they are collapsed.
        String query = format("select %s %s %s value from (\n" +
                        "%s\n" +
                        ") ORDER BY %s DESC LIMIT 500",
                grouping == null ? "" : format(checkTableColumn("%s_group", '`'), getColumnReference(grouping)),
                segment == null ? "" : ((grouping == null ? "" : ",") + format(checkTableColumn("%s_segment", '`'), getColumnReference(segment))),
                grouping != null || segment != null ? "," : "",
                computeQuery, segment != null && grouping != null ? 3 : 2);

        return new DelegateQueryExecution(executor.executeRawQuery(query), result -> {
            List<List<Object>> newResult = result.getResult();
            return new QueryResult(result.getMetadata(), newResult, result.getProperties());
        });
    }

    // Re-formats a parsed Presto expression for ClickHouse, quoting identifiers with backticks.
    private static String formatExpression(Expression value)
    {
        return ClickhouseExpressionFormatter.formatExpression(value,
                name -> name.getParts().stream().map(e -> formatIdentifier(e, '`')).collect(Collectors.joining(".")),
                name -> name.getParts().stream()
                        .map(e -> formatIdentifier(e, '`')).collect(Collectors.joining(".")), '`');
    }

    /**
     * Returns per-collection event counts in the given window, optionally bucketed by a
     * timestamp transformation named by {@code dimension} (its pretty name).
     *
     * @throws RakamException with BAD_REQUEST when {@code dimension} is not a known transformation
     */
    @Override
    public CompletableFuture<QueryResult> getEventStatistics(String project, Optional<Set<String>> collections, Optional<String> dimension, Instant startDate, Instant endDate)
    {
        checkProject(project);

        // An explicitly empty collection set means "nothing to count" — short-circuit.
        if (collections.isPresent() && collections.get().isEmpty()) {
            return CompletableFuture.completedFuture(QueryResult.empty());
        }
        if (dimension.isPresent()) {
            checkReference(dimension.get(), startDate, endDate, collections.map(Set::size).orElse(10));
        }

        String timePredicate = format("%s between toDateTime('%s') and toDateTime('%s')",
                ValidationUtil.checkCollection(projectConfig.getTimeColumn()),
                DATE_TIME_FORMATTER.format(startDate),
                DATE_TIME_FORMATTER.format(endDate.plus(1, DAYS)));

        // NOTE(review): the per-collection sub-selects are joined with ", " rather than
        // " union all " — verify this yields a valid ClickHouse table expression.
        String collectionQuery = collections.map(v -> "(" + v.stream()
                .map(col -> String.format("SELECT %s, cast('%s' as string) as \"$collection\" FROM %s",
                        checkTableColumn(projectConfig.getTimeColumn()),
                        col, checkCollection(col, '`'))).collect(Collectors.joining(", ")) + ") ")
                .orElse("_all");

        String query;
        if (dimension.isPresent()) {
            Optional<TimestampTransformation> aggregationMethod = fromPrettyName(dimension.get());
            if (!aggregationMethod.isPresent()) {
                throw new RakamException(BAD_REQUEST);
            }

            String function = format(timestampMapping.get(aggregationMethod.get()), projectConfig.getTimeColumn());
            query = format("select \"$collection\" as collection, %s as %s, count(*) from %s where %s group by \"$collection\", %s order by %s desc",
                    function,
                    aggregationMethod.get(), collectionQuery, timePredicate,
                    function, function);
        }
        else {
            query = String.format("select \"$collection\" as collection, count(*) total \n" +
                    " from %s where %s group by \"$collection\"", collectionQuery, timePredicate);
        }

        return service.executeQuery(project, query).getResult();
    }

    /**
     * Maps a Rakam aggregation type to the corresponding ClickHouse aggregate-function
     * template; the %s placeholder receives the measure column.
     *
     * @throws IllegalArgumentException for aggregation types with no ClickHouse equivalent
     */
    public String convertSqlFunction(AggregationType aggType)
    {
        switch (aggType) {
            case AVERAGE:
                return "avg(%s)";
            case MAXIMUM:
                return "max(%s)";
            case MINIMUM:
                return "min(%s)";
            case COUNT:
                return "count(%s)";
            case SUM:
                return "sum(%s)";
            case COUNT_UNIQUE:
                return "uniqExact(%s)";
            case APPROXIMATE_UNIQUE:
                return "uniq(distinct %s)";
            default:
                throw new IllegalArgumentException("aggregation type is not supported");
        }
    }

    /**
     * Builds the SELECT-list fragment exposing the grouping expression as {@code <ref>_group}
     * and the segment expression as {@code <ref>_segment}. For the default segment the
     * collection name itself is emitted as a string literal.
     */
    protected String generateComputeQuery(Reference grouping, Reference segment, String collection)
    {
        StringBuilder selectBuilder = new StringBuilder();
        if (grouping != null) {
            selectBuilder.append(getColumnValue(grouping, true) + " as " + checkTableColumn(getColumnReference(grouping) + "_group", '`'));
            if (segment != null) {
                selectBuilder.append(", ");
            }
        }
        if (segment != null) {
            selectBuilder.append((!segment.equals(DEFAULT_SEGMENT) ? getColumnValue(segment, true) : "'" + stripName(collection, "collection") + "'") + " as "
                    + checkTableColumn(getColumnReference(segment) + "_segment", '`'));
        }
        return selectBuilder.toString();
    }

    /**
     * Renders a reference as a SQL expression: a (optionally backtick-quoted) column name for
     * COLUMN references, or the timestamp transformation applied to the time column for
     * REFERENCE references. The {@code format} flag only affects COLUMN quoting.
     */
    protected String getColumnValue(Reference ref, boolean format)
    {
        switch (ref.type) {
            case COLUMN:
                return format ? checkTableColumn(ref.value, '`') : ref.value;
            case REFERENCE:
                return format(timestampMapping.get(fromString(ref.value.replace(" ", "_"))), projectConfig.getTimeColumn());
            default:
                throw new IllegalArgumentException("Unknown reference type: " + ref.value);
        }
    }

    /**
     * Lists the extra (timestamp-derived) dimensions offered by this backend, grouped by
     * transformation category.
     */
    @Override
    public Map<String, List<String>> getExtraDimensions(String project)
    {
        Map<String, List<String>> builder = new HashMap<>();
        for (TimestampTransformation transformation : timestampMapping.keySet()) {
            builder.computeIfAbsent(transformation.getCategory(), k -> new ArrayList<>())
                    .add(transformation.getPrettyName());
        }
        return builder;
    }

    /**
     * Returns the base column name a reference resolves to: the column itself for COLUMN
     * references, or the project's time column for timestamp REFERENCE references.
     */
    public String getColumnReference(Reference ref)
    {
        switch (ref.type) {
            case COLUMN:
                return ref.value;
            case REFERENCE:
                return projectConfig.getTimeColumn();
            default:
                throw new IllegalArgumentException("Unknown reference type: " + ref.value);
        }
    }
}