/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.tajo.plan.expr; import com.google.common.base.Function; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Sets; import org.apache.tajo.algebra.ColumnReferenceExpr; import org.apache.tajo.algebra.NamedExpr; import org.apache.tajo.algebra.OpType; import org.apache.tajo.annotation.Nullable; import org.apache.tajo.catalog.*; import org.apache.tajo.common.TajoDataTypes.DataType; import org.apache.tajo.datum.Datum; import org.apache.tajo.exception.TajoInternalError; import org.apache.tajo.plan.LogicalPlan; import org.apache.tajo.plan.Target; import org.apache.tajo.plan.util.ExprFinder; import org.apache.tajo.schema.IdentifierUtil; import java.util.*; public class EvalTreeUtil { public static void changeColumnRef(EvalNode node, String oldName, String newName) { node.postOrder(new ChangeColumnRefVisitor(oldName, newName)); } public static int replace(EvalNode expr, EvalNode targetExpr, EvalNode tobeReplaced) { EvalReplaceVisitor replacer = new EvalReplaceVisitor(targetExpr, tobeReplaced); ReplaceContext context = new ReplaceContext(); replacer.visit(context, expr, new Stack<>()); return context.countOfReplaces; } private static class ReplaceContext { int countOfReplaces = 0; } public static class EvalReplaceVisitor extends BasicEvalNodeVisitor<ReplaceContext, EvalNode> { private EvalNode target; private EvalNode tobeReplaced; public EvalReplaceVisitor(EvalNode target, EvalNode tobeReplaced) { this.target = target; this.tobeReplaced = tobeReplaced; } @Override public EvalNode visit(ReplaceContext context, EvalNode evalNode, Stack<EvalNode> stack) { super.visit(context, evalNode, stack); if (evalNode.equals(target)) { context.countOfReplaces++; EvalNode parent = stack.peek(); if (parent instanceof BetweenPredicateEval) { BetweenPredicateEval between = (BetweenPredicateEval) parent; if (between.getPredicand().equals(evalNode)) { between.setPredicand(tobeReplaced); } if (between.getBegin().equals(evalNode)) { between.setBegin(tobeReplaced); } if (between.getEnd().equals(evalNode)) { between.setEnd(tobeReplaced); } } else if (parent instanceof CaseWhenEval) { CaseWhenEval caseWhen = (CaseWhenEval) parent; // Here, we need to only consider only 'Else' // because IfElseEval is handled in the below condition. if (caseWhen.hasElse() && caseWhen.getElse().equals(evalNode)) { caseWhen.setElseResult(tobeReplaced); } } else if (parent instanceof CaseWhenEval.IfThenEval) { CaseWhenEval.IfThenEval ifThen = (CaseWhenEval.IfThenEval) parent; if (ifThen.getCondition().equals(evalNode)) { ifThen.setCondition(tobeReplaced); } if (ifThen.getResult().equals(evalNode)) { ifThen.setResult(tobeReplaced); } } else if (parent instanceof FunctionEval) { FunctionEval functionEval = (FunctionEval) parent; EvalNode [] arguments = functionEval.getArgs(); for (int i = 0; i < arguments.length; i++) { if (arguments[i].equals(evalNode)) { arguments[i] = tobeReplaced; } } functionEval.setArgs(arguments); } else if (parent instanceof UnaryEval) { if (((UnaryEval)parent).getChild().equals(evalNode)) { ((UnaryEval)parent).setChild(tobeReplaced); } } else if (parent instanceof BinaryEval) { BinaryEval binary = (BinaryEval) parent; if (binary.getLeftExpr() != null && binary.getLeftExpr().equals(evalNode)) { binary.setLeftExpr(tobeReplaced); } if (binary.getRightExpr() != null && binary.getRightExpr().equals(evalNode)) { binary.setRightExpr(tobeReplaced); } } } return evalNode; } } /** * It finds unique columns from a EvalNode. */ public static LinkedHashSet<Column> findUniqueColumns(EvalNode node) { UniqueColumnFinder finder = new UniqueColumnFinder(); node.postOrder(finder); return finder.getColumnRefs(); } public static List<Column> findAllColumnRefs(EvalNode node) { AllColumnRefFinder finder = new AllColumnRefFinder(); node.postOrder(finder); return finder.getColumnRefs(); } public static Schema getSchemaByTargets(Schema inputSchema, List<Target> targets) { return SchemaBuilder.builder().addAll(targets, new Function<Target, Column>() { @Override public Column apply(@javax.annotation.Nullable Target target) { return new Column(target.hasAlias() ? target.getAlias() : target.getEvalTree().getName(), getDomainByExpr(inputSchema, target.getEvalTree())); } }).build(); } public static String columnsToStr(Collection<Column> columns) { StringBuilder sb = new StringBuilder(); String prefix = ""; for (Column column: columns) { sb.append(prefix).append(column.getQualifiedName()); prefix = ","; } return sb.toString(); } public static DataType getDomainByExpr(Schema inputSchema, EvalNode expr) { switch (expr.getType()) { case AND: case OR: case EQUAL: case NOT_EQUAL: case LTH: case LEQ: case GTH: case GEQ: case PLUS: case MINUS: case MULTIPLY: case DIVIDE: case CONST: case FUNCTION: return TypeConverter.convert(expr.getValueType()).getDataType(); case FIELD: FieldEval fieldEval = (FieldEval) expr; return inputSchema.getColumn(fieldEval.getName()).getDataType(); default: throw new TajoInternalError("Unknown expr type: " + expr.getType().toString()); } } /** * Return all exprs to refer columns corresponding to the target. * * @param expr * @param target to be found * @return a list of exprs */ public static Collection<EvalNode> getContainExpr(EvalNode expr, Column target) { Set<EvalNode> exprSet = Sets.newHashSet(); getContainExpr(expr, target, exprSet); return exprSet; } /** * Return the counter to count the number of expression types individually. * * @param expr * @return */ public static Map<EvalType, Integer> getExprCounters(EvalNode expr) { VariableCounter counter = new VariableCounter(); expr.postOrder(counter); return counter.getCounter(); } private static void getContainExpr(EvalNode expr, Column target, Set<EvalNode> exprSet) { switch (expr.getType()) { case EQUAL: case LTH: case LEQ: case GTH: case GEQ: case NOT_EQUAL: if (containColumnRef(expr, target)) { exprSet.add(expr); } break; default: break; } } /** * Examine if the expr contains the column reference corresponding * to the target column */ public static boolean containColumnRef(EvalNode expr, Column target) { Set<Column> exprSet = findUniqueColumns(expr); return exprSet.contains(target); } /** * It separates a singular CNF-formed join condition into a join condition, a left join filter, and * right join filter. * * @param joinQual the original join condition * @param leftSchema Left table schema * @param rightSchema Left table schema * @return Three element EvalNodes, 0 - join condition, 1 - left join filter, 2 - right join filter. */ public static EvalNode[] extractJoinConditions(EvalNode joinQual, Schema leftSchema, Schema rightSchema) { List<EvalNode> joinQuals = Lists.newArrayList(); List<EvalNode> leftFilters = Lists.newArrayList(); List<EvalNode> rightFilters = Lists.newArrayList(); for (EvalNode eachQual : AlgebraicUtil.toConjunctiveNormalFormArray(joinQual)) { if (!(eachQual instanceof BinaryEval)) { continue; // todo 'between', etc. } BinaryEval binaryEval = (BinaryEval)eachQual; LinkedHashSet<Column> leftColumns = EvalTreeUtil.findUniqueColumns(binaryEval.getLeftExpr()); LinkedHashSet<Column> rightColumns = EvalTreeUtil.findUniqueColumns(binaryEval.getRightExpr()); boolean leftInLeft = leftSchema.containsAny(leftColumns); boolean rightInLeft = leftSchema.containsAny(rightColumns); boolean leftInRight = rightSchema.containsAny(leftColumns); boolean rightInRight = rightSchema.containsAny(rightColumns); boolean columnsFromLeft = leftInLeft || rightInLeft; boolean columnsFromRight = leftInRight || rightInRight; if (!columnsFromLeft && !columnsFromRight) { continue; // todo constant expression : this should be done in logical phase } if (columnsFromLeft ^ columnsFromRight) { if (columnsFromLeft) { leftFilters.add(eachQual); } else { rightFilters.add(eachQual); } continue; } if ((leftInLeft && rightInLeft) || (leftInRight && rightInRight)) { continue; // todo not allowed yet : this should be checked in logical phase } joinQuals.add(eachQual); } return new EvalNode[] { joinQuals.isEmpty() ? null : AlgebraicUtil.createSingletonExprFromCNF(joinQuals), leftFilters.isEmpty() ? null : AlgebraicUtil.createSingletonExprFromCNF(leftFilters), rightFilters.isEmpty() ? null : AlgebraicUtil.createSingletonExprFromCNF(rightFilters) }; } /** * If a given expression is join condition, it returns TRUE. Otherwise, it returns FALSE. * * If three conditions are satisfied, we can recognize the expression as a equi join condition. * <ol> * <li>An expression is an equal comparison expression.</li> * <li>Both terms in an expression are column references.</li> * <li>Both column references point come from different tables</li> * </ol> * * For theta join condition, we will use "an expression is a predicate including column references which come * from different two tables" instead of the first rule. * * @param expr EvalNode to be evaluated * @param includeThetaJoin If true, it will return equi as well as non-equi join conditions. * Otherwise, it only returns equi-join conditions. * @return True if it is join condition. */ public static boolean isJoinQual(EvalNode expr, boolean includeThetaJoin) { return isJoinQual(null, null, null, expr, includeThetaJoin); } /** * If a given expression is join condition, it returns TRUE. Otherwise, it returns FALSE. * * If three conditions are satisfied, we can recognize the expression as a equi join condition. * <ol> * <li>An expression is an equal comparison expression.</li> * <li>Both terms in an expression are column references.</li> * <li>Both column references point come from different tables</li> * </ol> * * For theta join condition, we will use "an expression is a predicate including column references which come * from different two tables" instead of the first rule. * * @param block if block is not null, it tracks the lineage of aliased name derived from complex expressions. * @param leftSchema Schema to be used to check if columns belong to different relations * @param rightSchema Schema to be used to check if columns belong to different relations * @param expr EvalNode to be evaluated * @param includeThetaJoin If true, it will return equi as well as non-equi join conditions. * Otherwise, it only returns equi-join conditions. * @return True if it is join condition. */ public static boolean isJoinQual(@Nullable LogicalPlan.QueryBlock block, @Nullable Schema leftSchema, @Nullable Schema rightSchema, EvalNode expr, boolean includeThetaJoin) { if (expr instanceof BinaryEval) { boolean joinComparator; if (includeThetaJoin) { joinComparator = EvalType.isComparisonOperator(expr.getType()); } else { joinComparator = expr.getType() == EvalType.EQUAL; } BinaryEval binaryEval = (BinaryEval) expr; boolean isBothTermFields = isSingleColumn(binaryEval.getLeftExpr()) && isSingleColumn(binaryEval.getRightExpr()); Set<Column> leftColumns = EvalTreeUtil.findUniqueColumns(binaryEval.getLeftExpr()); Set<Column> rightColumns = EvalTreeUtil.findUniqueColumns(binaryEval.getRightExpr()); boolean ensureColumnsOfDifferentTables = false; if (leftColumns.size() == 1 && rightColumns.size() == 1) { // ensure there is only one column of each table Column leftColumn = leftColumns.iterator().next(); Column rightColumn = rightColumns.iterator().next(); // ensure if both column belong to different tables if (block != null) { ensureColumnsOfDifferentTables = isJoinQualWithOnlyColumns(block, leftColumn, rightColumn); } else if (leftSchema != null && rightSchema != null) { ensureColumnsOfDifferentTables = isJoinQualwithSchemas(leftSchema, rightSchema, leftColumn, rightColumn); } else { ensureColumnsOfDifferentTables = isJoinQualWithOnlyColumns(null, leftColumn, rightColumn); } } return joinComparator && isBothTermFields && ensureColumnsOfDifferentTables; } else { return false; } } private static boolean isJoinQualwithSchemas(Schema leftSchema, Schema rightSchema, Column left, Column right) { boolean duplicated = leftSchema.contains(left) && rightSchema.contains(left); duplicated |= leftSchema.contains(right) && rightSchema.contains(right); if (duplicated) { return false; } boolean isJoinQual = leftSchema.contains(left) && rightSchema.contains(right); isJoinQual |= leftSchema.contains(right) && rightSchema.contains(left); return isJoinQual; } private static boolean isJoinQualWithOnlyColumns(@Nullable LogicalPlan.QueryBlock block, Column left, Column right) { String leftQualifier = IdentifierUtil.extractQualifier(left.getQualifiedName()); String rightQualifier = IdentifierUtil.extractQualifier(right.getQualifiedName()); // if block is given, it will track an original expression of each term in order to decide whether // this expression is a join condition, or not. if (block != null) { boolean leftQualified = IdentifierUtil.isFQColumnName(left.getQualifiedName()); boolean rightQualified = IdentifierUtil.isFQColumnName(right.getQualifiedName()); if (!leftQualified) { // if left one is aliased name // getting original expression of left term NamedExpr rawExpr = block.getNamedExprsManager().getNamedExpr(left.getQualifiedName()); Set<ColumnReferenceExpr> foundColumns = ExprFinder.finds(rawExpr.getExpr(), OpType.Column); // ensure there is only one column of an original expression if (foundColumns.size() == 1) { leftQualifier = IdentifierUtil.extractQualifier(foundColumns.iterator().next().getCanonicalName()); } } if (!rightQualified) { // if right one is aliased name // getting original expression of right term NamedExpr rawExpr = block.getNamedExprsManager().getNamedExpr(right.getQualifiedName()); Set<ColumnReferenceExpr> foundColumns = ExprFinder.finds(rawExpr.getExpr(), OpType.Column); // ensure there is only one column of an original expression if (foundColumns.size() == 1) { rightQualifier = IdentifierUtil.extractQualifier(foundColumns.iterator().next().getCanonicalName()); } } } // if columns of both term is different to each other, it will be true. return !leftQualifier.equals(rightQualifier); } public static boolean isSingleColumn(EvalNode evalNode) { return EvalTreeUtil.findUniqueColumns(evalNode).size() == 1; } public static class ChangeColumnRefVisitor implements EvalNodeVisitor { private final String findColumn; private final String toBeChanged; public ChangeColumnRefVisitor(String oldName, String newName) { this.findColumn = oldName; this.toBeChanged = newName; } @Override public void visit(EvalNode node) { if (node.type == EvalType.FIELD) { FieldEval field = (FieldEval) node; if (field.getColumnName().equals(findColumn) || field.getName().equals(findColumn)) { field.replaceColumnRef(toBeChanged); } } } } public static class AllColumnRefFinder implements EvalNodeVisitor { private List<Column> colList = new ArrayList<>(); private FieldEval field = null; @Override public void visit(EvalNode node) { if (node.getType() == EvalType.FIELD) { field = (FieldEval) node; colList.add(field.getColumnRef()); } } public List<Column> getColumnRefs() { return this.colList; } } public static class UniqueColumnFinder implements EvalNodeVisitor { private LinkedHashSet<Column> columnSet = Sets.newLinkedHashSet(); private FieldEval field = null; @Override public void visit(EvalNode node) { if (node.getType() == EvalType.FIELD) { field = (FieldEval) node; columnSet.add(field.getColumnRef()); } } public LinkedHashSet<Column> getColumnRefs() { return this.columnSet; } } public static class VariableCounter implements EvalNodeVisitor { private final Map<EvalType, Integer> counter; public VariableCounter() { counter = Maps.newHashMap(); counter.put(EvalType.FUNCTION, 0); counter.put(EvalType.FIELD, 0); } @Override public void visit(EvalNode node) { if (counter.containsKey(node.getType())) { int val = counter.get(node.getType()); val++; counter.put(node.getType(), val); } } public Map<EvalType, Integer> getCounter() { return counter; } } public static Set<AggregationFunctionCallEval> findDistinctAggFunction(EvalNode expr) { AllAggFunctionFinder finder = new AllAggFunctionFinder(); expr.postOrder(finder); return finder.getAggregationFunction(); } public static class AllAggFunctionFinder implements EvalNodeVisitor { private Set<AggregationFunctionCallEval> aggFucntions = Sets.newHashSet(); private AggregationFunctionCallEval field = null; @Override public void visit(EvalNode node) { if (node.getType() == EvalType.AGG_FUNCTION) { field = (AggregationFunctionCallEval) node; aggFucntions.add(field); } } public Set<AggregationFunctionCallEval> getAggregationFunction() { return this.aggFucntions; } } public static Set<WindowFunctionEval> findWindowFunction(EvalNode expr) { AllWindowFunctionFinder finder = new AllWindowFunctionFinder(); expr.postOrder(finder); return finder.getWindowFunctionSet(); } public static class AllWindowFunctionFinder implements EvalNodeVisitor { private Set<WindowFunctionEval> windowFunctions = Sets.newHashSet(); @Override public void visit(EvalNode node) { if (node.getType() == EvalType.WINDOW_FUNCTION) { WindowFunctionEval field = (WindowFunctionEval) node; windowFunctions.add(field); } } public Set<WindowFunctionEval> getWindowFunctionSet() { return windowFunctions; } } public static <T extends EvalNode> Collection<T> findEvalsByType(EvalNode evalNode, EvalType type) { EvalFinder finder = new EvalFinder(type); finder.visit(null, evalNode, new Stack<>()); return (Collection<T>) finder.evalNodes; } public static <T extends EvalNode> Collection<T> findOuterJoinSensitiveEvals(EvalNode evalNode) { OuterJoinSensitiveEvalFinder finder = new OuterJoinSensitiveEvalFinder(); finder.visit(null, evalNode, new Stack<>()); return (Collection<T>) finder.evalNodes; } public static class EvalFinder extends BasicEvalNodeVisitor<Object, Object> { private EvalType targetType; List<EvalNode> evalNodes = new ArrayList<>(); public EvalFinder(EvalType targetType) { this.targetType = targetType; } @Override public Object visit(Object context, EvalNode evalNode, Stack<EvalNode> stack) { super.visit(context, evalNode, stack); if (evalNode.type == targetType) { evalNodes.add(evalNode); } return evalNode; } public List<EvalNode> getEvalNodes() { return evalNodes; } } public static class OuterJoinSensitiveEvalFinder extends BasicEvalNodeVisitor<Object, Object> { private List<EvalNode> evalNodes = new ArrayList<>(); @Override public Object visit(Object context, EvalNode evalNode, Stack<EvalNode> stack) { super.visit(context, evalNode, stack); if (evalNode.type == EvalType.CASE) { evalNodes.add(evalNode); } else if (evalNode.type == EvalType.FUNCTION) { FunctionEval functionEval = (FunctionEval)evalNode; if ("coalesce".equals(functionEval.getName())) { evalNodes.add(evalNode); } } else if (evalNode.type == EvalType.IS_NULL) { evalNodes.add(evalNode); } return evalNode; } } public static boolean checkIfCanBeConstant(EvalNode evalNode) { return findUniqueColumns(evalNode).size() == 0 && findDistinctAggFunction(evalNode).size() == 0; } public static Datum evaluateImmediately(EvalContext evalContext, EvalNode evalNode) { evalNode.bind(evalContext, null); return evalNode.eval(null); } /** * Checks whether EvalNode consists of only partition columns and const values. * The partition based simple query can be defined as 'select * from tb_name where col_name1="X" and col_name2="Y" [LIMIT Z]', * whose WHERE clause consists of only partition-columns with constant values. * Partition columns must be able to form a prefix of HDFS path like '/tb_name1/col_name1=X/col_name2=Y'. * * @param node The qualification node of a SELECTION node * @param partSchema Partition expression schema * @return True if the query is partition-column based simple query. */ public static boolean checkIfPartitionSelection(EvalNode node, Schema partSchema) { if (node != null && node instanceof BinaryEval) { BinaryEval eval = (BinaryEval)node; EvalNode left = eval.getLeftExpr(); EvalNode right = eval.getRightExpr(); EvalType type = eval.getType(); if (type == EvalType.EQUAL) { if (left instanceof FieldEval && right instanceof ConstEval && partSchema.contains(((FieldEval) left).getColumnName())) { return true; } else if (left instanceof ConstEval && right instanceof FieldEval && partSchema.contains(((FieldEval) right).getColumnName())) { return true; } } else if ((type == EvalType.AND || type == EvalType.OR) && left instanceof BinaryEval && right instanceof BinaryEval) { return checkIfPartitionSelection(left, partSchema) && checkIfPartitionSelection(right, partSchema); } } return false; } }