EvalTreeUtil.java example

Explorer
tajo-master
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.tajo.plan.expr;

import com.google.common.base.Function;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import org.apache.tajo.algebra.ColumnReferenceExpr;
import org.apache.tajo.algebra.NamedExpr;
import org.apache.tajo.algebra.OpType;
import org.apache.tajo.annotation.Nullable;
import org.apache.tajo.catalog.*;
import org.apache.tajo.common.TajoDataTypes.DataType;
import org.apache.tajo.datum.Datum;
import org.apache.tajo.exception.TajoInternalError;
import org.apache.tajo.plan.LogicalPlan;
import org.apache.tajo.plan.Target;
import org.apache.tajo.plan.util.ExprFinder;
import org.apache.tajo.schema.IdentifierUtil;

import java.util.*;

public class EvalTreeUtil {

  public static void changeColumnRef(EvalNode node, String oldName, String newName) {
    node.postOrder(new ChangeColumnRefVisitor(oldName, newName));
  }

  public static int replace(EvalNode expr, EvalNode targetExpr, EvalNode tobeReplaced) {
    EvalReplaceVisitor replacer = new EvalReplaceVisitor(targetExpr, tobeReplaced);
    ReplaceContext context = new ReplaceContext();
    replacer.visit(context, expr, new Stack<>());
    return context.countOfReplaces;
  }

  private static class ReplaceContext {
    int countOfReplaces = 0;
  }

  public static class EvalReplaceVisitor extends BasicEvalNodeVisitor<ReplaceContext, EvalNode> {
    private EvalNode target;
    private EvalNode tobeReplaced;

    public EvalReplaceVisitor(EvalNode target, EvalNode tobeReplaced) {
      this.target = target;
      this.tobeReplaced = tobeReplaced;
    }

    @Override
    public EvalNode visit(ReplaceContext context, EvalNode evalNode, Stack<EvalNode> stack) {
      super.visit(context, evalNode, stack);

      if (evalNode.equals(target)) {
        context.countOfReplaces++;

        EvalNode parent = stack.peek();

        if (parent instanceof BetweenPredicateEval) {
          BetweenPredicateEval between = (BetweenPredicateEval) parent;
          if (between.getPredicand().equals(evalNode)) {
            between.setPredicand(tobeReplaced);
          }
          if (between.getBegin().equals(evalNode)) {
            between.setBegin(tobeReplaced);
          }
          if (between.getEnd().equals(evalNode)) {
            between.setEnd(tobeReplaced);
          }

        } else if (parent instanceof CaseWhenEval) {
          CaseWhenEval caseWhen = (CaseWhenEval) parent;

          // Here, we need to only consider only 'Else'
          // because IfElseEval is handled in the below condition.
          if (caseWhen.hasElse() && caseWhen.getElse().equals(evalNode)) {
            caseWhen.setElseResult(tobeReplaced);
          }
        } else if (parent instanceof CaseWhenEval.IfThenEval) {
          CaseWhenEval.IfThenEval ifThen = (CaseWhenEval.IfThenEval) parent;
          if (ifThen.getCondition().equals(evalNode)) {
            ifThen.setCondition(tobeReplaced);
          }
          if (ifThen.getResult().equals(evalNode)) {
            ifThen.setResult(tobeReplaced);
          }
       } else if (parent instanceof FunctionEval) {
          FunctionEval functionEval = (FunctionEval) parent;
          EvalNode [] arguments = functionEval.getArgs();
          for (int i = 0; i < arguments.length; i++) {
            if (arguments[i].equals(evalNode)) {
              arguments[i] = tobeReplaced;
            }
          }
          functionEval.setArgs(arguments);

        } else if (parent instanceof UnaryEval) {
          if (((UnaryEval)parent).getChild().equals(evalNode)) {
            ((UnaryEval)parent).setChild(tobeReplaced);
          }
        } else if (parent instanceof BinaryEval) {
          BinaryEval binary = (BinaryEval) parent;
          if (binary.getLeftExpr() != null && binary.getLeftExpr().equals(evalNode)) {
            binary.setLeftExpr(tobeReplaced);
          }
          if (binary.getRightExpr() != null && binary.getRightExpr().equals(evalNode)) {
            binary.setRightExpr(tobeReplaced);
          }
        }
      }

      return evalNode;
    }
  }

  /**
   * It finds unique columns from a EvalNode.
   */
  public static LinkedHashSet<Column> findUniqueColumns(EvalNode node) {
    UniqueColumnFinder finder = new UniqueColumnFinder();
    node.postOrder(finder);
    return finder.getColumnRefs();
  }
  
  public static List<Column> findAllColumnRefs(EvalNode node) {
    AllColumnRefFinder finder = new AllColumnRefFinder();
    node.postOrder(finder);
    return finder.getColumnRefs();
  }

  public static Schema getSchemaByTargets(Schema inputSchema, List<Target> targets) {
    return SchemaBuilder.builder().addAll(targets, new Function<Target, Column>() {
      @Override
      public Column apply(@javax.annotation.Nullable Target target) {
        return new Column(target.hasAlias() ? target.getAlias() : target.getEvalTree().getName(),
            getDomainByExpr(inputSchema, target.getEvalTree()));
      }
    }).build();
  }

  public static String columnsToStr(Collection<Column> columns) {
    StringBuilder sb = new StringBuilder();
    String prefix = "";
    for (Column column: columns) {
      sb.append(prefix).append(column.getQualifiedName());
      prefix = ",";
    }

    return sb.toString();
  }
  
  public static DataType getDomainByExpr(Schema inputSchema, EvalNode expr) {
    switch (expr.getType()) {
    case AND:      
    case OR:
    case EQUAL:
    case NOT_EQUAL:
    case LTH:
    case LEQ:
    case GTH:
    case GEQ:
    case PLUS:
    case MINUS:
    case MULTIPLY:
    case DIVIDE:
    case CONST:
    case FUNCTION:
        return TypeConverter.convert(expr.getValueType()).getDataType();

    case FIELD:
      FieldEval fieldEval = (FieldEval) expr;
      return inputSchema.getColumn(fieldEval.getName()).getDataType();

      
    default:
      throw new TajoInternalError("Unknown expr type: " + expr.getType().toString());
    }
  }

  /**
   * Return all exprs to refer columns corresponding to the target.
   *
   * @param expr
   * @param target to be found
   * @return a list of exprs
   */
  public static Collection<EvalNode> getContainExpr(EvalNode expr, Column target) {
    Set<EvalNode> exprSet = Sets.newHashSet();
    getContainExpr(expr, target, exprSet);
    return exprSet;
  }
  
  /**
   * Return the counter to count the number of expression types individually.
   *  
   * @param expr
   * @return
   */
  public static Map<EvalType, Integer> getExprCounters(EvalNode expr) {
    VariableCounter counter = new VariableCounter();
    expr.postOrder(counter);
    return counter.getCounter();
  }
  
  private static void getContainExpr(EvalNode expr, Column target, Set<EvalNode> exprSet) {
    switch (expr.getType()) {
    case EQUAL:
    case LTH:
    case LEQ:
    case GTH:
    case GEQ:
    case NOT_EQUAL:
      if (containColumnRef(expr, target)) {          
        exprSet.add(expr);
      }
      break;
    default:
      break;
    }    
  }
  
  /**
   * Examine if the expr contains the column reference corresponding 
   * to the target column
   */
  public static boolean containColumnRef(EvalNode expr, Column target) {
    Set<Column> exprSet = findUniqueColumns(expr);
    return exprSet.contains(target);
  }

  /**
   * It separates a singular CNF-formed join condition into a join condition, a left join filter, and
   * right join filter.
   *
   * @param joinQual the original join condition
   * @param leftSchema Left table schema
   * @param rightSchema Left table schema
   * @return Three element EvalNodes, 0 - join condition, 1 - left join filter, 2 - right join filter.
   */
  public static EvalNode[] extractJoinConditions(EvalNode joinQual, Schema leftSchema, Schema rightSchema) {
    List<EvalNode> joinQuals = Lists.newArrayList();
    List<EvalNode> leftFilters = Lists.newArrayList();
    List<EvalNode> rightFilters = Lists.newArrayList();
    for (EvalNode eachQual : AlgebraicUtil.toConjunctiveNormalFormArray(joinQual)) {
      if (!(eachQual instanceof BinaryEval)) {
        continue; // todo 'between', etc.
      }
      BinaryEval binaryEval = (BinaryEval)eachQual;
      LinkedHashSet<Column> leftColumns = EvalTreeUtil.findUniqueColumns(binaryEval.getLeftExpr());
      LinkedHashSet<Column> rightColumns = EvalTreeUtil.findUniqueColumns(binaryEval.getRightExpr());
      boolean leftInLeft = leftSchema.containsAny(leftColumns);
      boolean rightInLeft = leftSchema.containsAny(rightColumns);
      boolean leftInRight = rightSchema.containsAny(leftColumns);
      boolean rightInRight = rightSchema.containsAny(rightColumns);

      boolean columnsFromLeft = leftInLeft || rightInLeft;
      boolean columnsFromRight = leftInRight || rightInRight;
      if (!columnsFromLeft && !columnsFromRight) {
        continue; // todo constant expression : this should be done in logical phase
      }
      if (columnsFromLeft ^ columnsFromRight) {
        if (columnsFromLeft) {
          leftFilters.add(eachQual);
        } else {
          rightFilters.add(eachQual);
        }
        continue;
      }
      if ((leftInLeft && rightInLeft) || (leftInRight && rightInRight)) {
        continue; // todo not allowed yet : this should be checked in logical phase
      }
      joinQuals.add(eachQual);
    }
    return new EvalNode[] {
        joinQuals.isEmpty() ? null : AlgebraicUtil.createSingletonExprFromCNF(joinQuals),
        leftFilters.isEmpty() ? null : AlgebraicUtil.createSingletonExprFromCNF(leftFilters),
        rightFilters.isEmpty() ? null : AlgebraicUtil.createSingletonExprFromCNF(rightFilters)
    };
  }

  /**
   * If a given expression is join condition, it returns TRUE. Otherwise, it returns FALSE.
   *
   * If three conditions are satisfied, we can recognize the expression as a equi join condition.
   * <ol>
   *   <li>An expression is an equal comparison expression.</li>
   *   <li>Both terms in an expression are column references.</li>
   *   <li>Both column references point come from different tables</li>
   * </ol>
   *
   * For theta join condition, we will use "an expression is a predicate including column references which come
   * from different two tables" instead of the first rule.
   *
   * @param expr EvalNode to be evaluated
   * @param includeThetaJoin If true, it will return equi as well as non-equi join conditions.
   *                         Otherwise, it only returns equi-join conditions.
   * @return True if it is join condition.
   */
  public static boolean isJoinQual(EvalNode expr, boolean includeThetaJoin) {
    return isJoinQual(null, null, null, expr, includeThetaJoin);
  }

  /**
   * If a given expression is join condition, it returns TRUE. Otherwise, it returns FALSE.
   *
   * If three conditions are satisfied, we can recognize the expression as a equi join condition.
   * <ol>
   *   <li>An expression is an equal comparison expression.</li>
   *   <li>Both terms in an expression are column references.</li>
   *   <li>Both column references point come from different tables</li>
   * </ol>
   *
   * For theta join condition, we will use "an expression is a predicate including column references which come
   * from different two tables" instead of the first rule.
   *
   * @param block if block is not null, it tracks the lineage of aliased name derived from complex expressions.
   * @param leftSchema Schema to be used to check if columns belong to different relations
   * @param rightSchema Schema to be used to check if columns belong to different relations
   * @param expr EvalNode to be evaluated
   * @param includeThetaJoin If true, it will return equi as well as non-equi join conditions.
   *                         Otherwise, it only returns equi-join conditions.
   * @return True if it is join condition.
   */
  public static boolean isJoinQual(@Nullable LogicalPlan.QueryBlock block,
                                   @Nullable Schema leftSchema, @Nullable Schema rightSchema,
                                   EvalNode expr, boolean includeThetaJoin) {

    if (expr instanceof BinaryEval) {
      boolean joinComparator;
      if (includeThetaJoin) {
        joinComparator = EvalType.isComparisonOperator(expr.getType());
      } else {
        joinComparator = expr.getType() == EvalType.EQUAL;
      }

      BinaryEval binaryEval = (BinaryEval) expr;
      boolean isBothTermFields = isSingleColumn(binaryEval.getLeftExpr()) && isSingleColumn(binaryEval.getRightExpr());

      Set<Column> leftColumns = EvalTreeUtil.findUniqueColumns(binaryEval.getLeftExpr());
      Set<Column> rightColumns = EvalTreeUtil.findUniqueColumns(binaryEval.getRightExpr());

      boolean ensureColumnsOfDifferentTables = false;

      if (leftColumns.size() == 1 && rightColumns.size() == 1) { // ensure there is only one column of each table
        Column leftColumn = leftColumns.iterator().next();
        Column rightColumn = rightColumns.iterator().next();

        // ensure if both column belong to different tables
        if (block != null) {
          ensureColumnsOfDifferentTables = isJoinQualWithOnlyColumns(block, leftColumn, rightColumn);
        } else if (leftSchema != null && rightSchema != null) {
          ensureColumnsOfDifferentTables = isJoinQualwithSchemas(leftSchema, rightSchema, leftColumn, rightColumn);
        } else {
          ensureColumnsOfDifferentTables = isJoinQualWithOnlyColumns(null, leftColumn, rightColumn);
        }
      }

      return joinComparator && isBothTermFields && ensureColumnsOfDifferentTables;
    } else {
      return false;
    }
  }

  private static boolean isJoinQualwithSchemas(Schema leftSchema, Schema rightSchema, Column left, Column right) {

    boolean duplicated = leftSchema.contains(left) && rightSchema.contains(left);
    duplicated |= leftSchema.contains(right) && rightSchema.contains(right);

    if (duplicated) {
      return false;
    }

    boolean isJoinQual = leftSchema.contains(left) && rightSchema.contains(right);
    isJoinQual |= leftSchema.contains(right) && rightSchema.contains(left);

    return isJoinQual;
  }

  private static boolean isJoinQualWithOnlyColumns(@Nullable LogicalPlan.QueryBlock block,
                                                   Column left, Column right) {
    String leftQualifier = IdentifierUtil.extractQualifier(left.getQualifiedName());
    String rightQualifier = IdentifierUtil.extractQualifier(right.getQualifiedName());

    // if block is given, it will track an original expression of each term in order to decide whether
    // this expression is a join condition, or not.
    if (block != null) {
      boolean leftQualified = IdentifierUtil.isFQColumnName(left.getQualifiedName());
      boolean rightQualified = IdentifierUtil.isFQColumnName(right.getQualifiedName());

      if (!leftQualified) { // if left one is aliased name

        // getting original expression of left term
        NamedExpr rawExpr = block.getNamedExprsManager().getNamedExpr(left.getQualifiedName());
        Set<ColumnReferenceExpr> foundColumns = ExprFinder.finds(rawExpr.getExpr(), OpType.Column);

        // ensure there is only one column of an original expression
        if (foundColumns.size() == 1) {
          leftQualifier = IdentifierUtil.extractQualifier(foundColumns.iterator().next().getCanonicalName());
        }
      }
      if (!rightQualified) { // if right one is aliased name

        // getting original expression of right term
        NamedExpr rawExpr = block.getNamedExprsManager().getNamedExpr(right.getQualifiedName());
        Set<ColumnReferenceExpr> foundColumns = ExprFinder.finds(rawExpr.getExpr(), OpType.Column);

        // ensure there is only one column of an original expression
        if (foundColumns.size() == 1) {
          rightQualifier = IdentifierUtil.extractQualifier(foundColumns.iterator().next().getCanonicalName());
        }
      }
    }

    // if columns of both term is different to each other, it will be true.
    return !leftQualifier.equals(rightQualifier);
  }

  public static boolean isSingleColumn(EvalNode evalNode) {
    return EvalTreeUtil.findUniqueColumns(evalNode).size() == 1;
  }
  
  public static class ChangeColumnRefVisitor implements EvalNodeVisitor {    
    private final String findColumn;
    private final String toBeChanged;
    
    public ChangeColumnRefVisitor(String oldName, String newName) {
      this.findColumn = oldName;
      this.toBeChanged = newName;
    }
    
    @Override
    public void visit(EvalNode node) {
      if (node.type == EvalType.FIELD) {
        FieldEval field = (FieldEval) node;
        if (field.getColumnName().equals(findColumn)
            || field.getName().equals(findColumn)) {
          field.replaceColumnRef(toBeChanged);
        }
      }
    }    
  }
  
  public static class AllColumnRefFinder implements EvalNodeVisitor {
    private List<Column> colList = new ArrayList<>();
    private FieldEval field = null;
    
    @Override
    public void visit(EvalNode node) {
      if (node.getType() == EvalType.FIELD) {
        field = (FieldEval) node;
        colList.add(field.getColumnRef());
      } 
    }
    
    public List<Column> getColumnRefs() {
      return this.colList;
    }
  }
  
  public static class UniqueColumnFinder implements EvalNodeVisitor {
    private LinkedHashSet<Column> columnSet = Sets.newLinkedHashSet();
    private FieldEval field = null;
    
    @Override
    public void visit(EvalNode node) {
      if (node.getType() == EvalType.FIELD) {
        field = (FieldEval) node;
        columnSet.add(field.getColumnRef());
      }
    }
    
    public LinkedHashSet<Column> getColumnRefs() {
      return this.columnSet;
    }
  }
  
  public static class VariableCounter implements EvalNodeVisitor {
    private final Map<EvalType, Integer> counter;
    
    public VariableCounter() {
      counter = Maps.newHashMap();
      counter.put(EvalType.FUNCTION, 0);
      counter.put(EvalType.FIELD, 0);
    }
    
    @Override
    public void visit(EvalNode node) {
      if (counter.containsKey(node.getType())) {
        int val = counter.get(node.getType());
        val++;
        counter.put(node.getType(), val);
      }
    }
    
    public Map<EvalType, Integer> getCounter() {
      return counter;
    }
  }

  public static Set<AggregationFunctionCallEval> findDistinctAggFunction(EvalNode expr) {
    AllAggFunctionFinder finder = new AllAggFunctionFinder();
    expr.postOrder(finder);
    return finder.getAggregationFunction();
  }

  public static class AllAggFunctionFinder implements EvalNodeVisitor {
    private Set<AggregationFunctionCallEval> aggFucntions = Sets.newHashSet();
    private AggregationFunctionCallEval field = null;

    @Override
    public void visit(EvalNode node) {
      if (node.getType() == EvalType.AGG_FUNCTION) {
        field = (AggregationFunctionCallEval) node;
        aggFucntions.add(field);
      }
    }

    public Set<AggregationFunctionCallEval> getAggregationFunction() {
      return this.aggFucntions;
    }
  }
  
  public static Set<WindowFunctionEval> findWindowFunction(EvalNode expr) {
    AllWindowFunctionFinder finder = new AllWindowFunctionFinder();
    expr.postOrder(finder);
    return finder.getWindowFunctionSet();
  }
  
  public static class AllWindowFunctionFinder implements EvalNodeVisitor {
    private Set<WindowFunctionEval> windowFunctions = Sets.newHashSet();

    @Override
    public void visit(EvalNode node) {
      if (node.getType() == EvalType.WINDOW_FUNCTION) {
        WindowFunctionEval field = (WindowFunctionEval) node;
        windowFunctions.add(field);
      }
    }
    
    public Set<WindowFunctionEval> getWindowFunctionSet() {
      return windowFunctions;
    }
  }

  public static <T extends EvalNode> Collection<T> findEvalsByType(EvalNode evalNode, EvalType type) {
    EvalFinder finder = new EvalFinder(type);
    finder.visit(null, evalNode, new Stack<>());
    return (Collection<T>) finder.evalNodes;
  }

  public static <T extends EvalNode> Collection<T> findOuterJoinSensitiveEvals(EvalNode evalNode) {
    OuterJoinSensitiveEvalFinder finder = new OuterJoinSensitiveEvalFinder();
    finder.visit(null, evalNode, new Stack<>());
    return (Collection<T>) finder.evalNodes;
  }

  public static class EvalFinder extends BasicEvalNodeVisitor<Object, Object> {
    private EvalType targetType;
    List<EvalNode> evalNodes = new ArrayList<>();

    public EvalFinder(EvalType targetType) {
      this.targetType = targetType;
    }

    @Override
    public Object visit(Object context, EvalNode evalNode, Stack<EvalNode> stack) {
      super.visit(context, evalNode, stack);

      if (evalNode.type == targetType) {
        evalNodes.add(evalNode);
      }

      return evalNode;
    }

    public List<EvalNode> getEvalNodes() {
      return evalNodes;
    }
  }

  public static class OuterJoinSensitiveEvalFinder extends BasicEvalNodeVisitor<Object, Object> {
    private List<EvalNode> evalNodes = new ArrayList<>();

    @Override
    public Object visit(Object context, EvalNode evalNode, Stack<EvalNode> stack) {
      super.visit(context, evalNode, stack);

      if (evalNode.type == EvalType.CASE) {
        evalNodes.add(evalNode);
      } else if (evalNode.type == EvalType.FUNCTION) {
        FunctionEval functionEval = (FunctionEval)evalNode;
        if ("coalesce".equals(functionEval.getName())) {
          evalNodes.add(evalNode);
        }
      } else if (evalNode.type == EvalType.IS_NULL) {
        evalNodes.add(evalNode);
      }

      return evalNode;
    }
  }

  public static boolean checkIfCanBeConstant(EvalNode evalNode) {
    return findUniqueColumns(evalNode).size() == 0 && findDistinctAggFunction(evalNode).size() == 0;
  }

  public static Datum evaluateImmediately(EvalContext evalContext, EvalNode evalNode) {
    evalNode.bind(evalContext, null);
    return evalNode.eval(null);
  }

  /**
   * Checks whether EvalNode consists of only partition columns and const values.
   * The partition based simple query can be defined as 'select * from tb_name where col_name1="X" and col_name2="Y" [LIMIT Z]',
   * whose WHERE clause consists of only partition-columns with constant values.
   * Partition columns must be able to form a prefix of HDFS path like '/tb_name1/col_name1=X/col_name2=Y'.
   *
   * @param node The qualification node of a SELECTION node
   * @param partSchema Partition expression schema
   * @return True if the query is partition-column based simple query.
   */
  public static boolean checkIfPartitionSelection(EvalNode node, Schema partSchema) {
    if (node != null && node instanceof BinaryEval) {
      BinaryEval eval = (BinaryEval)node;
      EvalNode left = eval.getLeftExpr();
      EvalNode right = eval.getRightExpr();
      EvalType type = eval.getType();

      if (type == EvalType.EQUAL) {
        if (left instanceof FieldEval && right instanceof ConstEval && partSchema.contains(((FieldEval) left).getColumnName())) {
          return true;
        } else if (left instanceof ConstEval && right instanceof FieldEval && partSchema.contains(((FieldEval) right).getColumnName())) {
          return true;
        }
      } else if ((type == EvalType.AND || type == EvalType.OR)
          && left instanceof BinaryEval && right instanceof BinaryEval) {
        return checkIfPartitionSelection(left, partSchema) && checkIfPartitionSelection(right, partSchema);
      }
    }
    return false;
  }
}