/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.tajo.plan.rewrite.rules; import com.google.common.base.Preconditions; import org.apache.tajo.algebra.JoinType; import org.apache.tajo.catalog.Schema; import org.apache.tajo.catalog.SchemaUtil; import org.apache.tajo.exception.NotImplementedException; import org.apache.tajo.exception.TajoException; import org.apache.tajo.exception.TajoRuntimeException; import org.apache.tajo.plan.LogicalPlan; import org.apache.tajo.plan.LogicalPlan.QueryBlock; import org.apache.tajo.plan.Target; import org.apache.tajo.plan.expr.*; import org.apache.tajo.plan.logical.*; import org.apache.tajo.plan.rewrite.LogicalPlanRewriteRule; import org.apache.tajo.plan.rewrite.LogicalPlanRewriteRuleContext; import org.apache.tajo.plan.util.PlannerUtil; import org.apache.tajo.plan.visitor.BasicLogicalPlanVisitor; import java.util.*; /** * InSubqueryRewriteRule finds all subqueries occurring in the where clause with "IN" keywords, * and replaces them with appropriate join plans. * This rule must be executed before {@link FilterPushDownRule}. * */ public class InSubqueryRewriteRule implements LogicalPlanRewriteRule { private static final String NAME = "InSubqueryRewrite"; private final Rewriter rewriter = new Rewriter(); @Override public String getName() { return NAME; } @Override public boolean isEligible(LogicalPlanRewriteRuleContext context) { for (LogicalNode eachNode : PlannerUtil.findAllNodes(context.getPlan().getRootNode(), NodeType.SELECTION)) { SelectionNode selectionNode = (SelectionNode) eachNode; if (!extractInSubquery(selectionNode.getQual()).isEmpty()) { return true; } } return false; } static List<InEval> extractInSubquery(EvalNode qual) { List<InEval> inSubqueries = new ArrayList<>(); for (EvalNode eachQual : EvalTreeUtil.findEvalsByType(qual, EvalType.IN)) { InEval inEval = (InEval) eachQual; if (inEval.getRightExpr().getType() == EvalType.SUBQUERY) { inSubqueries.add(inEval); } } return inSubqueries; } @Override public LogicalPlan rewrite(LogicalPlanRewriteRuleContext context) throws TajoException { LogicalPlan.QueryBlock rootBlock = context.getPlan().getRootBlock(); LogicalPlan plan = context.getPlan(); rewriter.visit(context.getQueryContext(), plan, rootBlock, rootBlock.getRoot(), new Stack<>()); return plan; } private static final class Rewriter extends BasicLogicalPlanVisitor<Object, Object> { @Override public Object visitFilter(Object context, LogicalPlan plan, LogicalPlan.QueryBlock block, SelectionNode node, Stack<LogicalNode> stack) throws TajoException { // Since InSubqueryRewriteRule is executed before FilterPushDownRule, // we can expect that in-subqueries are found at only SelectionNode. // Visit every child first. List<InEval> inSubqueries = extractInSubquery(node.getQual()); stack.push(node); for (InEval eachIn : inSubqueries) { SubqueryEval subqueryEval = eachIn.getRightExpr(); QueryBlock childBlock = plan.getBlock(subqueryEval.getSubQueryNode().getSubQuery()); visit(context, plan, childBlock, childBlock.getRoot(), stack); } visit(context, plan, block, node.getChild(), stack); stack.pop(); LogicalNode baseRelation = node.getChild(); for (InEval eachIn : inSubqueries) { // 1. find the base relation for the column of the outer query // We assume that the left child of an in-subquery is either a FieldEval or a CastEval. Preconditions.checkArgument(eachIn.getLeftExpr().getType() == EvalType.FIELD || eachIn.getLeftExpr().getType() == EvalType.CAST); EvalNode leftEval = eachIn.getLeftExpr(); SubqueryEval subqueryEval = eachIn.getRightExpr(); QueryBlock childBlock = plan.getBlock(subqueryEval.getSubQueryNode().getSubQuery()); // 2. create join JoinType joinType = eachIn.isNot() ? JoinType.LEFT_ANTI : JoinType.LEFT_SEMI; if (joinType == JoinType.LEFT_ANTI) { throw new TajoRuntimeException(new NotImplementedException("Not-in subquery")); } JoinNode joinNode = new JoinNode(plan.newPID()); joinNode.init(joinType, baseRelation, subqueryEval.getSubQueryNode()); joinNode.setJoinQual(buildJoinCondition(leftEval, subqueryEval.getSubQueryNode())); ProjectionNode projectionNode = PlannerUtil.findTopNode(subqueryEval.getSubQueryNode(), NodeType.PROJECTION); // Insert an aggregation operator rather than just setting the distinct flag of the ProjectionNode // because the performance of distinct aggregation is poor. insertDistinctOperator(plan, childBlock, projectionNode, projectionNode.getChild()); Schema inSchema = SchemaUtil.merge(joinNode.getLeftChild().getOutSchema(), joinNode.getRightChild().getOutSchema()); joinNode.setInSchema(inSchema); joinNode.setOutSchema(node.getOutSchema()); List<Target> targets = new ArrayList<>(); targets.addAll(PlannerUtil.schemaToTargets(inSchema)); joinNode.setTargets(targets); block.addJoinType(joinType); block.registerNode(joinNode); plan.addHistory("IN subquery is rewritten into " + (eachIn.isNot() ? "anti" : "semi") + " join."); // 3. set the created join as the base relation baseRelation = joinNode; } // 4. remove in quals EvalNode[] originDnfs = AlgebraicUtil.toDisjunctiveNormalFormArray(node.getQual()); List<EvalNode> rewrittenDnfs = new ArrayList<>(); for (EvalNode eachDnf : originDnfs) { Set<EvalNode> cnfs = new HashSet<>(Arrays.asList(AlgebraicUtil.toConjunctiveNormalFormArray(eachDnf))); cnfs.removeAll(inSubqueries); if (!cnfs.isEmpty()) { rewrittenDnfs.add(AlgebraicUtil.createSingletonExprFromCNF(cnfs)); } } if (rewrittenDnfs.size() > 0) { node.setQual(AlgebraicUtil.createSingletonExprFromDNF(rewrittenDnfs.toArray(new EvalNode[rewrittenDnfs.size()]))); // The current selection node is expected to be removed at the filter push down phase. node.setChild(baseRelation); } else { PlannerUtil.replaceNode(plan, block.getRoot(), node, baseRelation); block.unregisterNode(node); } return null; } private void insertDistinctOperator(LogicalPlan plan, LogicalPlan.QueryBlock block, ProjectionNode projectionNode, LogicalNode child) throws TajoException { if (projectionNode.getChild().getType() != NodeType.GROUP_BY) { Schema outSchema = projectionNode.getOutSchema(); GroupbyNode dupRemoval = plan.createNode(GroupbyNode.class); dupRemoval.setChild(child); dupRemoval.setInSchema(projectionNode.getInSchema()); dupRemoval.setTargets(PlannerUtil.schemaToTargets(outSchema)); dupRemoval.setGroupingColumns(outSchema.toArray()); block.registerNode(dupRemoval); block.setAggregationRequire(); projectionNode.setChild(dupRemoval); projectionNode.setInSchema(dupRemoval.getOutSchema()); } } private EvalNode buildJoinCondition(EvalNode leftField, TableSubQueryNode subQueryNode) { FieldEval rightField = new FieldEval(subQueryNode.getOutSchema().getColumn(0)); return new BinaryEval(EvalType.EQUAL, leftField, rightField); } } }