/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.tajo.plan.nameresolver;

import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.apache.tajo.algebra.ColumnReferenceExpr;
import org.apache.tajo.catalog.Column;
import org.apache.tajo.catalog.NestedPathUtil;
import org.apache.tajo.catalog.Schema;
import org.apache.tajo.common.TajoDataTypes.Type;
import org.apache.tajo.exception.*;
import org.apache.tajo.plan.LogicalPlan;
import org.apache.tajo.plan.logical.RelationNode;
import org.apache.tajo.plan.logical.ScanNode;
import org.apache.tajo.schema.IdentifierUtil;
import org.apache.tajo.util.Pair;
import org.apache.tajo.util.StringUtils;

import java.util.*;

/**
 * Column name resolution utility. A SQL statement can include many kinds of column names,
 * defined in different ways. Some column names simply refer to a column in a relation.
 * Other column names include a table alias or a column alias derived from some expression.
 *
 * This utility ensures that each column name is derived from a valid and accessible column, and
 * it also finds the exact data type of the column.
 *
 * Terminology:
 * <ul>
 *   <li>Qualifier: database name, table name, or both included in a column name</li>
 *   <li>Simple name: just a column name without any qualifier</li>
 *   <li>Alias name: another name used to refer to a certain column more briefly</li>
 *   <li>Fully qualified name: a column name with database name and table name</li>
 *   <li>Canonical name: a fully qualified name whose simple name is the aliased name</li>
 * </ul>
 */
public abstract class NameResolver {

  /** Registry of resolver implementations, keyed by the name resolving mode they serve. */
  public static final Map<NameResolvingMode, NameResolver> resolverMap = Maps.newHashMap();

  static {
    resolverMap.put(NameResolvingMode.RELS_ONLY, new ResolverByRels());
    resolverMap.put(NameResolvingMode.RELS_AND_SUBEXPRS, new ResolverByRelsAndSubExprs());
    resolverMap.put(NameResolvingMode.SUBEXPRS_AND_RELS, new ResolverBySubExprsAndRels());
    resolverMap.put(NameResolvingMode.LEGACY, new ResolverByLegacy());
  }

  /**
   * Resolves a column reference with the resolver registered for the given mode,
   * without considering self-describing tables.
   *
   * @param plan The logical plan
   * @param block The current query block
   * @param column The column reference to be resolved
   * @param mode The name resolving mode which selects the resolver
   * @return The column returned by the selected resolver
   * @throws TajoException if the selected resolver fails to resolve the name
   */
  public static Column resolve(LogicalPlan plan, LogicalPlan.QueryBlock block, ColumnReferenceExpr column,
                               NameResolvingMode mode) throws TajoException {
    return resolve(plan, block, column, mode, false);
  }

  /**
   * Resolves a column reference with the resolver registered for the given mode.
   *
   * @param plan The logical plan
   * @param block The current query block
   * @param column The column reference to be resolved
   * @param mode The name resolving mode which selects the resolver
   * @param includeSelfDescTable Passed through to the selected resolver
   * @return The column returned by the selected resolver
   * @throws TajoException if the selected resolver fails to resolve the name
   * @throws RuntimeException if no resolver is registered for <code>mode</code>
   */
  public static Column resolve(LogicalPlan plan, LogicalPlan.QueryBlock block, ColumnReferenceExpr column,
                               NameResolvingMode mode, boolean includeSelfDescTable) throws TajoException {
    // A single lookup covers both an unregistered mode and a null mode; the message is built
    // null-safely so that the intended error is reported instead of a NullPointerException.
    NameResolver resolver = resolverMap.get(mode);
    if (resolver == null) {
      throw new RuntimeException("Unsupported name resolving level: " + (mode == null ? "null" : mode.name()));
    }
    return resolver.resolve(plan, block, column, includeSelfDescTable);
  }

  /**
   * Resolves a column reference according to the strategy of the concrete resolver.
   *
   * @param plan The logical plan
   * @param block The current query block
   * @param columnRef The column reference to be resolved
   * @param includeSelfDescTable Flag passed by the static <code>resolve</code> entry points
   * @return The resolved column
   */
  abstract Column resolve(LogicalPlan plan, LogicalPlan.QueryBlock block, ColumnReferenceExpr columnRef,
                          boolean includeSelfDescTable) throws TajoException;

  /**
   * Guess a relation from a table name regardless of whether the given name is qualified or not.
   *
   * @param block the current block
   * @param tableName The table name which can be either qualified or not.
   * @return A corresponding relation, or <code>null</code> if no relation in the block matches
   * @throws AmbiguousTableException if two or more relations in the block match the name
   */
  public static RelationNode lookupTable(LogicalPlan.QueryBlock block, String tableName)
      throws AmbiguousTableException {

    List<RelationNode> found = new ArrayList<>();

    for (RelationNode relation : block.getRelations()) {
      // The first two checks cover a qualified table name, the last two an unqualified one.
      if (relation.getCanonicalName().equals(tableName)
          || relation.getTableName().equals(tableName)
          || IdentifierUtil.extractSimpleName(relation.getCanonicalName()).equals(tableName)
          || IdentifierUtil.extractSimpleName(relation.getTableName()).equals(tableName)) {
        found.add(relation);
      }
    }

    if (found.isEmpty()) {
      return null;
    }
    if (found.size() > 1) {
      throw new AmbiguousTableException(tableName);
    }
    return found.get(0);
  }

  /**
   * Find relations such that its schema contains a given column
   *
   * @param block the current block
   * @param columnName The column name to find relation
   * @return relations including a given column; empty if there is none
   */
  public static Collection<RelationNode> lookupTableByColumns(LogicalPlan.QueryBlock block, String columnName) {
    Set<RelationNode> found = new HashSet<>();

    for (RelationNode rel : block.getRelations()) {
      if (rel.getLogicalSchema().contains(columnName)) {
        found.add(rel);
      }
    }

    return found;
  }

  /**
   * Try to find a column from all relations within a given query block.
   * If a given column reference is qualified, it tries to resolve the name
   * from only the relation corresponding to the qualifier.
   *
   * @param plan The logical plan
   * @param block The current query block
   * @param columnRef The column reference to be found
   * @param includeSeflDescTable If true, relations which describe their schema by themselves
   *                             (see {@link #describeSchemaByItself(RelationNode)}) are also considered
   * @return The found column
   * @throws UndefinedColumnException if the qualified name matches no relation in the block
   * @throws UndefinedTableException if the block has no relation for the normalized qualifier
   * @throws TajoRuntimeException wrapping {@link NotImplementedException} if the qualified name is not found
   *         in the block but a relation of an ancestor block contains it (correlated subquery)
   */
  static Column resolveFromRelsWithinBlock(LogicalPlan plan, LogicalPlan.QueryBlock block,
                                           ColumnReferenceExpr columnRef, boolean includeSeflDescTable)
      throws AmbiguousColumnException, AmbiguousTableException, UndefinedColumnException, UndefinedTableException {

    if (!columnRef.hasQualifier()) {
      return lookupColumnFromAllRelsInBlock(block, columnRef.getName(), includeSeflDescTable);
    }

    Pair<String, String> normalized;
    try {
      normalized = lookupQualifierAndCanonicalName(block, columnRef, includeSeflDescTable);
    } catch (UndefinedColumnException udce) {
      // is it correlated subquery?
      // if the search column is not found at the current block, find it at all ancestors of the block.
      LogicalPlan.QueryBlock current = block;
      while (!plan.getRootBlock().getName().equals(current.getName())) {
        LogicalPlan.QueryBlock parentBlock = plan.getParentBlock(current);
        for (RelationNode relationNode : parentBlock.getRelations()) {
          if (relationNode.getLogicalSchema().containsByQualifiedName(columnRef.getCanonicalName())) {
            throw new TajoRuntimeException(new NotImplementedException("Correlated subquery"));
          }
        }
        current = parentBlock;
      }

      throw udce;
    }

    String qualifier = normalized.getFirst();
    String canonicalName = normalized.getSecond();

    RelationNode relationOp = block.getRelation(qualifier);

    // If we cannot find any relation against a qualified column name
    if (relationOp == null) {
      throw new UndefinedTableException(qualifier);
    }

    if (includeSeflDescTable && describeSchemaByItself(relationOp)) {
      return guessColumn(IdentifierUtil.buildFQName(qualifier, canonicalName));
    }

    // Please consider a query case:
    // select lineitem.l_orderkey from lineitem a order by lineitem.l_orderkey;
    //
    // The relation lineitem is already renamed to "a", but lineitem.l_orderkey still should be available.
    // The below code makes it possible. Otherwise, it cannot find any match in the relation schema.
    if (block.isAlreadyRenamedTableName(IdentifierUtil.extractQualifier(canonicalName))) {
      canonicalName = IdentifierUtil.buildFQName(relationOp.getCanonicalName(),
          IdentifierUtil.extractSimpleName(canonicalName));
    }

    Schema schema = relationOp.getLogicalSchema();
    return schema.getColumn(canonicalName);
  }

  /**
   * Try to find the column from the current node and child node. It can find subexprs generated from the optimizer.
   *
   * The input schema of the current node is searched by the canonical name first; if that yields nothing,
   * the output schema of the latest node is searched by the simple name.
   *
   * @param block The current query block
   * @param columnRef The column reference to be found
   * @return The found column, or <code>null</code> if neither schema yields a column
   */
  static Column resolveFromCurrentAndChildNode(LogicalPlan.QueryBlock block, ColumnReferenceExpr columnRef)
      throws UndefinedColumnException {

    if (block.getCurrentNode() == null || block.getCurrentNode().getInSchema() == null) {
      return null;
    }

    Column found = block.getCurrentNode().getInSchema().getColumn(columnRef.getCanonicalName());
    if (found != null) {
      return found;
    }

    if (block.getLatestNode() == null) {
      return null;
    }
    return block.getLatestNode().getOutSchema().getColumn(columnRef.getName());
  }

  /**
   * Lookup a column among all relations in the current block from a column name.
   *
   * It assumes that <code>columnName</code> is not any qualified name.
   *
   * @param block The current query block
   * @param columnName The column reference to be found
   * @param includeSelfDescTable If true and no relation schema contains the column, relations which describe
   *                             their schema by themselves are used to guess the column
   * @return The found column, or <code>null</code> if there is no candidate
   * @throws AmbiguousColumnException if two or more candidates are found
   */
  static Column lookupColumnFromAllRelsInBlock(LogicalPlan.QueryBlock block, String columnName,
                                               boolean includeSelfDescTable) throws AmbiguousColumnException {
    Preconditions.checkArgument(IdentifierUtil.isSimpleIdentifier(columnName));

    List<Column> candidates = new ArrayList<>();

    for (RelationNode rel : block.getRelations()) {
      if (rel.isNameResolveBase()) {
        Column found = rel.getLogicalSchema().getColumn(columnName);
        if (found != null) {
          candidates.add(found);
        }
      }
    }

    if (!candidates.isEmpty()) {
      return ensureUniqueColumn(candidates);
    }

    if (!includeSelfDescTable) {
      return null;
    }

    // No schema contains the column: fall back to relations which describe their schema by themselves.
    List<RelationNode> candidateRels = new ArrayList<>();
    for (RelationNode rel : block.getRelations()) {
      if (describeSchemaByItself(rel)) {
        candidateRels.add(rel);
      }
    }

    if (candidateRels.size() == 1) {
      return guessColumn(IdentifierUtil.buildFQName(candidateRels.get(0).getCanonicalName(), columnName));
    }
    if (candidateRels.size() > 1) {
      throw new AmbiguousColumnException(columnName);
    }
    return null;
  }

  /**
   * Check whether a relation describes its schema by itself.
   *
   * @param relationNode The relation to be checked
   * @return true if the relation is a {@link ScanNode} whose table description has an empty schema
   */
  static boolean describeSchemaByItself(RelationNode relationNode) {
    return relationNode instanceof ScanNode && ((ScanNode) relationNode).getTableDesc().hasEmptySchema();
  }

  /**
   * Create a column for a name which cannot be looked up in any schema.
   *
   * @param qualifiedName The qualified name of the column
   * @return A column of the TEXT type with the given name
   */
  static Column guessColumn(String qualifiedName) {
    // TODO: other data types must be supported.
    return new Column(qualifiedName, Type.TEXT);
  }

  /**
   * Trying to find a column from all relations in other blocks
   *
   * @param plan The logical plan
   * @param columnRef The column reference to be found
   * @return The found column, or <code>null</code> if no relation contains it
   * @throws AmbiguousColumnException if two or more candidates are found
   */
  static Column resolveFromAllRelsInAllBlocks(LogicalPlan plan, ColumnReferenceExpr columnRef)
      throws AmbiguousColumnException {

    List<Column> candidates = Lists.newArrayList();

    // from all relations of all query blocks
    for (LogicalPlan.QueryBlock eachBlock : plan.getQueryBlocks()) {
      for (RelationNode rel : eachBlock.getRelations()) {
        Column found = rel.getLogicalSchema().getColumn(columnRef.getName());
        if (found != null) {
          candidates.add(found);
        }
      }
    }

    // ensureUniqueColumn() already returns null for an empty candidate list.
    return ensureUniqueColumn(candidates);
  }

  /**
   * Try to find a column from the final schema of the current block.
   *
   * @param block The current query block
   * @param columnRef The column reference to be found
   * @return The found column, or <code>null</code> if the block has no schema or the schema lookup yields nothing
   */
  static Column resolveAliasedName(LogicalPlan.QueryBlock block, ColumnReferenceExpr columnRef)
      throws AmbiguousColumnException {

    // At most one column can be found here, so no uniqueness check is necessary.
    if (block.getSchema() == null) {
      return null;
    }
    return block.getSchema().getColumn(columnRef.getName());
  }

  /**
   * Lookup a qualifier and a canonical name of column.
   *
   * It returns a pair of names, which the first value is the qualifier ${database}.${table} and
   * the second value is column's simple name.
   *
   * @param block The current block
   * @param columnRef The column name
   * @param includeSeflDescTable If true and no relation matches the name, relations which describe
   *                             their schema by themselves are considered
   * @return A pair of normalized qualifier and column name
   * @throws AmbiguousColumnException if two or more relations match the column name
   * @throws AmbiguousTableException if the table name part matches two or more relations
   * @throws UndefinedColumnException if no relation matches the column name
   */
  static Pair<String, String> lookupQualifierAndCanonicalName(LogicalPlan.QueryBlock block,
                                                              ColumnReferenceExpr columnRef,
                                                              boolean includeSeflDescTable)
      throws AmbiguousColumnException, AmbiguousTableException, UndefinedColumnException {

    Preconditions.checkArgument(columnRef.hasQualifier(), "ColumnReferenceExpr must be qualified.");

    String[] qualifierParts = columnRef.getQualifier().split("\\.");

    // This method assumes that column name consists of two or more dot chained names.
    // In this case, there must be three cases as follows:
    //
    // - dbname.tbname.column_name.nested_field...
    // - tbname.column_name.nested_field...
    // - column.nested_fieldX...

    Set<RelationNode> guessedRelations = new HashSet<>();

    // this position indicates the index of column name in qualifierParts;
    // It must be 0 or more because a qualified column is always passed to lookupQualifierAndCanonicalName().
    int columnNamePosition = -1;

    // check for dbname.tbname.column_name.nested_field
    if (qualifierParts.length >= 2) {
      RelationNode rel = lookupTable(block, IdentifierUtil.buildFQName(qualifierParts[0], qualifierParts[1]));
      if (rel != null) {
        guessedRelations.add(rel);
        columnNamePosition = 2;
      }
    }

    // check for tbname.column_name.nested_field
    if (columnNamePosition < 0 && qualifierParts.length >= 1) {
      RelationNode rel = lookupTable(block, qualifierParts[0]);
      if (rel != null) {
        guessedRelations.add(rel);
        columnNamePosition = 1;
      }
    }

    // column.nested_fieldX...
    if (columnNamePosition < 0 && guessedRelations.isEmpty() && qualifierParts.length > 0) {
      Collection<RelationNode> rels = lookupTableByColumns(block,
          StringUtils.join(qualifierParts, NestedPathUtil.PATH_DELIMITER, 0));

      if (rels.size() > 1) {
        throw new AmbiguousColumnException(columnRef.getCanonicalName());
      }

      if (rels.size() == 1) {
        guessedRelations.addAll(rels);
        columnNamePosition = 0;
      }
    }

    // throw an exception if no relation is found, or if two or more relations are found
    if (guessedRelations.isEmpty()) {
      if (!includeSeflDescTable) {
        throw new UndefinedColumnException(columnRef.getCanonicalName());
      }

      // check self-describing relations
      for (RelationNode rel : block.getRelations()) {
        if (describeSchemaByItself(rel)) {
          columnNamePosition = 0;
          guessedRelations.add(rel);
        }
      }

      if (guessedRelations.size() > 1) {
        throw new AmbiguousColumnException(columnRef.getCanonicalName());
      } else if (guessedRelations.isEmpty()) {
        throw new UndefinedColumnException(columnRef.getCanonicalName());
      }
    } else if (guessedRelations.size() > 1) {
      throw new AmbiguousColumnException(columnRef.getCanonicalName());
    }

    String qualifier = guessedRelations.iterator().next().getCanonicalName();
    String columnName;

    if (columnNamePosition >= qualifierParts.length) { // if there is no column in qualifierParts
      columnName = columnRef.getName();
    } else {
      // join a column name and its nested field names
      columnName = qualifierParts[columnNamePosition];

      // if qualifierParts include nested field names
      if (qualifierParts.length > columnNamePosition + 1) {
        columnName += NestedPathUtil.PATH_DELIMITER + StringUtils.join(qualifierParts,
            NestedPathUtil.PATH_DELIMITER, columnNamePosition + 1, qualifierParts.length);
      }

      // columnRef always has a leaf field name.
      columnName += NestedPathUtil.PATH_DELIMITER + columnRef.getName();
    }

    return new Pair<>(qualifier, columnName);
  }

  /**
   * Ensure that the candidate list designates exactly one column.
   *
   * @param candidates The candidate columns
   * @return The only candidate, or <code>null</code> if the list is empty
   * @throws AmbiguousColumnException if there are two or more candidates; the message lists
   *         all candidates separated by ", "
   */
  static Column ensureUniqueColumn(List<Column> candidates) throws AmbiguousColumnException {
    if (candidates.isEmpty()) {
      return null;
    }
    if (candidates.size() == 1) {
      return candidates.get(0);
    }

    StringBuilder sb = new StringBuilder();
    String separator = "";
    for (Column column : candidates) {
      sb.append(separator).append(column);
      separator = ", ";
    }
    throw new AmbiguousColumnException(sb.toString());
  }
}