From 44b2311d946981c8251cb7807d70c8e99db5bbed Mon Sep 17 00:00:00 2001 From: tianyi Date: Wed, 11 Feb 2015 12:50:17 -0800 Subject: [SPARK-3688][SQL]LogicalPlan can't resolve column correctlly This PR fixed the resolving problem described in https://issues.apache.org/jira/browse/SPARK-3688 ``` CREATE TABLE t1(x INT); CREATE TABLE t2(a STRUCT, k INT); SELECT a.x FROM t1 a JOIN t2 b ON a.x = b.k; ``` Author: tianyi Closes #4524 from tianyi/SPARK-3688 and squashes the following commits: 237a256 [tianyi] resolve a name with table.column pattern first. --- .../sql/catalyst/plans/logical/LogicalPlan.scala | 50 ++++++++++++++-------- ...lved as hive-0-c6d02549aec166e16bfc44d5905fa33a | 0 ...lved as hive-1-a8987ff8c7b9ca95bf8b32314694ed1f | 0 ...lved as hive-2-26f54240cf5b909086fc34a34d7fdb56 | 0 ...lved as hive-3-d08d5280027adea681001ad82a5a6974 | 0 ...lved as hive-4-22eb25b5be6daf72a6649adfe5041749 | 1 + .../sql/hive/execution/HiveResolutionSuite.scala | 9 ++++ 7 files changed, 42 insertions(+), 18 deletions(-) create mode 100644 sql/hive/src/test/resources/golden/test ambiguousReferences resolved as hive-0-c6d02549aec166e16bfc44d5905fa33a create mode 100644 sql/hive/src/test/resources/golden/test ambiguousReferences resolved as hive-1-a8987ff8c7b9ca95bf8b32314694ed1f create mode 100644 sql/hive/src/test/resources/golden/test ambiguousReferences resolved as hive-2-26f54240cf5b909086fc34a34d7fdb56 create mode 100644 sql/hive/src/test/resources/golden/test ambiguousReferences resolved as hive-3-d08d5280027adea681001ad82a5a6974 create mode 100644 sql/hive/src/test/resources/golden/test ambiguousReferences resolved as hive-4-22eb25b5be6daf72a6649adfe5041749 diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala index 7cf4b81274..b23f8d03df 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala @@ -128,6 +128,29 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] with Logging { def resolve(name: String, resolver: Resolver): Option[NamedExpression] = resolve(name, output, resolver) + def resolveAsTableColumn( + nameParts: Array[String], + resolver: Resolver, + attribute: Attribute): List[(Attribute, List[String])] = { + if (attribute.qualifiers.find(resolver(_, nameParts.head)).nonEmpty && nameParts.size > 1) { + val remainingParts = nameParts.drop(1) + resolveAsColumn(remainingParts, resolver, attribute) + } else { + Nil + } + } + + def resolveAsColumn( + nameParts: Array[String], + resolver: Resolver, + attribute: Attribute): List[(Attribute, List[String])] = { + if (resolver(attribute.name, nameParts.head)) { + (attribute.withName(nameParts.head), nameParts.tail.toList) :: Nil + } else { + Nil + } + } + /** Performs attribute resolution given a name and a sequence of possible attributes. */ protected def resolve( name: String, @@ -136,24 +159,15 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] with Logging { val parts = name.split("\\.") - // Collect all attributes that are output by this nodes children where either the first part - // matches the name or where the first part matches the scope and the second part matches the - // name. Return these matches along with any remaining parts, which represent dotted access to - // struct fields. - val options = input.flatMap { option => - // If the first part of the desired name matches a qualifier for this possible match, drop it. - val remainingParts = - if (option.qualifiers.find(resolver(_, parts.head)).nonEmpty && parts.size > 1) { - parts.drop(1) - } else { - parts - } - - if (resolver(option.name, remainingParts.head)) { - // Preserve the case of the user's attribute reference. - (option.withName(remainingParts.head), remainingParts.tail.toList) :: Nil - } else { - Nil + // We will try to resolve this name as `table.column` pattern first. + var options = input.flatMap { option => + resolveAsTableColumn(parts, resolver, option) + } + + // If none of attributes match `table.column` pattern, we try to resolve it as a column. + if(options.isEmpty) { + options = input.flatMap { option => + resolveAsColumn(parts, resolver, option) } } diff --git a/sql/hive/src/test/resources/golden/test ambiguousReferences resolved as hive-0-c6d02549aec166e16bfc44d5905fa33a b/sql/hive/src/test/resources/golden/test ambiguousReferences resolved as hive-0-c6d02549aec166e16bfc44d5905fa33a new file mode 100644 index 0000000000..e69de29bb2 diff --git a/sql/hive/src/test/resources/golden/test ambiguousReferences resolved as hive-1-a8987ff8c7b9ca95bf8b32314694ed1f b/sql/hive/src/test/resources/golden/test ambiguousReferences resolved as hive-1-a8987ff8c7b9ca95bf8b32314694ed1f new file mode 100644 index 0000000000..e69de29bb2 diff --git a/sql/hive/src/test/resources/golden/test ambiguousReferences resolved as hive-2-26f54240cf5b909086fc34a34d7fdb56 b/sql/hive/src/test/resources/golden/test ambiguousReferences resolved as hive-2-26f54240cf5b909086fc34a34d7fdb56 new file mode 100644 index 0000000000..e69de29bb2 diff --git a/sql/hive/src/test/resources/golden/test ambiguousReferences resolved as hive-3-d08d5280027adea681001ad82a5a6974 b/sql/hive/src/test/resources/golden/test ambiguousReferences resolved as hive-3-d08d5280027adea681001ad82a5a6974 new file mode 100644 index 0000000000..e69de29bb2 diff --git a/sql/hive/src/test/resources/golden/test ambiguousReferences resolved as hive-4-22eb25b5be6daf72a6649adfe5041749 b/sql/hive/src/test/resources/golden/test ambiguousReferences resolved as hive-4-22eb25b5be6daf72a6649adfe5041749 new file mode 100644 index 0000000000..d00491fd7e --- /dev/null +++ b/sql/hive/src/test/resources/golden/test ambiguousReferences resolved as hive-4-22eb25b5be6daf72a6649adfe5041749 @@ -0,0 +1 @@ +1 diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala index ab5f9cdddf..029c36aa89 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala @@ -99,6 +99,15 @@ class HiveResolutionSuite extends HiveComparisonTest { assert(sql("SELECT nestedArray[0].a FROM nestedRepeatedTest").collect().head(0) === 1) } + createQueryTest("test ambiguousReferences resolved as hive", + """ + |CREATE TABLE t1(x INT); + |CREATE TABLE t2(a STRUCT, k INT); + |INSERT OVERWRITE TABLE t1 SELECT 1 FROM src LIMIT 1; + |INSERT OVERWRITE TABLE t2 SELECT named_struct("x",1),1 FROM src LIMIT 1; + |SELECT a.x FROM t1 a JOIN t2 b ON a.x = b.k; + """.stripMargin) + /** * Negative examples. Currently only left here for documentation purposes. * TODO(marmbrus): Test that catalyst fails on these queries. -- cgit v1.2.3