aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authortianyi <tianyi.asiainfo@gmail.com>2015-02-11 12:50:17 -0800
committerMichael Armbrust <michael@databricks.com>2015-02-11 12:50:17 -0800
commit44b2311d946981c8251cb7807d70c8e99db5bbed (patch)
tree47701d4d8df99600281527829ebd76769a84a482
parenta60d2b70adff3a8fb3bdfac226b1d86fdb443da4 (diff)
downloadspark-44b2311d946981c8251cb7807d70c8e99db5bbed.tar.gz
spark-44b2311d946981c8251cb7807d70c8e99db5bbed.tar.bz2
spark-44b2311d946981c8251cb7807d70c8e99db5bbed.zip
[SPARK-3688][SQL]LogicalPlan can't resolve column correctly
This PR fixed the resolving problem described in https://issues.apache.org/jira/browse/SPARK-3688 ``` CREATE TABLE t1(x INT); CREATE TABLE t2(a STRUCT<x: INT>, k INT); SELECT a.x FROM t1 a JOIN t2 b ON a.x = b.k; ``` Author: tianyi <tianyi.asiainfo@gmail.com> Closes #4524 from tianyi/SPARK-3688 and squashes the following commits: 237a256 [tianyi] resolve a name with table.column pattern first.
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala50
-rw-r--r--sql/hive/src/test/resources/golden/test ambiguousReferences resolved as hive-0-c6d02549aec166e16bfc44d5905fa33a0
-rw-r--r--sql/hive/src/test/resources/golden/test ambiguousReferences resolved as hive-1-a8987ff8c7b9ca95bf8b32314694ed1f0
-rw-r--r--sql/hive/src/test/resources/golden/test ambiguousReferences resolved as hive-2-26f54240cf5b909086fc34a34d7fdb560
-rw-r--r--sql/hive/src/test/resources/golden/test ambiguousReferences resolved as hive-3-d08d5280027adea681001ad82a5a69740
-rw-r--r--sql/hive/src/test/resources/golden/test ambiguousReferences resolved as hive-4-22eb25b5be6daf72a6649adfe50417491
-rw-r--r--sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala9
7 files changed, 42 insertions, 18 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
index 7cf4b81274..b23f8d03df 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
@@ -128,6 +128,29 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] with Logging {
def resolve(name: String, resolver: Resolver): Option[NamedExpression] =
resolve(name, output, resolver)
+ def resolveAsTableColumn(
+ nameParts: Array[String],
+ resolver: Resolver,
+ attribute: Attribute): List[(Attribute, List[String])] = {
+ if (attribute.qualifiers.find(resolver(_, nameParts.head)).nonEmpty && nameParts.size > 1) {
+ val remainingParts = nameParts.drop(1)
+ resolveAsColumn(remainingParts, resolver, attribute)
+ } else {
+ Nil
+ }
+ }
+
+ def resolveAsColumn(
+ nameParts: Array[String],
+ resolver: Resolver,
+ attribute: Attribute): List[(Attribute, List[String])] = {
+ if (resolver(attribute.name, nameParts.head)) {
+ (attribute.withName(nameParts.head), nameParts.tail.toList) :: Nil
+ } else {
+ Nil
+ }
+ }
+
/** Performs attribute resolution given a name and a sequence of possible attributes. */
protected def resolve(
name: String,
@@ -136,24 +159,15 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] with Logging {
val parts = name.split("\\.")
- // Collect all attributes that are output by this nodes children where either the first part
- // matches the name or where the first part matches the scope and the second part matches the
- // name. Return these matches along with any remaining parts, which represent dotted access to
- // struct fields.
- val options = input.flatMap { option =>
- // If the first part of the desired name matches a qualifier for this possible match, drop it.
- val remainingParts =
- if (option.qualifiers.find(resolver(_, parts.head)).nonEmpty && parts.size > 1) {
- parts.drop(1)
- } else {
- parts
- }
-
- if (resolver(option.name, remainingParts.head)) {
- // Preserve the case of the user's attribute reference.
- (option.withName(remainingParts.head), remainingParts.tail.toList) :: Nil
- } else {
- Nil
+ // We will try to resolve this name as `table.column` pattern first.
+ var options = input.flatMap { option =>
+ resolveAsTableColumn(parts, resolver, option)
+ }
+
+ // If none of attributes match `table.column` pattern, we try to resolve it as a column.
+ if(options.isEmpty) {
+ options = input.flatMap { option =>
+ resolveAsColumn(parts, resolver, option)
}
}
diff --git a/sql/hive/src/test/resources/golden/test ambiguousReferences resolved as hive-0-c6d02549aec166e16bfc44d5905fa33a b/sql/hive/src/test/resources/golden/test ambiguousReferences resolved as hive-0-c6d02549aec166e16bfc44d5905fa33a
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/test ambiguousReferences resolved as hive-0-c6d02549aec166e16bfc44d5905fa33a
diff --git a/sql/hive/src/test/resources/golden/test ambiguousReferences resolved as hive-1-a8987ff8c7b9ca95bf8b32314694ed1f b/sql/hive/src/test/resources/golden/test ambiguousReferences resolved as hive-1-a8987ff8c7b9ca95bf8b32314694ed1f
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/test ambiguousReferences resolved as hive-1-a8987ff8c7b9ca95bf8b32314694ed1f
diff --git a/sql/hive/src/test/resources/golden/test ambiguousReferences resolved as hive-2-26f54240cf5b909086fc34a34d7fdb56 b/sql/hive/src/test/resources/golden/test ambiguousReferences resolved as hive-2-26f54240cf5b909086fc34a34d7fdb56
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/test ambiguousReferences resolved as hive-2-26f54240cf5b909086fc34a34d7fdb56
diff --git a/sql/hive/src/test/resources/golden/test ambiguousReferences resolved as hive-3-d08d5280027adea681001ad82a5a6974 b/sql/hive/src/test/resources/golden/test ambiguousReferences resolved as hive-3-d08d5280027adea681001ad82a5a6974
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/test ambiguousReferences resolved as hive-3-d08d5280027adea681001ad82a5a6974
diff --git a/sql/hive/src/test/resources/golden/test ambiguousReferences resolved as hive-4-22eb25b5be6daf72a6649adfe5041749 b/sql/hive/src/test/resources/golden/test ambiguousReferences resolved as hive-4-22eb25b5be6daf72a6649adfe5041749
new file mode 100644
index 0000000000..d00491fd7e
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/test ambiguousReferences resolved as hive-4-22eb25b5be6daf72a6649adfe5041749
@@ -0,0 +1 @@
+1
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala
index ab5f9cdddf..029c36aa89 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala
@@ -99,6 +99,15 @@ class HiveResolutionSuite extends HiveComparisonTest {
assert(sql("SELECT nestedArray[0].a FROM nestedRepeatedTest").collect().head(0) === 1)
}
+ createQueryTest("test ambiguousReferences resolved as hive",
+ """
+ |CREATE TABLE t1(x INT);
+ |CREATE TABLE t2(a STRUCT<x: INT>, k INT);
+ |INSERT OVERWRITE TABLE t1 SELECT 1 FROM src LIMIT 1;
+ |INSERT OVERWRITE TABLE t2 SELECT named_struct("x",1),1 FROM src LIMIT 1;
+ |SELECT a.x FROM t1 a JOIN t2 b ON a.x = b.k;
+ """.stripMargin)
+
/**
* Negative examples. Currently only left here for documentation purposes.
* TODO(marmbrus): Test that catalyst fails on these queries.