aboutsummaryrefslogtreecommitdiff
path: root/sql/hive
diff options
context:
space:
mode:
authorWenchen Fan <cloud0fan@outlook.com>2015-02-06 13:08:09 -0800
committerMichael Armbrust <michael@databricks.com>2015-02-06 13:08:09 -0800
commit4793c8402a19afe4df51129a7f99e07494a76af2 (patch)
treea24381d399893601e2ab1647f34fd85d90d796ca /sql/hive
parentbc36356080e3b52aaf61fc1e6b204146ab96d29f (diff)
downloadspark-4793c8402a19afe4df51129a7f99e07494a76af2.tar.gz
spark-4793c8402a19afe4df51129a7f99e07494a76af2.tar.bz2
spark-4793c8402a19afe4df51129a7f99e07494a76af2.zip
[SPARK-5278][SQL] Introduce UnresolvedGetField and complete the check of ambiguous reference to fields
When the `GetField` chain(`a.b.c.d.....`) is interrupted by `GetItem` like `a.b[0].c.d....`, then the check of ambiguous reference to fields is broken. The reason is that: for something like `a.b[0].c.d`, we first parse it to `GetField(GetField(GetItem(Unresolved("a.b"), 0), "c"), "d")`. Then in `LogicalPlan#resolve`, we resolve `"a.b"` and build a `GetField` chain from bottom(the relation). But for the 2 outer `GetFiled`, we have to resolve them in `Analyzer` or do it in `GetField` lazily, check data type of child, search needed field, etc. which is similar to what we have done in `LogicalPlan#resolve`. So in this PR, the fix is just copy the same logic in `LogicalPlan#resolve` to `Analyzer`, which is simple and quick, but I do suggest introduce `UnresolvedGetFiled` like I explained in https://github.com/apache/spark/pull/2405. Author: Wenchen Fan <cloud0fan@outlook.com> Closes #4068 from cloud-fan/simple and squashes the following commits: a6857b5 [Wenchen Fan] fix import order 8411c40 [Wenchen Fan] use UnresolvedGetField
Diffstat (limited to 'sql/hive')
-rw-r--r--sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala2
-rw-r--r--sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala27
2 files changed, 18 insertions, 11 deletions
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
index 4b7fa06532..2a4b880921 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
@@ -1038,7 +1038,7 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C
nodeToExpr(qualifier) match {
case UnresolvedAttribute(qualifierName) =>
UnresolvedAttribute(qualifierName + "." + cleanIdentifier(attr))
- case other => GetField(other, attr)
+ case other => UnresolvedGetField(other, attr)
}
/* Stars (*) */
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala
index 178ece4477..ff8130ae5f 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala
@@ -17,8 +17,7 @@
package org.apache.spark.sql.hive.execution
-import org.apache.spark.sql.hive.test.TestHive
-import org.apache.spark.sql.hive.test.TestHive.{sparkContext, sql}
+import org.apache.spark.sql.hive.test.TestHive.{sparkContext, jsonRDD, sql}
import org.apache.spark.sql.hive.test.TestHive.implicits._
case class Nested(a: Int, B: Int)
@@ -29,16 +28,24 @@ case class Data(a: Int, B: Int, n: Nested, nestedArray: Seq[Nested])
*/
class HiveResolutionSuite extends HiveComparisonTest {
- case class NestedData(a: Seq[NestedData2], B: NestedData2)
- case class NestedData2(a: NestedData3, B: NestedData3)
- case class NestedData3(a: Int, B: Int)
-
test("SPARK-3698: case insensitive test for nested data") {
- sparkContext.makeRDD(Seq.empty[NestedData]).registerTempTable("nested")
+ jsonRDD(sparkContext.makeRDD(
+ """{"a": [{"a": {"a": 1}}]}""" :: Nil)).registerTempTable("nested")
// This should be successfully analyzed
sql("SELECT a[0].A.A from nested").queryExecution.analyzed
}
+ test("SPARK-5278: check ambiguous reference to fields") {
+ jsonRDD(sparkContext.makeRDD(
+ """{"a": [{"b": 1, "B": 2}]}""" :: Nil)).registerTempTable("nested")
+
+ // there are 2 filed matching field name "b", we should report Ambiguous reference error
+ val exception = intercept[RuntimeException] {
+ sql("SELECT a[0].b from nested").queryExecution.analyzed
+ }
+ assert(exception.getMessage.contains("Ambiguous reference to fields"))
+ }
+
createQueryTest("table.attr",
"SELECT src.key FROM src ORDER BY key LIMIT 1")
@@ -68,7 +75,7 @@ class HiveResolutionSuite extends HiveComparisonTest {
test("case insensitivity with scala reflection") {
// Test resolution with Scala Reflection
- TestHive.sparkContext.parallelize(Data(1, 2, Nested(1,2), Seq(Nested(1,2))) :: Nil)
+ sparkContext.parallelize(Data(1, 2, Nested(1,2), Seq(Nested(1,2))) :: Nil)
.registerTempTable("caseSensitivityTest")
val query = sql("SELECT a, b, A, B, n.a, n.b, n.A, n.B FROM caseSensitivityTest")
@@ -79,14 +86,14 @@ class HiveResolutionSuite extends HiveComparisonTest {
ignore("case insensitivity with scala reflection joins") {
// Test resolution with Scala Reflection
- TestHive.sparkContext.parallelize(Data(1, 2, Nested(1,2), Seq(Nested(1,2))) :: Nil)
+ sparkContext.parallelize(Data(1, 2, Nested(1,2), Seq(Nested(1,2))) :: Nil)
.registerTempTable("caseSensitivityTest")
sql("SELECT * FROM casesensitivitytest a JOIN casesensitivitytest b ON a.a = b.a").collect()
}
test("nested repeated resolution") {
- TestHive.sparkContext.parallelize(Data(1, 2, Nested(1,2), Seq(Nested(1,2))) :: Nil)
+ sparkContext.parallelize(Data(1, 2, Nested(1,2), Seq(Nested(1,2))) :: Nil)
.registerTempTable("nestedRepeatedTest")
assert(sql("SELECT nestedArray[0].a FROM nestedRepeatedTest").collect().head(0) === 1)
}