aboutsummaryrefslogtreecommitdiff
path: root/sql/core
diff options
context:
space:
mode:
author Nattavut Sutyanyong <nsy.can@gmail.com> 2016-11-22 12:06:21 -0800
committer Herman van Hovell <hvanhovell@databricks.com> 2016-11-22 12:06:21 -0800
commit 45ea46b7b397f023b4da878eb11e21b08d931115 (patch)
tree 51be6bfe31812109263bac69f947ef315b5c084c /sql/core
parent bb152cdfbb8d02130c71d2326ae81939725c2cf0 (diff)
download spark-45ea46b7b397f023b4da878eb11e21b08d931115.tar.gz
spark-45ea46b7b397f023b4da878eb11e21b08d931115.tar.bz2
spark-45ea46b7b397f023b4da878eb11e21b08d931115.zip
[SPARK-18504][SQL] Scalar subquery with extra group by columns returning incorrect result
## What changes were proposed in this pull request?

This PR blocks an incorrect-result scenario in a scalar subquery that has GROUP BY column(s) that are not part of the correlated predicate(s).

Example:

    // Incorrect result
    Seq(1).toDF("c1").createOrReplaceTempView("t1")
    Seq((1,1),(1,2)).toDF("c1","c2").createOrReplaceTempView("t2")
    sql("select (select sum(-1) from t2 where t1.c1=t2.c1 group by t2.c2) from t1").show
    // How can selecting a scalar subquery from a 1-row table return 2 rows?

## How was this patch tested?

sql/test and catalyst/test; a new test case covering the reported problem is added to SubquerySuite.scala.

Author: Nattavut Sutyanyong <nsy.can@gmail.com>

Closes #15936 from nsyca/scalarSubqueryIncorrect-1.
Diffstat (limited to 'sql/core')
-rw-r--r-- sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala | 12
1 files changed, 12 insertions, 0 deletions
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala
index c84a6f1618..f1dd1c620e 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala
@@ -483,6 +483,18 @@ class SubquerySuite extends QueryTest with SharedSQLContext {
Row(1, null) :: Row(2, 6.0) :: Row(3, 2.0) :: Row(null, null) :: Row(6, null) :: Nil)
}
+ test("SPARK-18504 extra GROUP BY column in correlated scalar subquery is not permitted") {
+ withTempView("t") {
+ Seq((1, 1), (1, 2)).toDF("c1", "c2").createOrReplaceTempView("t")
+
+ val errMsg = intercept[AnalysisException] {
+ sql("select (select sum(-1) from t t2 where t1.c2 = t2.c1 group by t2.c2) sum from t t1")
+ }
+ assert(errMsg.getMessage.contains(
+ "a GROUP BY clause in a scalar correlated subquery cannot contain non-correlated columns:"))
+ }
+ }
+
test("non-aggregated correlated scalar subquery") {
val msg1 = intercept[AnalysisException] {
sql("select a, (select b from l l2 where l2.a = l1.a) sum_b from l l1")