aboutsummaryrefslogtreecommitdiff
path: root/sql/core
diff options
context:
space:
mode:
authorDavies Liu <davies@databricks.com>2016-04-07 11:51:34 -0700
committerDavies Liu <davies.liu@gmail.com>2016-04-07 11:51:34 -0700
commitaa852215f82876977d164f371627e894e86baacc (patch)
treeb62a001aa6d74fda188a12f96dde53230b94ed93 /sql/core
parent8dcb0c7c974e9707933ac2ae6ce837e765a5e81a (diff)
downloadspark-aa852215f82876977d164f371627e894e86baacc.tar.gz
spark-aa852215f82876977d164f371627e894e86baacc.tar.bz2
spark-aa852215f82876977d164f371627e894e86baacc.zip
[SPARK-12740] [SPARK-13932] support grouping()/grouping_id() in having/order clause
## What changes were proposed in this pull request? This PR brings the support of using grouping()/grouping_id() in HAVING/ORDER BY clause. The resolved grouping()/grouping_id() will be replaced by unresolved "spark_gropuing_id" virtual attribute, then resolved by ResolveMissingAttribute. This PR also fix the HAVING clause that access a grouping column that is not presented in SELECT clause, for example: ```sql select count(1) from (select 1 as a) t group by a having a > 0 ``` ## How was this patch tested? Add new tests. Author: Davies Liu <davies@databricks.com> Closes #12235 from davies/grouping_having.
Diffstat (limited to 'sql/core')
-rw-r--r--sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala82
1 files changed, 82 insertions, 0 deletions
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 2ab7c1581c..dd648cdb81 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -2230,6 +2230,88 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
assert(error.getMessage contains "grouping__id is deprecated; use grouping_id() instead")
}
+ test("grouping and grouping_id in having") {
+ checkAnswer(
+ sql("select course, year from courseSales group by cube(course, year)" +
+ " having grouping(year) = 1 and grouping_id(course, year) > 0"),
+ Row("Java", null) ::
+ Row("dotNET", null) ::
+ Row(null, null) :: Nil
+ )
+
+ var error = intercept[AnalysisException] {
+ sql("select course, year from courseSales group by course, year" +
+ " having grouping(course) > 0")
+ }
+ assert(error.getMessage contains
+ "grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup")
+ error = intercept[AnalysisException] {
+ sql("select course, year from courseSales group by course, year" +
+ " having grouping_id(course, year) > 0")
+ }
+ assert(error.getMessage contains
+ "grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup")
+ error = intercept[AnalysisException] {
+ sql("select course, year from courseSales group by cube(course, year)" +
+ " having grouping__id > 0")
+ }
+ assert(error.getMessage contains "grouping__id is deprecated; use grouping_id() instead")
+ }
+
+ test("grouping and grouping_id in sort") {
+ checkAnswer(
+ sql("select course, year, grouping(course), grouping(year) from courseSales" +
+ " group by cube(course, year) order by grouping_id(course, year), course, year"),
+ Row("Java", 2012, 0, 0) ::
+ Row("Java", 2013, 0, 0) ::
+ Row("dotNET", 2012, 0, 0) ::
+ Row("dotNET", 2013, 0, 0) ::
+ Row("Java", null, 0, 1) ::
+ Row("dotNET", null, 0, 1) ::
+ Row(null, 2012, 1, 0) ::
+ Row(null, 2013, 1, 0) ::
+ Row(null, null, 1, 1) :: Nil
+ )
+
+ checkAnswer(
+ sql("select course, year, grouping_id(course, year) from courseSales" +
+ " group by cube(course, year) order by grouping(course), grouping(year), course, year"),
+ Row("Java", 2012, 0) ::
+ Row("Java", 2013, 0) ::
+ Row("dotNET", 2012, 0) ::
+ Row("dotNET", 2013, 0) ::
+ Row("Java", null, 1) ::
+ Row("dotNET", null, 1) ::
+ Row(null, 2012, 2) ::
+ Row(null, 2013, 2) ::
+ Row(null, null, 3) :: Nil
+ )
+
+ var error = intercept[AnalysisException] {
+ sql("select course, year from courseSales group by course, year" +
+ " order by grouping(course)")
+ }
+ assert(error.getMessage contains
+ "grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup")
+ error = intercept[AnalysisException] {
+ sql("select course, year from courseSales group by course, year" +
+ " order by grouping_id(course, year)")
+ }
+ assert(error.getMessage contains
+ "grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup")
+ error = intercept[AnalysisException] {
+ sql("select course, year from courseSales group by cube(course, year)" +
+ " order by grouping__id")
+ }
+ assert(error.getMessage contains "grouping__id is deprecated; use grouping_id() instead")
+ }
+
+ test("filter on a grouping column that is not presented in SELECT") {
+ checkAnswer(
+ sql("select count(1) from (select 1 as a) t group by a having a > 0"),
+ Row(1) :: Nil)
+ }
+
test("SPARK-13056: Null in map value causes NPE") {
val df = Seq(1 -> Map("abc" -> "somestring", "cba" -> null)).toDF("key", "value")
withTempTable("maptest") {