aboutsummaryrefslogtreecommitdiff
path: root/sql/core/src
diff options
context:
space:
mode:
author gatorsmile <gatorsmile@gmail.com> 2016-02-15 23:16:58 -0800
committer Davies Liu <davies.liu@gmail.com> 2016-02-15 23:16:58 -0800
commit fee739f07b3bc37dd65682e93e60e0add848f583 (patch)
tree f628025468812328819b3fe3a00b21390f00780e /sql/core/src
parent e4675c240255207c5dd812fa657e6aca2dc9cfeb (diff)
download spark-fee739f07b3bc37dd65682e93e60e0add848f583.tar.gz
spark-fee739f07b3bc37dd65682e93e60e0add848f583.tar.bz2
spark-fee739f07b3bc37dd65682e93e60e0add848f583.zip
[SPARK-13221] [SQL] Fixing GroupingSets when Aggregate Functions Containing GroupBy Columns
Using GroupingSets generates a wrong result when aggregate functions contain GroupBy columns. This PR fixes it. Since the code changes are very small, maybe we can also merge it to 1.6.

For example, the following query returns a wrong result:
```scala
sql("select course, sum(earnings) as sum from courseSales group by course, earnings" +
  " grouping sets((), (course), (course, earnings))" +
  " order by course, sum").show()
```
Before the fix, the results are like
```
[null,null]
[Java,null]
[Java,20000.0]
[Java,30000.0]
[dotNET,null]
[dotNET,5000.0]
[dotNET,10000.0]
[dotNET,48000.0]
```
After the fix, the results become correct:
```
[null,113000.0]
[Java,20000.0]
[Java,30000.0]
[Java,50000.0]
[dotNET,5000.0]
[dotNET,10000.0]
[dotNET,48000.0]
[dotNET,63000.0]
```
UPDATE: This PR also deprecated the external column: GROUPING__ID.

Author: gatorsmile <gatorsmile@gmail.com>

Closes #11100 from gatorsmile/groupingSets.
Diffstat (limited to 'sql/core/src')
-rw-r--r-- sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala 34
1 files changed, 34 insertions, 0 deletions
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index f665a1c87b..b3e179755a 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -2040,6 +2040,36 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
)
}
+ test("grouping sets when aggregate functions containing groupBy columns") {
+ checkAnswer(
+ sql("select course, sum(earnings) as sum from courseSales group by course, earnings " +
+ "grouping sets((), (course), (course, earnings)) " +
+ "order by course, sum"),
+ Row(null, 113000.0) ::
+ Row("Java", 20000.0) ::
+ Row("Java", 30000.0) ::
+ Row("Java", 50000.0) ::
+ Row("dotNET", 5000.0) ::
+ Row("dotNET", 10000.0) ::
+ Row("dotNET", 48000.0) ::
+ Row("dotNET", 63000.0) :: Nil
+ )
+
+ checkAnswer(
+ sql("select course, sum(earnings) as sum, grouping_id(course, earnings) from courseSales " +
+ "group by course, earnings grouping sets((), (course), (course, earnings)) " +
+ "order by course, sum"),
+ Row(null, 113000.0, 3) ::
+ Row("Java", 20000.0, 0) ::
+ Row("Java", 30000.0, 0) ::
+ Row("Java", 50000.0, 1) ::
+ Row("dotNET", 5000.0, 0) ::
+ Row("dotNET", 10000.0, 0) ::
+ Row("dotNET", 48000.0, 0) ::
+ Row("dotNET", 63000.0, 1) :: Nil
+ )
+ }
+
test("cube") {
checkAnswer(
sql("select course, year, sum(earnings) from courseSales group by cube(course, year)"),
@@ -2103,6 +2133,10 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
sql("select course, year, grouping_id(course, year) from courseSales group by course, year")
}
assert(error.getMessage contains "grouping_id() can only be used with GroupingSets/Cube/Rollup")
+ error = intercept[AnalysisException] {
+ sql("select course, year, grouping__id from courseSales group by cube(course, year)")
+ }
+ assert(error.getMessage contains "grouping__id is deprecated; use grouping_id() instead")
}
test("SPARK-13056: Null in map value causes NPE") {