diff options
author | jiangxingbo <jiangxb1987@gmail.com> | 2016-10-26 23:51:16 +0200 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2016-10-26 23:51:16 +0200 |
commit | 5b7d403c1819c32a6a5b87d470f8de1a8ad7a987 (patch) | |
tree | 9af740e958e40706c94ca5224e1f6e437a499899 /sql/core | |
parent | dcdda19785a272969fb1e3ec18382403aaad6c91 (diff) | |
download | spark-5b7d403c1819c32a6a5b87d470f8de1a8ad7a987.tar.gz spark-5b7d403c1819c32a6a5b87d470f8de1a8ad7a987.tar.bz2 spark-5b7d403c1819c32a6a5b87d470f8de1a8ad7a987.zip |
[SPARK-18094][SQL][TESTS] Move group analytics test cases from `SQLQuerySuite` into a query file test.
## What changes were proposed in this pull request?
Currently we have several test cases for group analytics(ROLLUP/CUBE/GROUPING SETS) in `SQLQuerySuite`, should better move them into a query file test.
The following test cases are moved to `group-analytics.sql`:
```
test("rollup")
test("grouping sets when aggregate functions containing groupBy columns")
test("cube")
test("grouping sets")
test("grouping and grouping_id")
test("grouping and grouping_id in having")
test("grouping and grouping_id in sort")
```
This is followup work of #15582
## How was this patch tested?
Modified query file `group-analytics.sql`, which will be tested by `SQLQueryTestSuite`.
Author: jiangxingbo <jiangxb1987@gmail.com>
Closes #15624 from jiangxb1987/group-analytics-test.
Diffstat (limited to 'sql/core')
3 files changed, 290 insertions, 192 deletions
diff --git a/sql/core/src/test/resources/sql-tests/inputs/group-analytics.sql b/sql/core/src/test/resources/sql-tests/inputs/group-analytics.sql index 2f783495dd..f8135389a9 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/group-analytics.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/group-analytics.sql @@ -10,4 +10,48 @@ SELECT a, b, SUM(b) FROM testData GROUP BY a, b WITH CUBE; -- ROLLUP on overlapping columns SELECT a + b, b, SUM(a - b) FROM testData GROUP BY a + b, b WITH ROLLUP; -SELECT a, b, SUM(b) FROM testData GROUP BY a, b WITH ROLLUP;
\ No newline at end of file +SELECT a, b, SUM(b) FROM testData GROUP BY a, b WITH ROLLUP; + +CREATE OR REPLACE TEMPORARY VIEW courseSales AS SELECT * FROM VALUES +("dotNET", 2012, 10000), ("Java", 2012, 20000), ("dotNET", 2012, 5000), ("dotNET", 2013, 48000), ("Java", 2013, 30000) +AS courseSales(course, year, earnings); + +-- ROLLUP +SELECT course, year, SUM(earnings) FROM courseSales GROUP BY ROLLUP(course, year) ORDER BY course, year; + +-- CUBE +SELECT course, year, SUM(earnings) FROM courseSales GROUP BY CUBE(course, year) ORDER BY course, year; + +-- GROUPING SETS +SELECT course, year, SUM(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course, year); +SELECT course, year, SUM(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course); +SELECT course, year, SUM(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(year); + +-- GROUPING SETS with aggregate functions containing groupBy columns +SELECT course, SUM(earnings) AS sum FROM courseSales +GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER BY course, sum; +SELECT course, SUM(earnings) AS sum, GROUPING_ID(course, earnings) FROM courseSales +GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER BY course, sum; + +-- GROUPING/GROUPING_ID +SELECT course, year, GROUPING(course), GROUPING(year), GROUPING_ID(course, year) FROM courseSales +GROUP BY CUBE(course, year); +SELECT course, year, GROUPING(course) FROM courseSales GROUP BY course, year; +SELECT course, year, GROUPING_ID(course, year) FROM courseSales GROUP BY course, year; +SELECT course, year, grouping__id FROM courseSales GROUP BY CUBE(course, year); + +-- GROUPING/GROUPING_ID in having clause +SELECT course, year FROM courseSales GROUP BY CUBE(course, year) +HAVING GROUPING(year) = 1 AND GROUPING_ID(course, year) > 0; +SELECT course, year FROM courseSales GROUP BY course, year HAVING GROUPING(course) > 0; +SELECT course, year FROM courseSales GROUP BY course, year HAVING GROUPING_ID(course) > 0; +SELECT course, year FROM courseSales GROUP BY CUBE(course, year) HAVING grouping__id > 0; + +-- GROUPING/GROUPING_ID in orderBy clause +SELECT course, year, GROUPING(course), GROUPING(year) FROM courseSales GROUP BY CUBE(course, year) +ORDER BY GROUPING(course), GROUPING(year), course, year; +SELECT course, year, GROUPING_ID(course, year) FROM courseSales GROUP BY CUBE(course, year) +ORDER BY GROUPING(course), GROUPING(year), course, year; +SELECT course, year FROM courseSales GROUP BY course, year ORDER BY GROUPING(course); +SELECT course, year FROM courseSales GROUP BY course, year ORDER BY GROUPING_ID(course); +SELECT course, year FROM courseSales GROUP BY CUBE(course, year) ORDER BY grouping__id;
\ No newline at end of file diff --git a/sql/core/src/test/resources/sql-tests/results/group-analytics.sql.out b/sql/core/src/test/resources/sql-tests/results/group-analytics.sql.out index 8ea7de809d..825e8f5488 100644 --- a/sql/core/src/test/resources/sql-tests/results/group-analytics.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/group-analytics.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 5 +-- Number of queries: 26 -- !query 0 @@ -32,7 +32,6 @@ NULL 2 0 NULL NULL 3 - -- !query 2 SELECT a, b, SUM(b) FROM testData GROUP BY a, b WITH CUBE -- !query 2 schema @@ -85,3 +84,247 @@ struct<a:int,b:int,sum(b):bigint> 3 2 2 3 NULL 3 NULL NULL 9 + + +-- !query 5 +CREATE OR REPLACE TEMPORARY VIEW courseSales AS SELECT * FROM VALUES +("dotNET", 2012, 10000), ("Java", 2012, 20000), ("dotNET", 2012, 5000), ("dotNET", 2013, 48000), ("Java", 2013, 30000) +AS courseSales(course, year, earnings) +-- !query 5 schema +struct<> +-- !query 5 output + + + +-- !query 6 +SELECT course, year, SUM(earnings) FROM courseSales GROUP BY ROLLUP(course, year) ORDER BY course, year +-- !query 6 schema +struct<course:string,year:int,sum(earnings):bigint> +-- !query 6 output +NULL NULL 113000 +Java NULL 50000 +Java 2012 20000 +Java 2013 30000 +dotNET NULL 63000 +dotNET 2012 15000 +dotNET 2013 48000 + + +-- !query 7 +SELECT course, year, SUM(earnings) FROM courseSales GROUP BY CUBE(course, year) ORDER BY course, year +-- !query 7 schema +struct<course:string,year:int,sum(earnings):bigint> +-- !query 7 output +NULL NULL 113000 +NULL 2012 35000 +NULL 2013 78000 +Java NULL 50000 +Java 2012 20000 +Java 2013 30000 +dotNET NULL 63000 +dotNET 2012 15000 +dotNET 2013 48000 + + +-- !query 8 +SELECT course, year, SUM(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course, year) +-- !query 8 schema +struct<course:string,year:int,sum(earnings):bigint> +-- !query 8 output +Java NULL 50000 +NULL 2012 35000 +NULL 2013 78000 +dotNET NULL 63000 + + +-- !query 9 +SELECT course, year, SUM(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course) +-- !query 9 schema +struct<course:string,year:int,sum(earnings):bigint> +-- !query 9 output +Java NULL 50000 +dotNET NULL 63000 + + +-- !query 10 +SELECT course, year, SUM(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(year) +-- !query 10 schema +struct<course:string,year:int,sum(earnings):bigint> +-- !query 10 output +NULL 2012 35000 +NULL 2013 78000 + + +-- !query 11 +SELECT course, SUM(earnings) AS sum FROM courseSales +GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER BY course, sum +-- !query 11 schema +struct<course:string,sum:bigint> +-- !query 11 output +NULL 113000 +Java 20000 +Java 30000 +Java 50000 +dotNET 5000 +dotNET 10000 +dotNET 48000 +dotNET 63000 + + +-- !query 12 +SELECT course, SUM(earnings) AS sum, GROUPING_ID(course, earnings) FROM courseSales +GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER BY course, sum +-- !query 12 schema +struct<course:string,sum:bigint,grouping_id(course, earnings):int> +-- !query 12 output +NULL 113000 3 +Java 20000 0 +Java 30000 0 +Java 50000 1 +dotNET 5000 0 +dotNET 10000 0 +dotNET 48000 0 +dotNET 63000 1 + + +-- !query 13 +SELECT course, year, GROUPING(course), GROUPING(year), GROUPING_ID(course, year) FROM courseSales +GROUP BY CUBE(course, year) +-- !query 13 schema +struct<course:string,year:int,grouping(course):tinyint,grouping(year):tinyint,grouping_id(course, year):int> +-- !query 13 output +Java 2012 0 0 0 +Java 2013 0 0 0 +Java NULL 0 1 1 +NULL 2012 1 0 2 +NULL 2013 1 0 2 +NULL NULL 1 1 3 +dotNET 2012 0 0 0 +dotNET 2013 0 0 0 +dotNET NULL 0 1 1 + + +-- !query 14 +SELECT course, year, GROUPING(course) FROM courseSales GROUP BY course, year +-- !query 14 schema +struct<> +-- !query 14 output +org.apache.spark.sql.AnalysisException +grouping() can only be used with GroupingSets/Cube/Rollup; + + +-- !query 15 +SELECT course, year, GROUPING_ID(course, year) FROM courseSales GROUP BY course, year +-- !query 15 schema +struct<> +-- !query 15 output +org.apache.spark.sql.AnalysisException +grouping_id() can only be used with GroupingSets/Cube/Rollup; + + +-- !query 16 +SELECT course, year, grouping__id FROM courseSales GROUP BY CUBE(course, year) +-- !query 16 schema +struct<> +-- !query 16 output +org.apache.spark.sql.AnalysisException +grouping__id is deprecated; use grouping_id() instead; + + +-- !query 17 +SELECT course, year FROM courseSales GROUP BY CUBE(course, year) +HAVING GROUPING(year) = 1 AND GROUPING_ID(course, year) > 0 +-- !query 17 schema +struct<course:string,year:int> +-- !query 17 output +Java NULL +NULL NULL +dotNET NULL + + +-- !query 18 +SELECT course, year FROM courseSales GROUP BY course, year HAVING GROUPING(course) > 0 +-- !query 18 schema +struct<> +-- !query 18 output +org.apache.spark.sql.AnalysisException +grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup; + + +-- !query 19 +SELECT course, year FROM courseSales GROUP BY course, year HAVING GROUPING_ID(course) > 0 +-- !query 19 schema +struct<> +-- !query 19 output +org.apache.spark.sql.AnalysisException +grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup; + + +-- !query 20 +SELECT course, year FROM courseSales GROUP BY CUBE(course, year) HAVING grouping__id > 0 +-- !query 20 schema +struct<> +-- !query 20 output +org.apache.spark.sql.AnalysisException +grouping__id is deprecated; use grouping_id() instead; + + +-- !query 21 +SELECT course, year, GROUPING(course), GROUPING(year) FROM courseSales GROUP BY CUBE(course, year) +ORDER BY GROUPING(course), GROUPING(year), course, year +-- !query 21 schema +struct<course:string,year:int,grouping(course):tinyint,grouping(year):tinyint> +-- !query 21 output +Java 2012 0 0 +Java 2013 0 0 +dotNET 2012 0 0 +dotNET 2013 0 0 +Java NULL 0 1 +dotNET NULL 0 1 +NULL 2012 1 0 +NULL 2013 1 0 +NULL NULL 1 1 + + +-- !query 22 +SELECT course, year, GROUPING_ID(course, year) FROM courseSales GROUP BY CUBE(course, year) +ORDER BY GROUPING(course), GROUPING(year), course, year +-- !query 22 schema +struct<course:string,year:int,grouping_id(course, year):int> +-- !query 22 output +Java 2012 0 +Java 2013 0 +dotNET 2012 0 +dotNET 2013 0 +Java NULL 1 +dotNET NULL 1 +NULL 2012 2 +NULL 2013 2 +NULL NULL 3 + + +-- !query 23 +SELECT course, year FROM courseSales GROUP BY course, year ORDER BY GROUPING(course) +-- !query 23 schema +struct<> +-- !query 23 output +org.apache.spark.sql.AnalysisException +grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup; + + +-- !query 24 +SELECT course, year FROM courseSales GROUP BY course, year ORDER BY GROUPING_ID(course) +-- !query 24 schema +struct<> +-- !query 24 output +org.apache.spark.sql.AnalysisException +grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup; + + +-- !query 25 +SELECT course, year FROM courseSales GROUP BY CUBE(course, year) ORDER BY grouping__id +-- !query 25 schema +struct<> +-- !query 25 output +org.apache.spark.sql.AnalysisException +grouping__id is deprecated; use grouping_id() instead; diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index bd4c25315c..1a43d0b220 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -2005,195 +2005,6 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { Row(false) :: Row(true) :: Nil) } - test("rollup") { - checkAnswer( - sql("select course, year, sum(earnings) from courseSales group by rollup(course, year)" + - " order by course, year"), - Row(null, null, 113000.0) :: - Row("Java", null, 50000.0) :: - Row("Java", 2012, 20000.0) :: - Row("Java", 2013, 30000.0) :: - Row("dotNET", null, 63000.0) :: - Row("dotNET", 2012, 15000.0) :: - Row("dotNET", 2013, 48000.0) :: Nil - ) - } - - test("grouping sets when aggregate functions containing groupBy columns") { - checkAnswer( - sql("select course, sum(earnings) as sum from courseSales group by course, earnings " + - "grouping sets((), (course), (course, earnings)) " + - "order by course, sum"), - Row(null, 113000.0) :: - Row("Java", 20000.0) :: - Row("Java", 30000.0) :: - Row("Java", 50000.0) :: - Row("dotNET", 5000.0) :: - Row("dotNET", 10000.0) :: - Row("dotNET", 48000.0) :: - Row("dotNET", 63000.0) :: Nil - ) - - checkAnswer( - sql("select course, sum(earnings) as sum, grouping_id(course, earnings) from courseSales " + - "group by course, earnings grouping sets((), (course), (course, earnings)) " + - "order by course, sum"), - Row(null, 113000.0, 3) :: - Row("Java", 20000.0, 0) :: - Row("Java", 30000.0, 0) :: - Row("Java", 50000.0, 1) :: - Row("dotNET", 5000.0, 0) :: - Row("dotNET", 10000.0, 0) :: - Row("dotNET", 48000.0, 0) :: - Row("dotNET", 63000.0, 1) :: Nil - ) - } - - test("cube") { - checkAnswer( - sql("select course, year, sum(earnings) from courseSales group by cube(course, year)"), - Row("Java", 2012, 20000.0) :: - Row("Java", 2013, 30000.0) :: - Row("Java", null, 50000.0) :: - Row("dotNET", 2012, 15000.0) :: - Row("dotNET", 2013, 48000.0) :: - Row("dotNET", null, 63000.0) :: - Row(null, 2012, 35000.0) :: - Row(null, 2013, 78000.0) :: - Row(null, null, 113000.0) :: Nil - ) - } - - test("grouping sets") { - checkAnswer( - sql("select course, year, sum(earnings) from courseSales group by course, year " + - "grouping sets(course, year)"), - Row("Java", null, 50000.0) :: - Row("dotNET", null, 63000.0) :: - Row(null, 2012, 35000.0) :: - Row(null, 2013, 78000.0) :: Nil - ) - - checkAnswer( - sql("select course, year, sum(earnings) from courseSales group by course, year " + - "grouping sets(course)"), - Row("Java", null, 50000.0) :: - Row("dotNET", null, 63000.0) :: Nil - ) - - checkAnswer( - sql("select course, year, sum(earnings) from courseSales group by course, year " + - "grouping sets(year)"), - Row(null, 2012, 35000.0) :: - Row(null, 2013, 78000.0) :: Nil - ) - } - - test("grouping and grouping_id") { - checkAnswer( - sql("select course, year, grouping(course), grouping(year), grouping_id(course, year)" + - " from courseSales group by cube(course, year)"), - Row("Java", 2012, 0, 0, 0) :: - Row("Java", 2013, 0, 0, 0) :: - Row("Java", null, 0, 1, 1) :: - Row("dotNET", 2012, 0, 0, 0) :: - Row("dotNET", 2013, 0, 0, 0) :: - Row("dotNET", null, 0, 1, 1) :: - Row(null, 2012, 1, 0, 2) :: - Row(null, 2013, 1, 0, 2) :: - Row(null, null, 1, 1, 3) :: Nil - ) - - var error = intercept[AnalysisException] { - sql("select course, year, grouping(course) from courseSales group by course, year") - } - assert(error.getMessage contains "grouping() can only be used with GroupingSets/Cube/Rollup") - error = intercept[AnalysisException] { - sql("select course, year, grouping_id(course, year) from courseSales group by course, year") - } - assert(error.getMessage contains "grouping_id() can only be used with GroupingSets/Cube/Rollup") - error = intercept[AnalysisException] { - sql("select course, year, grouping__id from courseSales group by cube(course, year)") - } - assert(error.getMessage contains "grouping__id is deprecated; use grouping_id() instead") - } - - test("grouping and grouping_id in having") { - checkAnswer( - sql("select course, year from courseSales group by cube(course, year)" + - " having grouping(year) = 1 and grouping_id(course, year) > 0"), - Row("Java", null) :: - Row("dotNET", null) :: - Row(null, null) :: Nil - ) - - var error = intercept[AnalysisException] { - sql("select course, year from courseSales group by course, year" + - " having grouping(course) > 0") - } - assert(error.getMessage contains - "grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup") - error = intercept[AnalysisException] { - sql("select course, year from courseSales group by course, year" + - " having grouping_id(course, year) > 0") - } - assert(error.getMessage contains - "grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup") - error = intercept[AnalysisException] { - sql("select course, year from courseSales group by cube(course, year)" + - " having grouping__id > 0") - } - assert(error.getMessage contains "grouping__id is deprecated; use grouping_id() instead") - } - - test("grouping and grouping_id in sort") { - checkAnswer( - sql("select course, year, grouping(course), grouping(year) from courseSales" + - " group by cube(course, year) order by grouping_id(course, year), course, year"), - Row("Java", 2012, 0, 0) :: - Row("Java", 2013, 0, 0) :: - Row("dotNET", 2012, 0, 0) :: - Row("dotNET", 2013, 0, 0) :: - Row("Java", null, 0, 1) :: - Row("dotNET", null, 0, 1) :: - Row(null, 2012, 1, 0) :: - Row(null, 2013, 1, 0) :: - Row(null, null, 1, 1) :: Nil - ) - - checkAnswer( - sql("select course, year, grouping_id(course, year) from courseSales" + - " group by cube(course, year) order by grouping(course), grouping(year), course, year"), - Row("Java", 2012, 0) :: - Row("Java", 2013, 0) :: - Row("dotNET", 2012, 0) :: - Row("dotNET", 2013, 0) :: - Row("Java", null, 1) :: - Row("dotNET", null, 1) :: - Row(null, 2012, 2) :: - Row(null, 2013, 2) :: - Row(null, null, 3) :: Nil - ) - - var error = intercept[AnalysisException] { - sql("select course, year from courseSales group by course, year" + - " order by grouping(course)") - } - assert(error.getMessage contains - "grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup") - error = intercept[AnalysisException] { - sql("select course, year from courseSales group by course, year" + - " order by grouping_id(course, year)") - } - assert(error.getMessage contains - "grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup") - error = intercept[AnalysisException] { - sql("select course, year from courseSales group by cube(course, year)" + - " order by grouping__id") - } - assert(error.getMessage contains "grouping__id is deprecated; use grouping_id() instead") - } - test("filter on a grouping column that is not presented in SELECT") { checkAnswer( sql("select count(1) from (select 1 as a) t group by a having a > 0"), |