From adce5ee721c6a844ff21dfcd8515859458fe611d Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Sat, 5 Mar 2016 19:25:03 +0800 Subject: [SPARK-12720][SQL] SQL Generation Support for Cube, Rollup, and Grouping Sets #### What changes were proposed in this pull request? This PR is for supporting SQL generation for cube, rollup and grouping sets. For example, a query using rollup: ```SQL SELECT count(*) as cnt, key % 5, grouping_id() FROM t1 GROUP BY key % 5 WITH ROLLUP ``` Original logical plan: ``` Aggregate [(key#17L % cast(5 as bigint))#47L,grouping__id#46], [(count(1),mode=Complete,isDistinct=false) AS cnt#43L, (key#17L % cast(5 as bigint))#47L AS _c1#45L, grouping__id#46 AS _c2#44] +- Expand [List(key#17L, value#18, (key#17L % cast(5 as bigint))#47L, 0), List(key#17L, value#18, null, 1)], [key#17L,value#18,(key#17L % cast(5 as bigint))#47L,grouping__id#46] +- Project [key#17L, value#18, (key#17L % cast(5 as bigint)) AS (key#17L % cast(5 as bigint))#47L] +- Subquery t1 +- Relation[key#17L,value#18] ParquetRelation ``` Converted SQL: ```SQL SELECT count( 1) AS `cnt`, (`t1`.`key` % CAST(5 AS BIGINT)), grouping_id() AS `_c2` FROM `default`.`t1` GROUP BY (`t1`.`key` % CAST(5 AS BIGINT)) GROUPING SETS (((`t1`.`key` % CAST(5 AS BIGINT))), ()) ``` #### How was the this patch tested? Added eight test cases in `LogicalPlanToSQLSuite`. Author: gatorsmile Author: xiaoli Author: Xiao Li Closes #11283 from gatorsmile/groupingSetsToSQL. --- python/pyspark/sql/functions.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'python/pyspark/sql/functions.py') diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 92e724fef4..88924e2981 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -348,13 +348,13 @@ def grouping_id(*cols): grouping columns). >>> df.cube("name").agg(grouping_id(), sum("age")).orderBy("name").show() - +-----+------------+--------+ - | name|groupingid()|sum(age)| - +-----+------------+--------+ - | null| 1| 7| - |Alice| 0| 2| - | Bob| 0| 5| - +-----+------------+--------+ + +-----+-------------+--------+ + | name|grouping_id()|sum(age)| + +-----+-------------+--------+ + | null| 1| 7| + |Alice| 0| 2| + | Bob| 0| 5| + +-----+-------------+--------+ """ sc = SparkContext._active_spark_context jc = sc._jvm.functions.grouping_id(_to_seq(sc, cols, _to_java_column)) -- cgit v1.2.3