about summary refs log tree commit diff
path: root/python/pyspark/sql/dataframe.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/pyspark/sql/dataframe.py')
-rw-r--r--  python/pyspark/sql/dataframe.py | 48
1 file changed, 46 insertions(+), 2 deletions(-)
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 3fc7d0048e..132db90e69 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -801,9 +801,53 @@ class DataFrame(object):
>>> df.groupBy(['name', df.age]).count().collect()
[Row(name=u'Bob', age=5, count=1), Row(name=u'Alice', age=2, count=1)]
"""
- jdf = self._jdf.groupBy(self._jcols(*cols))
+ jgd = self._jdf.groupBy(self._jcols(*cols))
from pyspark.sql.group import GroupedData
- return GroupedData(jdf, self.sql_ctx)
+ return GroupedData(jgd, self.sql_ctx)
+
+ @since(1.4)
+ def rollup(self, *cols):
+ """
+ Create a multi-dimensional rollup for the current :class:`DataFrame` using
+ the specified columns, so we can run aggregation on them.
+
+ >>> df.rollup('name', df.age).count().show()
+ +-----+----+-----+
+ | name| age|count|
+ +-----+----+-----+
+ |Alice|null| 1|
+ | Bob| 5| 1|
+ | Bob|null| 1|
+ | null|null| 2|
+ |Alice| 2| 1|
+ +-----+----+-----+
+ """
+ jgd = self._jdf.rollup(self._jcols(*cols))
+ from pyspark.sql.group import GroupedData
+ return GroupedData(jgd, self.sql_ctx)
+
+ @since(1.4)
+ def cube(self, *cols):
+ """
+ Create a multi-dimensional cube for the current :class:`DataFrame` using
+ the specified columns, so we can run aggregation on them.
+
+ >>> df.cube('name', df.age).count().show()
+ +-----+----+-----+
+ | name| age|count|
+ +-----+----+-----+
+ | null| 2| 1|
+ |Alice|null| 1|
+ | Bob| 5| 1|
+ | Bob|null| 1|
+ | null| 5| 1|
+ | null|null| 2|
+ |Alice| 2| 1|
+ +-----+----+-----+
+ """
+ jgd = self._jdf.cube(self._jcols(*cols))
+ from pyspark.sql.group import GroupedData
+ return GroupedData(jgd, self.sql_ctx)
@since(1.3)
def agg(self, *exprs):