aboutsummaryrefslogtreecommitdiff
path: root/python
diff options
context:
space:
mode:
authorAndrew Ray <ray.andrew@gmail.com>2015-12-07 15:01:00 -0800
committerYin Huai <yhuai@databricks.com>2015-12-07 15:01:00 -0800
commit36282f78b888743066843727426c6d806231aa97 (patch)
tree896349b7ebd435e5226134cb7f7908f45b67a8ec /python
parent84b809445f39b9030f272528bdaa39d1559cbc6e (diff)
downloadspark-36282f78b888743066843727426c6d806231aa97.tar.gz
spark-36282f78b888743066843727426c6d806231aa97.tar.bz2
spark-36282f78b888743066843727426c6d806231aa97.zip
[SPARK-12184][PYTHON] Make python api doc for pivot consistent with scala doc
In SPARK-11946 the API for pivot was changed slightly and its documentation was updated, but the corresponding doc changes were not made for the Python API. This PR updates the Python doc to be consistent. Author: Andrew Ray <ray.andrew@gmail.com>. Closes #10176 from aray/sql-pivot-python-doc.
Diffstat (limited to 'python')
-rw-r--r--python/pyspark/sql/group.py14
1 files changed, 9 insertions, 5 deletions
diff --git a/python/pyspark/sql/group.py b/python/pyspark/sql/group.py
index 1911588309..9ca303a974 100644
--- a/python/pyspark/sql/group.py
+++ b/python/pyspark/sql/group.py
@@ -169,16 +169,20 @@ class GroupedData(object):
@since(1.6)
def pivot(self, pivot_col, values=None):
- """Pivots a column of the current DataFrame and perform the specified aggregation.
+ """
+ Pivots a column of the current :class:`DataFrame` and performs the specified aggregation.
+ There are two versions of pivot function: one that requires the caller to specify the list
+ of distinct values to pivot on, and one that does not. The latter is more concise but less
+ efficient, because Spark needs to first compute the list of distinct values internally.
- :param pivot_col: Column to pivot
- :param values: Optional list of values of pivot column that will be translated to columns in
- the output DataFrame. If values are not provided the method will do an immediate call
- to .distinct() on the pivot column.
+ :param pivot_col: Name of the column to pivot.
+ :param values: List of values that will be translated to columns in the output DataFrame.
+ # Compute the sum of earnings for each year by course with each course as a separate column
>>> df4.groupBy("year").pivot("course", ["dotNET", "Java"]).sum("earnings").collect()
[Row(year=2012, dotNET=15000, Java=20000), Row(year=2013, dotNET=48000, Java=30000)]
+ # Or without specifying column values (less efficient)
>>> df4.groupBy("year").pivot("course").sum("earnings").collect()
[Row(year=2012, Java=20000, dotNET=15000), Row(year=2013, Java=30000, dotNET=48000)]
"""