about summary refs log tree commit diff
path: root/python/pyspark/sql/dataframe.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/pyspark/sql/dataframe.py')
-rw-r--r-- python/pyspark/sql/dataframe.py 9
1 files changed, 5 insertions, 4 deletions
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index f30a92dfc8..17448b38c3 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -934,10 +934,11 @@ class DataFrame(object):
def crosstab(self, col1, col2):
"""
Computes a pair-wise frequency table of the given columns. Also known as a contingency
- table. The number of distinct values for each column should be less than 1e4. The first
- column of each row will be the distinct values of `col1` and the column names will be the
- distinct values of `col2`. The name of the first column will be `$col1_$col2`. Pairs that
- have no occurrences will have `null` as their counts.
+ table. The number of distinct values for each column should be less than 1e4. At most 1e6
+ non-zero pair frequencies will be returned.
+ The first column of each row will be the distinct values of `col1` and the column names
+ will be the distinct values of `col2`. The name of the first column will be `$col1_$col2`.
+ Pairs that have no occurrences will have `null` as their counts.
:func:`DataFrame.crosstab` and :func:`DataFrameStatFunctions.crosstab` are aliases.
:param col1: The name of the first column. Distinct items will make the first item of