aboutsummaryrefslogtreecommitdiff
path: root/python
diff options
context:
space:
mode:
author: Reynold Xin <rxin@databricks.com> 2015-06-01 21:29:39 -0700
committer: Reynold Xin <rxin@databricks.com> 2015-06-01 21:29:39 -0700
commit: 4c868b9943a2d86107d1f15f8df9830aac36fb75 (patch)
tree: e6a91fa1347d5050a4d4f6a9bbb502813fdd4708 /python
parent: cae9306c4f437c722baa57593fe83f4b7d82dbff (diff)
download: spark-4c868b9943a2d86107d1f15f8df9830aac36fb75.tar.gz
spark-4c868b9943a2d86107d1f15f8df9830aac36fb75.tar.bz2
spark-4c868b9943a2d86107d1f15f8df9830aac36fb75.zip
[minor doc] Add exploratory data analysis warning for DataFrame.stat.freqItem API
Author: Reynold Xin <rxin@databricks.com>
Closes #6569 from rxin/freqItemsWarning and squashes the following commits:
7eec145 [Reynold Xin] [minor doc] Add exploratory data analysis warning for DataFrame.stat.freqItem API.
Diffstat (limited to 'python')
-rw-r--r-- python/pyspark/sql/dataframe.py | 3
1 files changed, 3 insertions, 0 deletions
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 936487519a..a82b6b87c4 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -1170,6 +1170,9 @@ class DataFrame(object):
"http://dx.doi.org/10.1145/762471.762473, proposed by Karp, Schenker, and Papadimitriou".
:func:`DataFrame.freqItems` and :func:`DataFrameStatFunctions.freqItems` are aliases.
+ This function is meant for exploratory data analysis, as we make no guarantee about the
+ backward compatibility of the schema of the resulting DataFrame.
+
:param cols: Names of the columns to calculate frequent items for as a list or tuple of
strings.
:param support: The frequency with which to consider an item 'frequent'. Default is 1%.