From 63f077fbe50b4094340e9915db41d7dbdba52975 Mon Sep 17 00:00:00 2001 From: Zheng RuiFeng Date: Tue, 21 Mar 2017 08:45:59 -0700 Subject: [SPARK-20041][DOC] Update docs for NaN handling in approxQuantile ## What changes were proposed in this pull request? Update docs for NaN handling in approxQuantile. ## How was this patch tested? existing tests. Author: Zheng RuiFeng Closes #17369 from zhengruifeng/doc_quantiles_nan. --- python/pyspark/sql/dataframe.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'python/pyspark/sql/dataframe.py') diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index bb6df22682..a24512f53c 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -1384,7 +1384,8 @@ class DataFrame(object): Space-efficient Online Computation of Quantile Summaries]] by Greenwald and Khanna. - Note that rows containing any null values will be removed before calculation. + Note that null values will be ignored in numerical columns before calculation. + For columns only containing null values, an empty list is returned. :param col: str, list. Can be a single column name, or a list of names for multiple columns. -- cgit v1.2.3