diff options
author | Joseph K. Bradley <joseph@databricks.com> | 2016-02-24 23:15:36 -0800 |
---|---|---|
committer | Xiangrui Meng <meng@databricks.com> | 2016-02-24 23:15:36 -0800 |
commit | 13ce10e95401b21fa40ca0bb27ebf9a0bfffe70f (patch) | |
tree | c24ffc89625ab7b5168edef1e4c0e087bcd49836 /python/pyspark/sql/tests.py | |
parent | 2b042577fb077865c3fce69c9d4eda22fde92673 (diff) | |
download | spark-13ce10e95401b21fa40ca0bb27ebf9a0bfffe70f.tar.gz spark-13ce10e95401b21fa40ca0bb27ebf9a0bfffe70f.tar.bz2 spark-13ce10e95401b21fa40ca0bb27ebf9a0bfffe70f.zip |
[SPARK-13479][SQL][PYTHON] Added Python API for approxQuantile
## What changes were proposed in this pull request?
* Scala DataFrameStatFunctions: Added version of approxQuantile taking a List instead of an Array, for Python compatibility
* Python DataFrame and DataFrameStatFunctions: Added approxQuantile
## How was this patch tested?
* unit test in sql/tests.py
Documentation was copied from the existing approxQuantile exactly.
Author: Joseph K. Bradley <joseph@databricks.com>
Closes #11356 from jkbradley/approx-quantile-python.
Diffstat (limited to 'python/pyspark/sql/tests.py')
-rw-r--r-- | python/pyspark/sql/tests.py | 7 |
1 files changed, 7 insertions, 0 deletions
diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py index cc11c0f35c..90fd769691 100644 --- a/python/pyspark/sql/tests.py +++ b/python/pyspark/sql/tests.py @@ -669,6 +669,13 @@ class SQLTests(ReusedPySparkTestCase): functions.last(df2.id, True).alias('d')) self.assertEqual([Row(a=None, b=1, c=None, d=98)], df3.collect()) + def test_approxQuantile(self): + df = self.sc.parallelize([Row(a=i) for i in range(10)]).toDF() + aq = df.stat.approxQuantile("a", [0.1, 0.5, 0.9], 0.1) + self.assertTrue(isinstance(aq, list)) + self.assertEqual(len(aq), 3) + self.assertTrue(all(isinstance(q, float) for q in aq)) + def test_corr(self): import math df = self.sc.parallelize([Row(a=i, b=math.sqrt(i)) for i in range(10)]).toDF() |