aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark/sql/tests.py
diff options
context:
space:
mode:
author: Joseph K. Bradley <joseph@databricks.com> 2016-02-24 23:15:36 -0800
committer: Xiangrui Meng <meng@databricks.com> 2016-02-24 23:15:36 -0800
commit: 13ce10e95401b21fa40ca0bb27ebf9a0bfffe70f (patch)
tree: c24ffc89625ab7b5168edef1e4c0e087bcd49836 /python/pyspark/sql/tests.py
parent: 2b042577fb077865c3fce69c9d4eda22fde92673 (diff)
download: spark-13ce10e95401b21fa40ca0bb27ebf9a0bfffe70f.tar.gz
spark-13ce10e95401b21fa40ca0bb27ebf9a0bfffe70f.tar.bz2
spark-13ce10e95401b21fa40ca0bb27ebf9a0bfffe70f.zip
[SPARK-13479][SQL][PYTHON] Added Python API for approxQuantile
## What changes were proposed in this pull request? * Scala DataFrameStatFunctions: Added version of approxQuantile taking a List instead of an Array, for Python compatibility * Python DataFrame and DataFrameStatFunctions: Added approxQuantile ## How was this patch tested? * unit test in sql/tests.py Documentation was copied from the existing approxQuantile exactly. Author: Joseph K. Bradley <joseph@databricks.com> Closes #11356 from jkbradley/approx-quantile-python.
Diffstat (limited to 'python/pyspark/sql/tests.py')
-rw-r--r-- python/pyspark/sql/tests.py 7
1 files changed, 7 insertions, 0 deletions
diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py
index cc11c0f35c..90fd769691 100644
--- a/python/pyspark/sql/tests.py
+++ b/python/pyspark/sql/tests.py
@@ -669,6 +669,13 @@ class SQLTests(ReusedPySparkTestCase):
functions.last(df2.id, True).alias('d'))
self.assertEqual([Row(a=None, b=1, c=None, d=98)], df3.collect())
+ def test_approxQuantile(self):
+ df = self.sc.parallelize([Row(a=i) for i in range(10)]).toDF()
+ aq = df.stat.approxQuantile("a", [0.1, 0.5, 0.9], 0.1)
+ self.assertTrue(isinstance(aq, list))
+ self.assertEqual(len(aq), 3)
+ self.assertTrue(all(isinstance(q, float) for q in aq))
+
def test_corr(self):
import math
df = self.sc.parallelize([Row(a=i, b=math.sqrt(i)) for i in range(10)]).toDF()