about summary refs log tree commit diff
path: root/python/pyspark/sql/tests.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/pyspark/sql/tests.py')
-rw-r--r-- python/pyspark/sql/tests.py 23
1 file changed, 22 insertions, 1 deletion
diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py
index 2fea4ac41f..86cad4b363 100644
--- a/python/pyspark/sql/tests.py
+++ b/python/pyspark/sql/tests.py
@@ -895,11 +895,32 @@ class SQLTests(ReusedPySparkTestCase):
         self.assertEqual([Row(a=None, b=1, c=None, d=98)], df3.collect())
 
     def test_approxQuantile(self):
-        df = self.sc.parallelize([Row(a=i) for i in range(10)]).toDF()
+        df = self.sc.parallelize([Row(a=i, b=i+10) for i in range(10)]).toDF()
         aq = df.stat.approxQuantile("a", [0.1, 0.5, 0.9], 0.1)
         self.assertTrue(isinstance(aq, list))
         self.assertEqual(len(aq), 3)
         self.assertTrue(all(isinstance(q, float) for q in aq))
+        aqs = df.stat.approxQuantile(["a", "b"], [0.1, 0.5, 0.9], 0.1)
+        self.assertTrue(isinstance(aqs, list))
+        self.assertEqual(len(aqs), 2)
+        self.assertTrue(isinstance(aqs[0], list))
+        self.assertEqual(len(aqs[0]), 3)
+        self.assertTrue(all(isinstance(q, float) for q in aqs[0]))
+        self.assertTrue(isinstance(aqs[1], list))
+        self.assertEqual(len(aqs[1]), 3)
+        self.assertTrue(all(isinstance(q, float) for q in aqs[1]))
+        aqt = df.stat.approxQuantile(("a", "b"), [0.1, 0.5, 0.9], 0.1)
+        self.assertTrue(isinstance(aqt, list))
+        self.assertEqual(len(aqt), 2)
+        self.assertTrue(isinstance(aqt[0], list))
+        self.assertEqual(len(aqt[0]), 3)
+        self.assertTrue(all(isinstance(q, float) for q in aqt[0]))
+        self.assertTrue(isinstance(aqt[1], list))
+        self.assertEqual(len(aqt[1]), 3)
+        self.assertTrue(all(isinstance(q, float) for q in aqt[1]))
+        self.assertRaises(ValueError, lambda: df.stat.approxQuantile(123, [0.1, 0.9], 0.1))
+        self.assertRaises(ValueError, lambda: df.stat.approxQuantile(("a", 123), [0.1, 0.9], 0.1))
+        self.assertRaises(ValueError, lambda: df.stat.approxQuantile(["a", 123], [0.1, 0.9], 0.1))
 
     def test_corr(self):
         import math