diff options
Diffstat (limited to 'python/pyspark/sql/tests.py')
-rw-r--r-- | python/pyspark/sql/tests.py | 23 |
1 file changed, 22 insertions, 1 deletion
diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py
index 2fea4ac41f..86cad4b363 100644
--- a/python/pyspark/sql/tests.py
+++ b/python/pyspark/sql/tests.py
@@ -895,11 +895,32 @@ class SQLTests(ReusedPySparkTestCase):
         self.assertEqual([Row(a=None, b=1, c=None, d=98)], df3.collect())
 
     def test_approxQuantile(self):
-        df = self.sc.parallelize([Row(a=i) for i in range(10)]).toDF()
+        df = self.sc.parallelize([Row(a=i, b=i+10) for i in range(10)]).toDF()
         aq = df.stat.approxQuantile("a", [0.1, 0.5, 0.9], 0.1)
         self.assertTrue(isinstance(aq, list))
         self.assertEqual(len(aq), 3)
         self.assertTrue(all(isinstance(q, float) for q in aq))
+        aqs = df.stat.approxQuantile(["a", "b"], [0.1, 0.5, 0.9], 0.1)
+        self.assertTrue(isinstance(aqs, list))
+        self.assertEqual(len(aqs), 2)
+        self.assertTrue(isinstance(aqs[0], list))
+        self.assertEqual(len(aqs[0]), 3)
+        self.assertTrue(all(isinstance(q, float) for q in aqs[0]))
+        self.assertTrue(isinstance(aqs[1], list))
+        self.assertEqual(len(aqs[1]), 3)
+        self.assertTrue(all(isinstance(q, float) for q in aqs[1]))
+        aqt = df.stat.approxQuantile(("a", "b"), [0.1, 0.5, 0.9], 0.1)
+        self.assertTrue(isinstance(aqt, list))
+        self.assertEqual(len(aqt), 2)
+        self.assertTrue(isinstance(aqt[0], list))
+        self.assertEqual(len(aqt[0]), 3)
+        self.assertTrue(all(isinstance(q, float) for q in aqt[0]))
+        self.assertTrue(isinstance(aqt[1], list))
+        self.assertEqual(len(aqt[1]), 3)
+        self.assertTrue(all(isinstance(q, float) for q in aqt[1]))
+        self.assertRaises(ValueError, lambda: df.stat.approxQuantile(123, [0.1, 0.9], 0.1))
+        self.assertRaises(ValueError, lambda: df.stat.approxQuantile(("a", 123), [0.1, 0.9], 0.1))
+        self.assertRaises(ValueError, lambda: df.stat.approxQuantile(["a", 123], [0.1, 0.9], 0.1))
 
     def test_corr(self):
         import math