From d0b4e93f7e92ea59058cc457a5586a4d9a596d71 Mon Sep 17 00:00:00 2001 From: MechCoder Date: Mon, 20 Jul 2015 09:00:01 -0700 Subject: [SPARK-8996] [MLLIB] [PYSPARK] Python API for Kolmogorov-Smirnov Test Python API for the KS-test Statistics.kolmogorovSmirnovTest(data, distName, *params) I'm not quite sure how to support the callable function since it is not serializable. Author: MechCoder Closes #7430 from MechCoder/spark-8996 and squashes the following commits: 2dd009d [MechCoder] minor 021d233 [MechCoder] Remove one wrapper and other minor stuff 49d07ab [MechCoder] [SPARK-8996] [MLlib] Python API for Kolmogorov-Smirnov Test --- python/pyspark/mllib/tests.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'python/pyspark/mllib/tests.py') diff --git a/python/pyspark/mllib/tests.py b/python/pyspark/mllib/tests.py index f2eab5b18f..3f5a02af12 100644 --- a/python/pyspark/mllib/tests.py +++ b/python/pyspark/mllib/tests.py @@ -869,6 +869,25 @@ class ChiSqTestTests(MLlibTestCase): self.assertIsNotNone(chi[1000]) +class KolmogorovSmirnovTest(MLlibTestCase): + + def test_R_implementation_equivalence(self): + data = self.sc.parallelize([ + 1.1626852897838, -0.585924465893051, 1.78546500331661, -1.33259371048501, + -0.446566766553219, 0.569606122374976, -2.88971761441412, -0.869018343326555, + -0.461702683149641, -0.555540910137444, -0.0201353678515895, -0.150382224136063, + -0.628126755843964, 1.32322085193283, -1.52135057001199, -0.437427868856691, + 0.970577579543399, 0.0282226444247749, -0.0857821886527593, 0.389214404984942 + ]) + model = Statistics.kolmogorovSmirnovTest(data, "norm") + self.assertAlmostEqual(model.statistic, 0.189, 3) + self.assertAlmostEqual(model.pValue, 0.422, 3) + + model = Statistics.kolmogorovSmirnovTest(data, "norm", 0, 1) + self.assertAlmostEqual(model.statistic, 0.189, 3) + self.assertAlmostEqual(model.pValue, 0.422, 3) + + class SerDeTest(MLlibTestCase): def test_to_java_object_rdd(self): # SPARK-6660 data = RandomRDDs.uniformRDD(self.sc, 10, 5, seed=0) -- cgit v1.2.3