diff options
author | MechCoder <manojkumarsivaraj334@gmail.com> | 2015-07-20 09:00:01 -0700 |
---|---|---|
committer | Xiangrui Meng <meng@databricks.com> | 2015-07-20 09:00:01 -0700 |
commit | d0b4e93f7e92ea59058cc457a5586a4d9a596d71 (patch) | |
tree | 9af6a3d90845d2cc36fa573125e8ed62770b7804 /python/pyspark/mllib/tests.py | |
parent | 3f7de7db4cf7c5e2824cb91087c5e9d4beb0f738 (diff) | |
download | spark-d0b4e93f7e92ea59058cc457a5586a4d9a596d71.tar.gz spark-d0b4e93f7e92ea59058cc457a5586a4d9a596d71.tar.bz2 spark-d0b4e93f7e92ea59058cc457a5586a4d9a596d71.zip |
[SPARK-8996] [MLLIB] [PYSPARK] Python API for Kolmogorov-Smirnov Test
Python API for the KS-test
Statistics.kolmogorovSmirnovTest(data, distName, *params)
Passing a callable function is not supported yet: I'm not quite sure how to support it, since the function is not serializable.
Author: MechCoder <manojkumarsivaraj334@gmail.com>
Closes #7430 from MechCoder/spark-8996 and squashes the following commits:
2dd009d [MechCoder] minor
021d233 [MechCoder] Remove one wrapper and other minor stuff
49d07ab [MechCoder] [SPARK-8996] [MLlib] Python API for Kolmogorov-Smirnov Test
Diffstat (limited to 'python/pyspark/mllib/tests.py')
-rw-r--r-- | python/pyspark/mllib/tests.py | 19 |
1 file changed, 19 insertions, 0 deletions
```diff
diff --git a/python/pyspark/mllib/tests.py b/python/pyspark/mllib/tests.py
index f2eab5b18f..3f5a02af12 100644
--- a/python/pyspark/mllib/tests.py
+++ b/python/pyspark/mllib/tests.py
@@ -869,6 +869,25 @@ class ChiSqTestTests(MLlibTestCase):
         self.assertIsNotNone(chi[1000])
 
 
+class KolmogorovSmirnovTest(MLlibTestCase):
+
+    def test_R_implementation_equivalence(self):
+        data = self.sc.parallelize([
+            1.1626852897838, -0.585924465893051, 1.78546500331661, -1.33259371048501,
+            -0.446566766553219, 0.569606122374976, -2.88971761441412, -0.869018343326555,
+            -0.461702683149641, -0.555540910137444, -0.0201353678515895, -0.150382224136063,
+            -0.628126755843964, 1.32322085193283, -1.52135057001199, -0.437427868856691,
+            0.970577579543399, 0.0282226444247749, -0.0857821886527593, 0.389214404984942
+        ])
+        model = Statistics.kolmogorovSmirnovTest(data, "norm")
+        self.assertAlmostEqual(model.statistic, 0.189, 3)
+        self.assertAlmostEqual(model.pValue, 0.422, 3)
+
+        model = Statistics.kolmogorovSmirnovTest(data, "norm", 0, 1)
+        self.assertAlmostEqual(model.statistic, 0.189, 3)
+        self.assertAlmostEqual(model.pValue, 0.422, 3)
+
+
 class SerDeTest(MLlibTestCase):
     def test_to_java_object_rdd(self):  # SPARK-6660
         data = RandomRDDs.uniformRDD(self.sc, 10, 5, seed=0)
```