aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark/mllib/tests.py
diff options
context:
space:
mode:
author: MechCoder <manojkumarsivaraj334@gmail.com> 2015-07-20 09:00:01 -0700
committer: Xiangrui Meng <meng@databricks.com> 2015-07-20 09:00:01 -0700
commit: d0b4e93f7e92ea59058cc457a5586a4d9a596d71 (patch)
tree: 9af6a3d90845d2cc36fa573125e8ed62770b7804 /python/pyspark/mllib/tests.py
parent: 3f7de7db4cf7c5e2824cb91087c5e9d4beb0f738 (diff)
download: spark-d0b4e93f7e92ea59058cc457a5586a4d9a596d71.tar.gz
spark-d0b4e93f7e92ea59058cc457a5586a4d9a596d71.tar.bz2
spark-d0b4e93f7e92ea59058cc457a5586a4d9a596d71.zip
[SPARK-8996] [MLLIB] [PYSPARK] Python API for Kolmogorov-Smirnov Test
Python API for the KS test: Statistics.kolmogorovSmirnovTest(data, distName, *params).
I'm not quite sure how to support the callable function, since it is not serializable.
Author: MechCoder <manojkumarsivaraj334@gmail.com>
Closes #7430 from MechCoder/spark-8996 and squashes the following commits:
2dd009d [MechCoder] minor
021d233 [MechCoder] Remove one wrapper and other minor stuff
49d07ab [MechCoder] [SPARK-8996] [MLlib] Python API for Kolmogorov-Smirnov Test
Diffstat (limited to 'python/pyspark/mllib/tests.py')
-rw-r--r-- python/pyspark/mllib/tests.py | 19
1 files changed, 19 insertions, 0 deletions
diff --git a/python/pyspark/mllib/tests.py b/python/pyspark/mllib/tests.py
index f2eab5b18f..3f5a02af12 100644
--- a/python/pyspark/mllib/tests.py
+++ b/python/pyspark/mllib/tests.py
@@ -869,6 +869,25 @@ class ChiSqTestTests(MLlibTestCase):
self.assertIsNotNone(chi[1000])
+class KolmogorovSmirnovTest(MLlibTestCase):
+    """Checks Statistics.kolmogorovSmirnovTest against fixed reference values."""
+
+    def test_R_implementation_equivalence(self):
+        """Compare the "norm" KS test result with hard-coded expected numbers.
+
+        The test is run twice on the same 20-value sample: once with only the
+        distribution name, once with the extra arguments 0 and 1 (presumably
+        mean and standard deviation -- confirm against the API). Both runs must
+        give statistic 0.189 and p-value 0.422 to three decimal places. Going by
+        the test name, the expected numbers presumably come from R.
+        """
+        samples = [
+            1.1626852897838, -0.585924465893051, 1.78546500331661, -1.33259371048501,
+            -0.446566766553219, 0.569606122374976, -2.88971761441412, -0.869018343326555,
+            -0.461702683149641, -0.555540910137444, -0.0201353678515895, -0.150382224136063,
+            -0.628126755843964, 1.32322085193283, -1.52135057001199, -0.437427868856691,
+            0.970577579543399, 0.0282226444247749, -0.0857821886527593, 0.389214404984942,
+        ]
+        rdd = self.sc.parallelize(samples)
+
+        # First pass omits the distribution parameters; second passes 0, 1 explicitly.
+        for dist_params in ((), (0, 1)):
+            result = Statistics.kolmogorovSmirnovTest(rdd, "norm", *dist_params)
+            self.assertAlmostEqual(result.statistic, 0.189, places=3)
+            self.assertAlmostEqual(result.pValue, 0.422, places=3)
+
+
class SerDeTest(MLlibTestCase):
def test_to_java_object_rdd(self): # SPARK-6660
data = RandomRDDs.uniformRDD(self.sc, 10, 5, seed=0)