diff options
author | lewuathe <lewuathe@me.com> | 2015-04-07 14:36:57 -0700 |
---|---|---|
committer | Xiangrui Meng <meng@databricks.com> | 2015-04-07 14:36:57 -0700 |
commit | fc957dc78138e72036dbbadc9a54f155d318c038 (patch) | |
tree | 38fd6dcc699e3954183a75646ea7b73347c22a83 /python | |
parent | e6f08fb42fda35952ea8b005170750ae551dc7d9 (diff) | |
download | spark-fc957dc78138e72036dbbadc9a54f155d318c038.tar.gz spark-fc957dc78138e72036dbbadc9a54f155d318c038.tar.bz2 spark-fc957dc78138e72036dbbadc9a54f155d318c038.zip |
[SPARK-6720][MLLIB] PySpark MultivariateStatisticalSummary unit test for normL1 and normL2
Add test cases to the previously insufficient unit tests for `normL1` and `normL2`.
Ref: https://github.com/apache/spark/pull/5359
Author: lewuathe <lewuathe@me.com>
Closes #5374 from Lewuathe/SPARK-6720 and squashes the following commits:
5541b24 [lewuathe] More accurate tests
dc5718c [lewuathe] [SPARK-6720] PySpark MultivariateStatisticalSummary unit test for normL1 and normL2
Diffstat (limited to 'python')
-rw-r--r-- | python/pyspark/mllib/tests.py | 7 |
1 file changed, 7 insertions, 0 deletions
diff --git a/python/pyspark/mllib/tests.py b/python/pyspark/mllib/tests.py index 47dad7d12e..61ef398487 100644 --- a/python/pyspark/mllib/tests.py +++ b/python/pyspark/mllib/tests.py @@ -363,6 +363,13 @@ class StatTests(PySparkTestCase): self.assertEqual(10, len(summary.normL1())) self.assertEqual(10, len(summary.normL2())) + data2 = self.sc.parallelize(xrange(10)).map(lambda x: Vectors.dense(x)) + summary2 = Statistics.colStats(data2) + self.assertEqual(array([45.0]), summary2.normL1()) + import math + expectedNormL2 = math.sqrt(sum(map(lambda x: x*x, xrange(10)))) + self.assertTrue(math.fabs(summary2.normL2()[0] - expectedNormL2) < 1e-14) + class VectorUDTTests(PySparkTestCase): |