about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: lewuathe <lewuathe@me.com> 2015-04-07 14:36:57 -0700
committer: Xiangrui Meng <meng@databricks.com> 2015-04-07 14:36:57 -0700
commit: fc957dc78138e72036dbbadc9a54f155d318c038 (patch)
tree: 38fd6dcc699e3954183a75646ea7b73347c22a83
parent: e6f08fb42fda35952ea8b005170750ae551dc7d9 (diff)
download: spark-fc957dc78138e72036dbbadc9a54f155d318c038.tar.gz
spark-fc957dc78138e72036dbbadc9a54f155d318c038.tar.bz2
spark-fc957dc78138e72036dbbadc9a54f155d318c038.zip
[SPARK-6720][MLLIB] PySpark MultivariateStatisticalSummary unit test for normL1 and normL2
Add test cases to the insufficient unit test for `normL1` and `normL2`.
Ref: https://github.com/apache/spark/pull/5359
Author: lewuathe <lewuathe@me.com>
Closes #5374 from Lewuathe/SPARK-6720 and squashes the following commits:
5541b24 [lewuathe] More accurate tests
dc5718c [lewuathe] [SPARK-6720] PySpark MultivariateStatisticalSummary unit test for normL1 and normL2
-rw-r--r-- python/pyspark/mllib/tests.py 7
1 files changed, 7 insertions, 0 deletions
diff --git a/python/pyspark/mllib/tests.py b/python/pyspark/mllib/tests.py
index 47dad7d12e..61ef398487 100644
--- a/python/pyspark/mllib/tests.py
+++ b/python/pyspark/mllib/tests.py
@@ -363,6 +363,13 @@ class StatTests(PySparkTestCase):
self.assertEqual(10, len(summary.normL1()))
self.assertEqual(10, len(summary.normL2()))
+ data2 = self.sc.parallelize(xrange(10)).map(lambda x: Vectors.dense(x))
+ summary2 = Statistics.colStats(data2)
+ self.assertEqual(array([45.0]), summary2.normL1())
+ import math
+ expectedNormL2 = math.sqrt(sum(map(lambda x: x*x, xrange(10))))
+ self.assertTrue(math.fabs(summary2.normL2()[0] - expectedNormL2) < 1e-14)
+
class VectorUDTTests(PySparkTestCase):