From acffc43455d7b3e4000be4ff0175b8ea19cd280b Mon Sep 17 00:00:00 2001 From: lewuathe Date: Sun, 5 Apr 2015 16:13:31 -0700 Subject: [SPARK-6262][MLLIB]Implement missing methods for MultivariateStatisticalSummary Add below methods in pyspark for MultivariateStatisticalSummary - normL1 - normL2 Author: lewuathe Closes #5359 from Lewuathe/SPARK-6262 and squashes the following commits: cbe439e [lewuathe] Implement missing methods for MultivariateStatisticalSummary --- python/pyspark/mllib/stat/_statistics.py | 6 ++++++ python/pyspark/mllib/tests.py | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/python/pyspark/mllib/stat/_statistics.py b/python/pyspark/mllib/stat/_statistics.py index 218ac148ca..1d83e9d483 100644 --- a/python/pyspark/mllib/stat/_statistics.py +++ b/python/pyspark/mllib/stat/_statistics.py @@ -49,6 +49,12 @@ class MultivariateStatisticalSummary(JavaModelWrapper): def min(self): return self.call("min").toArray() + def normL1(self): + return self.call("normL1").toArray() + + def normL2(self): + return self.call("normL2").toArray() + class Statistics(object): diff --git a/python/pyspark/mllib/tests.py b/python/pyspark/mllib/tests.py index dd3b66ce67..47dad7d12e 100644 --- a/python/pyspark/mllib/tests.py +++ b/python/pyspark/mllib/tests.py @@ -357,6 +357,12 @@ class StatTests(PySparkTestCase): summary = Statistics.colStats(data) self.assertEqual(10, summary.count()) + def test_col_norms(self): + data = RandomRDDs.normalVectorRDD(self.sc, 1000, 10, 10) + summary = Statistics.colStats(data) + self.assertEqual(10, len(summary.normL1())) + self.assertEqual(10, len(summary.normL2())) + class VectorUDTTests(PySparkTestCase): -- cgit v1.2.3