aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark/mllib
diff options
context:
space:
mode:
author: Holden Karau <holden@us.ibm.com> 2015-12-22 09:14:12 +0200
committer: Nick Pentreath <nick.pentreath@gmail.com> 2015-12-22 09:14:12 +0200
commit: 969d5665bb1806703f948e8e7ab6133fca38c086 (patch)
tree: c8a6f941613843bc1db2dc268d5e720da81f200b /python/pyspark/mllib
parent: 2235cd44407e3b6b401fb84a2096ade042c51d36 (diff)
downloadspark-969d5665bb1806703f948e8e7ab6133fca38c086.tar.gz
spark-969d5665bb1806703f948e8e7ab6133fca38c086.tar.bz2
spark-969d5665bb1806703f948e8e7ab6133fca38c086.zip
[SPARK-12296][PYSPARK][MLLIB] Feature parity for pyspark mllib standard scaler model
Some accessor methods are missing from the Python API, such as ways to access the std, mean, etc. This PR adds them to bring pyspark.mllib.feature.StandardScaler & StandardScalerModel to feature parity.
Author: Holden Karau <holden@us.ibm.com>
Closes #10298 from holdenk/SPARK-12296-feature-parity-pyspark-mllib-StandardScalerModel.
Diffstat (limited to 'python/pyspark/mllib')
-rw-r--r-- python/pyspark/mllib/feature.py | 40
1 files changed, 40 insertions, 0 deletions
diff --git a/python/pyspark/mllib/feature.py b/python/pyspark/mllib/feature.py
index acd7ec57d6..6129353525 100644
--- a/python/pyspark/mllib/feature.py
+++ b/python/pyspark/mllib/feature.py
@@ -172,6 +172,38 @@ class StandardScalerModel(JavaVectorTransformer):
self.call("setWithStd", withStd)
return self
+ @property
+ @since('2.0.0')
+ def withStd(self):
+ """
+ Returns whether the model scales the data to unit standard deviation.
+ """
+ return self.call("withStd")
+
+ @property
+ @since('2.0.0')
+ def withMean(self):
+ """
+ Returns whether the model centers the data before scaling.
+ """
+ return self.call("withMean")
+
+ @property
+ @since('2.0.0')
+ def std(self):
+ """
+ Return the column standard deviation values.
+ """
+ return self.call("std")
+
+ @property
+ @since('2.0.0')
+ def mean(self):
+ """
+ Return the column mean values.
+ """
+ return self.call("mean")
+
class StandardScaler(object):
"""
@@ -196,6 +228,14 @@ class StandardScaler(object):
>>> for r in result.collect(): r
DenseVector([-0.7071, 0.7071, -0.7071])
DenseVector([0.7071, -0.7071, 0.7071])
+ >>> int(model.std[0])
+ 4
+ >>> int(model.mean[0]*10)
+ 9
+ >>> model.withStd
+ True
+ >>> model.withMean
+ True
.. versionadded:: 1.2.0
"""