diff options
author | Holden Karau <holden@us.ibm.com> | 2015-12-22 09:14:12 +0200 |
---|---|---|
committer | Nick Pentreath <nick.pentreath@gmail.com> | 2015-12-22 09:14:12 +0200 |
commit | 969d5665bb1806703f948e8e7ab6133fca38c086 (patch) | |
tree | c8a6f941613843bc1db2dc268d5e720da81f200b | |
parent | 2235cd44407e3b6b401fb84a2096ade042c51d36 (diff) | |
download | spark-969d5665bb1806703f948e8e7ab6133fca38c086.tar.gz spark-969d5665bb1806703f948e8e7ab6133fca38c086.tar.bz2 spark-969d5665bb1806703f948e8e7ab6133fca38c086.zip |
[SPARK-12296][PYSPARK][MLLIB] Feature parity for pyspark mllib standard scaler model
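Some accessors are missing from the Python StandardScalerModel, such as ways to read the model's std and mean. This PR adds read-only `withStd`, `withMean`, `std`, and `mean` properties to bring pyspark.mllib.feature.StandardScaler & StandardScalerModel to feature parity, and extends the StandardScaler doctest to exercise them.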
Author: Holden Karau <holden@us.ibm.com>
Closes #10298 from holdenk/SPARK-12296-feature-parity-pyspark-mllib-StandardScalerModel.
-rw-r--r-- | python/pyspark/mllib/feature.py | 40 |
1 files changed, 40 insertions, 0 deletions
diff --git a/python/pyspark/mllib/feature.py b/python/pyspark/mllib/feature.py index acd7ec57d6..6129353525 100644 --- a/python/pyspark/mllib/feature.py +++ b/python/pyspark/mllib/feature.py @@ -172,6 +172,38 @@ class StandardScalerModel(JavaVectorTransformer): self.call("setWithStd", withStd) return self + @property + @since('2.0.0') + def withStd(self): + """ + Returns if the model scales the data to unit standard deviation. + """ + return self.call("withStd") + + @property + @since('2.0.0') + def withMean(self): + """ + Returns if the model centers the data before scaling. + """ + return self.call("withMean") + + @property + @since('2.0.0') + def std(self): + """ + Return the column standard deviation values. + """ + return self.call("std") + + @property + @since('2.0.0') + def mean(self): + """ + Return the column mean values. + """ + return self.call("mean") + class StandardScaler(object): """ @@ -196,6 +228,14 @@ class StandardScaler(object): >>> for r in result.collect(): r DenseVector([-0.7071, 0.7071, -0.7071]) DenseVector([0.7071, -0.7071, 0.7071]) + >>> int(model.std[0]) + 4 + >>> int(model.mean[0]*10) + 9 + >>> model.withStd + True + >>> model.withMean + True .. versionadded:: 1.2.0 """ |