aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: MechCoder <manojkumarsivaraj334@gmail.com> 2015-07-07 12:35:40 -0700
committer: Xiangrui Meng <meng@databricks.com> 2015-07-07 12:35:40 -0700
commit: 35d781e71b68eb6da7f49fdae40fa6c4f8e27060 (patch)
tree: 4f7032a63b596587608ecec4fb6e96ebd8f56520
parent: 3336c7b148ad543d1f9b64ca2b559ea04930f5be (diff)
download: spark-35d781e71b68eb6da7f49fdae40fa6c4f8e27060.tar.gz
spark-35d781e71b68eb6da7f49fdae40fa6c4f8e27060.tar.bz2
spark-35d781e71b68eb6da7f49fdae40fa6c4f8e27060.zip
[SPARK-8704] [ML] [PySpark] Add missing methods in StandardScaler
Add std, mean to StandardScalerModel; getVectors, findSynonyms to Word2Vec Model; setFeatures and getFeatures to hashingTF.
Author: MechCoder <manojkumarsivaraj334@gmail.com>
Closes #7086 from MechCoder/missing_model_methods and squashes the following commits:
9fbae90 [MechCoder] Add type
6e3d6b2 [MechCoder] [SPARK-8704] Add missing methods in StandardScaler (ML and PySpark)
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala | 6
-rw-r--r-- python/pyspark/ml/feature.py | 18
2 files changed, 24 insertions, 0 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala
index ca3c1cfb56..72b545e5db 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala
@@ -106,6 +106,12 @@ class StandardScalerModel private[ml] (
scaler: feature.StandardScalerModel)
extends Model[StandardScalerModel] with StandardScalerParams {
+ /** Standard deviation of the StandardScalerModel */
+ val std: Vector = scaler.std
+
+ /** Mean of the StandardScalerModel */
+ val mean: Vector = scaler.mean
+
/** @group setParam */
def setInputCol(value: String): this.type = set(inputCol, value)
diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py
index 8804dace84..9bca7cc000 100644
--- a/python/pyspark/ml/feature.py
+++ b/python/pyspark/ml/feature.py
@@ -627,6 +627,10 @@ class StandardScaler(JavaEstimator, HasInputCol, HasOutputCol):
>>> df = sqlContext.createDataFrame([(Vectors.dense([0.0]),), (Vectors.dense([2.0]),)], ["a"])
>>> standardScaler = StandardScaler(inputCol="a", outputCol="scaled")
>>> model = standardScaler.fit(df)
+ >>> model.mean
+ DenseVector([1.0])
+ >>> model.std
+ DenseVector([1.4142])
>>> model.transform(df).collect()[1].scaled
DenseVector([1.4142])
"""
@@ -692,6 +696,20 @@ class StandardScalerModel(JavaModel):
Model fitted by StandardScaler.
"""
+ @property
+ def std(self):
+ """
+ Standard deviation of the StandardScalerModel.
+ """
+ return self._call_java("std")
+
+ @property
+ def mean(self):
+ """
+ Mean of the StandardScalerModel.
+ """
+ return self._call_java("mean")
+
@inherit_doc
class StringIndexer(JavaEstimator, HasInputCol, HasOutputCol):