[SPARK-17001][ML] Enable standardScaler to standardize sparse vectors when withMean=True

## What changes were proposed in this pull request? Allow centering / mean scaling of sparse vectors in StandardScaler, if requested. This is for compatibility with `VectorAssembler` in common usages. ## How was this patch tested? Jenkins tests, including new cases to reflect the new behavior. Author: Sean Owen <sowen@cloudera.com> Closes #14663 from srowen/SPARK-17001.
author: Sean Owen <sowen@cloudera.com> 2016-08-27 08:48:56 +0100
committer: Sean Owen <sowen@cloudera.com> 2016-08-27 08:48:56 +0100
commit: e07baf14120bc94b783649dabf5fffea58bff0de (patch)
tree: 557979925874c18034e793057a9706c3ee6924fa /examples
parent: 9fbced5b25c2f24d50c50516b4b7737f7e3eaf86 (diff)
download: spark-e07baf14120bc94b783649dabf5fffea58bff0de.tar.gz
spark-e07baf14120bc94b783649dabf5fffea58bff0de.tar.bz2
spark-e07baf14120bc94b783649dabf5fffea58bff0de.zip
2 files changed, 0 insertions, 4 deletions
diff --git a/examples/src/main/python/mllib/standard_scaler_example.py b/examples/src/main/python/mllib/standard_scaler_example.py
index 20a77a4708..442094e1bf 100644
--- a/examples/src/main/python/mllib/standard_scaler_example.py
+++ b/examples/src/main/python/mllib/standard_scaler_example.py
@@ -38,8 +38,6 @@ if __name__ == "__main__":
     # data1 will be unit variance.
     data1 = label.zip(scaler1.transform(features))
 
-    # Without converting the features into dense vectors, transformation with zero mean will raise
-    # exception on sparse vector.
     # data2 will be unit variance and zero mean.
     data2 = label.zip(scaler2.transform(features.map(lambda x: Vectors.dense(x.toArray()))))
     # $example off$
diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/StandardScalerExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/StandardScalerExample.scala
index fc0aa1b7f0..769fc17b3d 100644
--- a/examples/src/main/scala/org/apache/spark/examples/mllib/StandardScalerExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/mllib/StandardScalerExample.scala
@@ -44,8 +44,6 @@ object StandardScalerExample {
     // data1 will be unit variance.
     val data1 = data.map(x => (x.label, scaler1.transform(x.features)))
 
-    // Without converting the features into dense vectors, transformation with zero mean will raise
-    // exception on sparse vector.
     // data2 will be unit variance and zero mean.
     val data2 = data.map(x => (x.label, scaler2.transform(Vectors.dense(x.features.toArray))))
     // $example off$
author	Sean Owen <sowen@cloudera.com>	2016-08-27 08:48:56 +0100
committer	Sean Owen <sowen@cloudera.com>	2016-08-27 08:48:56 +0100
commit	e07baf14120bc94b783649dabf5fffea58bff0de (patch)
tree	557979925874c18034e793057a9706c3ee6924fa /examples
parent	9fbced5b25c2f24d50c50516b4b7737f7e3eaf86 (diff)
download	spark-e07baf14120bc94b783649dabf5fffea58bff0de.tar.gz spark-e07baf14120bc94b783649dabf5fffea58bff0de.tar.bz2 spark-e07baf14120bc94b783649dabf5fffea58bff0de.zip