about | summary | refs | log | tree | commit | diff
path: root/mllib
diff options
context:
space:
mode:
Diffstat (limited to 'mllib')
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala | 5
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala | 5
2 files changed, 10 insertions, 0 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala
index 118a6e3e6a..626e97efb4 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala
@@ -66,6 +66,11 @@ private[feature] trait StandardScalerParams extends Params with HasInputCol with
* :: Experimental ::
* Standardizes features by removing the mean and scaling to unit variance using column summary
* statistics on the samples in the training set.
+ *
+ * The standard deviation used for scaling (the "unit std") is computed using the
+ * [[https://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation
+ * corrected sample standard deviation]],
+ * which is computed as the square root of the unbiased sample variance.
*/
@Experimental
class StandardScaler(override val uid: String) extends Estimator[StandardScalerModel]
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala
index 5c35e1b91c..ee97045f34 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala
@@ -27,6 +27,11 @@ import org.apache.spark.rdd.RDD
* Standardizes features by removing the mean and scaling to unit std using column summary
* statistics on the samples in the training set.
*
+ * The "unit std" is computed using the
+ * [[https://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation
+ * corrected sample standard deviation]],
+ * which is computed as the square root of the unbiased sample variance.
+ *
* @param withMean False by default. Centers the data with mean before scaling. It will build a
* dense output, so this does not work on sparse input and will raise an exception.
* @param withStd True by default. Scales the data to unit standard deviation.