diff options
-rw-r--r-- | mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala | 5 | ||||
-rw-r--r-- | mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala | 5 |
2 files changed, 10 insertions, 0 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala
index 118a6e3e6a..626e97efb4 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala
@@ -66,6 +66,11 @@ private[feature] trait StandardScalerParams extends Params with HasInputCol with
  * :: Experimental ::
  * Standardizes features by removing the mean and scaling to unit variance using column summary
  * statistics on the samples in the training set.
+ *
+ * The "unit std" is computed using the
+ * [[https://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation
+ * corrected sample standard deviation]],
+ * which is computed as the square root of the unbiased sample variance.
  */
 @Experimental
 class StandardScaler(override val uid: String) extends Estimator[StandardScalerModel]
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala
index 5c35e1b91c..ee97045f34 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala
@@ -27,6 +27,11 @@ import org.apache.spark.rdd.RDD
  * Standardizes features by removing the mean and scaling to unit std using column summary
  * statistics on the samples in the training set.
  *
+ * The "unit std" is computed using the
+ * [[https://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation
+ * corrected sample standard deviation]],
+ * which is computed as the square root of the unbiased sample variance.
+ *
  * @param withMean False by default. Centers the data with mean before scaling. It will build a
  *                 dense output, so this does not work on sparse input and will raise an exception.
  * @param withStd True by default. Scales the data to unit standard deviation.