From 3c3371bbd6361011b138cce88f6396a2aa4e2cb9 Mon Sep 17 00:00:00 2001 From: Yanbo Liang Date: Wed, 27 Jul 2016 11:24:28 +0100 Subject: [MINOR][ML] Fix some mistakes in LinearRegression formula. ## What changes were proposed in this pull request? Fix some mistakes in ```LinearRegression``` formula. ## How was this patch tested? Documentation change only; no tests. Author: Yanbo Liang Closes #14369 from yanboliang/LiR-formula. --- .../scala/org/apache/spark/ml/regression/LinearRegression.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'mllib/src') diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala index a0ff7f07aa..f3dc65e0df 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala @@ -800,16 +800,16 @@ class LinearRegressionSummary private[regression] ( * {{{ * \frac{\partial L}{\partial w_i} = * 1/N \sum_j diff_j (x_{ij} - \bar{x_i}) / \hat{x_i} - * = 1/N ((\sum_j diff_j x_{ij} / \hat{x_i}) - diffSum \bar{x_i}) / \hat{x_i}) + * = 1/N ((\sum_j diff_j x_{ij} / \hat{x_i}) - diffSum \bar{x_i} / \hat{x_i}) * = 1/N ((\sum_j diff_j x_{ij} / \hat{x_i}) + correction_i) * }}}, - * where correction_i = - diffSum \bar{x_i}) / \hat{x_i} + * where correction_i = - diffSum \bar{x_i} / \hat{x_i} * * A simple math can show that diffSum is actually zero, so we don't even * need to add the correction terms in the end. From the definition of diff, * {{{ * diffSum = \sum_j (\sum_i w_i(x_{ij} - \bar{x_i}) / \hat{x_i} - (y_j - \bar{y}) / \hat{y}) - * = N * (\sum_i w_i(\bar{x_i} - \bar{x_i}) / \hat{x_i} - (\bar{y_j} - \bar{y}) / \hat{y}) + * = N * (\sum_i w_i(\bar{x_i} - \bar{x_i}) / \hat{x_i} - (\bar{y} - \bar{y}) / \hat{y}) * = 0 * }}} * -- cgit v1.2.3