Diffstat (limited to 'mllib')
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/optimization/Gradient.scala        | 23
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala | 42
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala         | 61
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala             | 31
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala  | 30
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala   | 17
6 files changed, 130 insertions, 74 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Gradient.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Gradient.scala
index c590492e7a..82124703da 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Gradient.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Gradient.scala
@@ -24,10 +24,10 @@ import org.jblas.DoubleMatrix
*/
abstract class Gradient extends Serializable {
/**
- * Compute the gradient and loss given features of a single data point.
+ * Compute the gradient and loss given the features of a single data point.
*
- * @param data - Feature values for one data point. Column matrix of size nx1
- * where n is the number of features.
+ * @param data - Feature values for one data point. Column matrix of size dx1
+ * where d is the number of features.
* @param label - Label for this data item.
* @param weights - Column matrix containing weights for every feature.
*
@@ -40,7 +40,8 @@ abstract class Gradient extends Serializable {
}
/**
- * Compute gradient and loss for a logistic loss function.
+ * Compute gradient and loss for a logistic loss function, as used in binary classification.
+ * See also the documentation for the precise formulation.
*/
class LogisticGradient extends Gradient {
override def compute(data: DoubleMatrix, label: Double, weights: DoubleMatrix):
@@ -61,22 +62,26 @@ class LogisticGradient extends Gradient {
}
/**
- * Compute gradient and loss for a Least-squared loss function.
+ * Compute gradient and loss for a least-squares loss function, as used in linear regression.
+ * Averaged over the n data examples, this is the mean squared error loss
+ *   L = 1/n ||A weights - y||^2
+ * See also the documentation for the precise formulation.
*/
-class SquaredGradient extends Gradient {
+class LeastSquaresGradient extends Gradient {
override def compute(data: DoubleMatrix, label: Double, weights: DoubleMatrix):
(DoubleMatrix, Double) = {
val diff: Double = data.dot(weights) - label
- val loss = 0.5 * diff * diff
- val gradient = data.mul(diff)
+ val loss = diff * diff
+ val gradient = data.mul(2.0 * diff)
(gradient, loss)
}
}
/**
- * Compute gradient and loss for a Hinge loss function.
+ * Compute gradient and loss for a Hinge loss function, as used in SVM binary classification.
+ * See also the documentation for the precise formulation.
* NOTE: This assumes that the labels are {0,1}
*/
class HingeGradient extends Gradient {
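To make the factor-of-two change above concrete: for a single example with feature column a and label y, the renamed LeastSquaresGradient uses the per-example loss L(w) = (a.w - y)^2, whose gradient is 2 (a.w - y) a. A minimal standalone check in plain jblas (the numbers are made up for illustration):

import org.jblas.DoubleMatrix

// One data example with d = 3 features; illustrative values only.
val a = new DoubleMatrix(3, 1, 1.0, 2.0, 3.0)  // feature column vector
val w = new DoubleMatrix(3, 1, 0.5, 0.5, 0.5)  // current weights
val y = 4.0                                    // label
val diff = a.dot(w) - y                        // a.w - y = -1.0
val loss = diff * diff                         // (a.w - y)^2 = 1.0
val gradient = a.mul(2.0 * diff)               // 2 (a.w - y) a = [-2, -4, -6]
println("loss = " + loss + ", gradient = " + gradient)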
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala
index cd80134737..8e87b98bac 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala
@@ -17,9 +17,8 @@
package org.apache.spark.mllib.optimization
-import org.apache.spark.{Logging, SparkContext}
+import org.apache.spark.Logging
import org.apache.spark.rdd.RDD
-import org.apache.spark.SparkContext._
import org.jblas.DoubleMatrix
@@ -39,7 +38,8 @@ class GradientDescent(var gradient: Gradient, var updater: Updater)
private var miniBatchFraction: Double = 1.0
/**
- * Set the step size per-iteration of SGD. Default 1.0.
+ * Set the initial step size of SGD for the first step. Default 1.0.
+ * In subsequent steps, the step size decreases as stepSize/sqrt(t).
*/
def setStepSize(step: Double): this.type = {
this.stepSize = step
@@ -47,7 +47,8 @@ class GradientDescent(var gradient: Gradient, var updater: Updater)
}
/**
- * Set fraction of data to be used for each SGD iteration. Default 1.0.
+ * Set fraction of data to be used for each SGD iteration.
+ * Default 1.0 (corresponding to deterministic/classical gradient descent).
*/
def setMiniBatchFraction(fraction: Double): this.type = {
this.miniBatchFraction = fraction
@@ -63,7 +64,7 @@ class GradientDescent(var gradient: Gradient, var updater: Updater)
}
/**
- * Set the regularization parameter used for SGD. Default 0.0.
+ * Set the regularization parameter. Default 0.0.
*/
def setRegParam(regParam: Double): this.type = {
this.regParam = regParam
@@ -71,7 +72,8 @@ class GradientDescent(var gradient: Gradient, var updater: Updater)
}
/**
- * Set the gradient function to be used for SGD.
+ * Set the gradient function (of the loss function of a single data example)
+ * to be used for SGD.
*/
def setGradient(gradient: Gradient): this.type = {
this.gradient = gradient
@@ -80,7 +82,9 @@ class GradientDescent(var gradient: Gradient, var updater: Updater)
/**
- * Set the updater function to be used for SGD.
+ * Set the updater function to actually perform a gradient step in a given direction.
+ * The updater is also responsible for performing the update from the regularization term,
+ * and therefore determines what kind of regularization is used, if any.
*/
def setUpdater(updater: Updater): this.type = {
this.updater = updater
@@ -107,20 +111,26 @@ class GradientDescent(var gradient: Gradient, var updater: Updater)
// Top-level method to run gradient descent.
object GradientDescent extends Logging {
/**
- * Run gradient descent in parallel using mini batches.
+ * Run stochastic gradient descent (SGD) in parallel using mini batches.
+ * In each iteration, we sample a subset (fraction miniBatchFraction) of the total data
+ * in order to compute a gradient estimate.
+ * Sampling and averaging the subgradients over this subset are performed using one standard
+ * Spark map-reduce in each iteration.
*
- * @param data - Input data for SGD. RDD of form (label, [feature values]).
- * @param gradient - Gradient object that will be used to compute the gradient.
- * @param updater - Updater object that will be used to update the model.
- * @param stepSize - stepSize to be used during update.
+ * @param data - Input data for SGD. RDD of the set of data examples, each of
+ * the form (label, [feature values]).
+ * @param gradient - Gradient object (used to compute the gradient of the loss function of
+ * one single data example)
+ * @param updater - Updater function to actually perform a gradient step in a given direction.
+ * @param stepSize - initial step size for the first step
* @param numIterations - number of iterations that SGD should be run.
* @param regParam - regularization parameter
* @param miniBatchFraction - fraction of the input data set that should be used for
* one iteration of SGD. Default value 1.0.
*
* @return A tuple containing two elements. The first element is a column matrix containing
- * weights for every feature, and the second element is an array containing the stochastic
- * loss computed for every iteration.
+ * weights for every feature, and the second element is an array containing the
+ * stochastic loss computed for every iteration.
*/
def runMiniBatchSGD(
data: RDD[(Double, Array[Double])],
@@ -142,6 +152,8 @@ object GradientDescent extends Logging {
var regVal = 0.0
for (i <- 1 to numIterations) {
+ // Sample a subset (fraction miniBatchFraction) of the total data, then
+ // compute and sum up the subgradients on this subset (this is one map-reduce).
val (gradientSum, lossSum) = data.sample(false, miniBatchFraction, 42 + i).map {
case (y, features) =>
val featuresCol = new DoubleMatrix(features.length, 1, features:_*)
@@ -160,7 +172,7 @@ object GradientDescent extends Logging {
regVal = update._2
}
- logInfo("GradientDescent finished. Last 10 stochastic losses %s".format(
+ logInfo("GradientDescent.runMiniBatchSGD finished. Last 10 stochastic losses %s".format(
stochasticLossHistory.takeRight(10).mkString(", ")))
(weights.toArray, stochasticLossHistory.toArray)
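A hedged usage sketch of the reworked entry point (sc and the toy data are assumptions, and the final initialWeights argument follows the full runMiniBatchSGD signature, which the hunk above truncates):

import org.apache.spark.mllib.optimization.{GradientDescent, LeastSquaresGradient, SimpleUpdater}

// Illustrative only: deterministic (full-batch) least-squares descent.
val data = sc.parallelize(Seq(
  (1.0, Array(1.0, 0.0)),
  (2.0, Array(0.0, 1.0))))
val (weights, lossHistory) = GradientDescent.runMiniBatchSGD(
  data,
  new LeastSquaresGradient(),  // gradient of the per-example loss
  new SimpleUpdater(),         // plain gradient step, no regularization
  1.0,                         // stepSize for the first step
  100,                         // numIterations
  0.0,                         // regParam (ignored by SimpleUpdater)
  1.0,                         // miniBatchFraction = classical gradient descent
  Array(0.0, 0.0))             // initialWeights (assumed final parameter)
// lossHistory contains the stochastic loss computed in every iteration.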
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala
index 37124f261e..889a03e3e6 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala
@@ -21,16 +21,25 @@ import scala.math._
import org.jblas.DoubleMatrix
/**
- * Class used to update weights used in Gradient Descent.
+ * Class used to perform steps (weight update) using Gradient Descent methods.
+ *
+ * For general minimization problems, or for regularized problems of the form
+ * min L(w) + regParam * R(w),
+ * the compute function performs the actual update step, given some
+ * (e.g. stochastic) gradient direction for the loss L(w)
+ * and a desired step-size (learning rate).
+ *
+ * The updater is also responsible for performing the update coming from the
+ * regularization term R(w) (if any regularization is used).
*/
abstract class Updater extends Serializable {
/**
* Compute an updated value for weights given the gradient, stepSize, iteration number and
- * regularization parameter. Also returns the regularization value computed using the
- * *updated* weights.
+ * regularization parameter. Also returns the regularization value regParam * R(w)
+ * computed using the *updated* weights.
*
- * @param weightsOld - Column matrix of size nx1 where n is the number of features.
- * @param gradient - Column matrix of size nx1 where n is the number of features.
+ * @param weightsOld - Column matrix of size dx1 where d is the number of features.
+ * @param gradient - Column matrix of size dx1 where d is the number of features.
* @param stepSize - step size across iterations
* @param iter - Iteration number
* @param regParam - Regularization parameter
@@ -43,23 +52,29 @@ abstract class Updater extends Serializable {
}
/**
- * A simple updater that adaptively adjusts the learning rate the
- * square root of the number of iterations. Does not perform any regularization.
+ * A simple updater for gradient descent *without* any regularization.
+ * Uses a step-size decreasing with the square root of the number of iterations.
*/
class SimpleUpdater extends Updater {
override def compute(weightsOld: DoubleMatrix, gradient: DoubleMatrix,
stepSize: Double, iter: Int, regParam: Double): (DoubleMatrix, Double) = {
val thisIterStepSize = stepSize / math.sqrt(iter)
- val normGradient = gradient.mul(thisIterStepSize)
- (weightsOld.sub(normGradient), 0)
+ val step = gradient.mul(thisIterStepSize)
+ (weightsOld.sub(step), 0)
}
}
/**
- * Updater that adjusts learning rate and performs L1 regularization.
+ * Updater for L1 regularized problems.
+ * R(w) = ||w||_1
+ * Uses a step-size decreasing with the square root of the number of iterations.
+ *
+ * Instead of using the subgradient of the regularizer, the proximal operator for
+ * L1 regularization is applied after the gradient step. This is known to
+ * result in better sparsity of the intermediate solution.
*
- * The corresponding proximal operator used is the soft-thresholding function.
- * That is, each weight component is shrunk towards 0 by shrinkageVal.
+ * The corresponding proximal operator for the L1 norm is the soft-thresholding
+ * function. That is, each weight component is shrunk towards 0 by shrinkageVal.
*
* If w > shrinkageVal, set weight component to w-shrinkageVal.
* If w < -shrinkageVal, set weight component to w+shrinkageVal.
@@ -71,10 +86,10 @@ class L1Updater extends Updater {
override def compute(weightsOld: DoubleMatrix, gradient: DoubleMatrix,
stepSize: Double, iter: Int, regParam: Double): (DoubleMatrix, Double) = {
val thisIterStepSize = stepSize / math.sqrt(iter)
- val normGradient = gradient.mul(thisIterStepSize)
+ val step = gradient.mul(thisIterStepSize)
// Take gradient step
- val newWeights = weightsOld.sub(normGradient)
- // Soft thresholding
+ val newWeights = weightsOld.sub(step)
+ // Apply proximal operator (soft thresholding)
val shrinkageVal = regParam * thisIterStepSize
(0 until newWeights.length).foreach { i =>
val wi = newWeights.get(i)
@@ -85,19 +100,19 @@ class L1Updater extends Updater {
}
/**
- * Updater that adjusts the learning rate and performs L2 regularization
- *
- * See, for example, explanation of gradient and loss with L2 regularization on slide 21-22
- * of <a href="http://people.cs.umass.edu/~sheldon/teaching/2012fa/ml/files/lec7-annotated.pdf">
- * these slides</a>.
+ * Updater for L2 regularized problems.
+ * R(w) = 1/2 ||w||^2
+ * Uses a step-size decreasing with the square root of the number of iterations.
*/
class SquaredL2Updater extends Updater {
override def compute(weightsOld: DoubleMatrix, gradient: DoubleMatrix,
stepSize: Double, iter: Int, regParam: Double): (DoubleMatrix, Double) = {
val thisIterStepSize = stepSize / math.sqrt(iter)
- val normGradient = gradient.mul(thisIterStepSize)
- val newWeights = weightsOld.mul(1.0 - 2.0 * thisIterStepSize * regParam).sub(normGradient)
- (newWeights, pow(newWeights.norm2, 2.0) * regParam)
+ val step = gradient.mul(thisIterStepSize)
+ // add up both updates, from the gradient of the loss (= step) and from
+ // the gradient of the regularizer (= regParam * weightsOld)
+ val newWeights = weightsOld.mul(1.0 - thisIterStepSize * regParam).sub(step)
+ (newWeights, 0.5 * pow(newWeights.norm2, 2.0) * regParam)
}
}
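The proximal step performed by L1Updater needs no Spark machinery to demonstrate. A minimal sketch of the soft-thresholding operator (the function name is illustrative):

// Shrink each coordinate towards 0 by shrinkageVal; coordinates inside
// [-shrinkageVal, shrinkageVal] are clamped to exactly 0, which is what
// produces the sparsity of the intermediate solutions mentioned above.
def softThreshold(w: Array[Double], shrinkageVal: Double): Array[Double] =
  w.map(wi => math.signum(wi) * math.max(0.0, math.abs(wi) - shrinkageVal))

// Example: with shrinkageVal = 0.3, (0.2, -0.5, 1.0) maps to (0.0, -0.2, 0.7).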
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala
index 7c41793722..fb2bc9b92a 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala
@@ -44,6 +44,11 @@ class LassoModel(
/**
* Train a regression model with L1-regularization using Stochastic Gradient Descent.
+ * This solves the L1-regularized least squares regression formulation
+ * f(weights) = 1/n ||A weights-y||^2 + regParam ||weights||_1
+ * Here the data matrix has n rows, and the input RDD holds the set of rows of A, each with
+ * its corresponding right hand side label y.
+ * See also the documentation for the precise formulation.
*/
class LassoWithSGD private (
var stepSize: Double,
@@ -53,7 +58,7 @@ class LassoWithSGD private (
extends GeneralizedLinearAlgorithm[LassoModel]
with Serializable {
- val gradient = new SquaredGradient()
+ val gradient = new LeastSquaresGradient()
val updater = new L1Updater()
@transient val optimizer = new GradientDescent(gradient, updater).setStepSize(stepSize)
.setNumIterations(numIterations)
@@ -113,12 +118,13 @@ object LassoWithSGD {
/**
* Train a Lasso model given an RDD of (label, features) pairs. We run a fixed number
* of iterations of gradient descent using the specified step size. Each iteration uses
- * `miniBatchFraction` fraction of the data to calculate the gradient. The weights used in
- * gradient descent are initialized using the initial weights provided.
+ * `miniBatchFraction` fraction of the data to calculate a stochastic gradient. The weights used
+ * in gradient descent are initialized using the initial weights provided.
*
- * @param input RDD of (label, array of features) pairs.
+ * @param input RDD of (label, array of features) pairs. Each pair describes a row of the data
+ * matrix A as well as the corresponding right hand side label y.
* @param numIterations Number of iterations of gradient descent to run.
- * @param stepSize Step size to be used for each iteration of gradient descent.
+ * @param stepSize Step size scaling to be used for the iterations of gradient descent.
* @param regParam Regularization parameter.
* @param miniBatchFraction Fraction of data to be used per iteration.
* @param initialWeights Initial set of weights to be used. Array should be equal in size to
@@ -140,9 +146,10 @@ object LassoWithSGD {
/**
* Train a Lasso model given an RDD of (label, features) pairs. We run a fixed number
* of iterations of gradient descent using the specified step size. Each iteration uses
- * `miniBatchFraction` fraction of the data to calculate the gradient.
+ * `miniBatchFraction` fraction of the data to calculate a stochastic gradient.
*
- * @param input RDD of (label, array of features) pairs.
+ * @param input RDD of (label, array of features) pairs. Each pair describes a row of the data
+ * matrix A as well as the corresponding right hand side label y.
* @param numIterations Number of iterations of gradient descent to run.
* @param stepSize Step size to be used for each iteration of gradient descent.
* @param regParam Regularization parameter.
@@ -162,9 +169,10 @@ object LassoWithSGD {
/**
* Train a Lasso model given an RDD of (label, features) pairs. We run a fixed number
* of iterations of gradient descent using the specified step size. We use the entire data set to
- * update the gradient in each iteration.
+ * compute the true gradient in each iteration.
*
- * @param input RDD of (label, array of features) pairs.
+ * @param input RDD of (label, array of features) pairs. Each pair describes a row of the data
+ * matrix A as well as the corresponding right hand side label y.
* @param stepSize Step size to be used for each iteration of Gradient Descent.
* @param regParam Regularization parameter.
* @param numIterations Number of iterations of gradient descent to run.
@@ -183,9 +191,10 @@ object LassoWithSGD {
/**
* Train a Lasso model given an RDD of (label, features) pairs. We run a fixed number
* of iterations of gradient descent using a step size of 1.0. We use the entire data set to
- * update the gradient in each iteration.
+ * compute the true gradient in each iteration.
*
- * @param input RDD of (label, array of features) pairs.
+ * @param input RDD of (label, array of features) pairs. Each pair describes a row of the data
+ * matrix A as well as the corresponding right hand side label y.
* @param numIterations Number of iterations of gradient descent to run.
* @return a LassoModel which has the weights and offset from training.
*/
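A hedged end-to-end sketch of the trainer documented above (sc, the toy rows and the hyperparameters are assumptions; LabeledPoint is assumed to be this package's (label, features) pair type):

import org.apache.spark.mllib.regression.{LabeledPoint, LassoWithSGD}

// Each LabeledPoint holds one row of A and its right hand side label y.
val rows = sc.parallelize(Seq(
  LabeledPoint(1.0, Array(1.0, 0.0)),
  LabeledPoint(2.0, Array(0.0, 1.0))))
// 100 iterations, step size 1.0, regParam 0.1, full batches (fraction 1.0).
val model = LassoWithSGD.train(rows, 100, 1.0, 0.1, 1.0)
val prediction = model.predict(Array(1.0, 1.0))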
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
index df599fde76..8ee40addb2 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
@@ -44,6 +44,12 @@ class LinearRegressionModel(
/**
* Train a linear regression model with no regularization using Stochastic Gradient Descent.
+ * This solves the least squares regression formulation
+ * f(weights) = 1/n ||A weights-y||^2
+ * (which is the mean squared error).
+ * Here the data matrix has n rows, and the input RDD holds the set of rows of A, each with
+ * its corresponding right hand side label y.
+ * See also the documentation for the precise formulation.
*/
class LinearRegressionWithSGD private (
var stepSize: Double,
@@ -52,7 +58,7 @@ class LinearRegressionWithSGD private (
extends GeneralizedLinearAlgorithm[LinearRegressionModel]
with Serializable {
- val gradient = new SquaredGradient()
+ val gradient = new LeastSquaresGradient()
val updater = new SimpleUpdater()
val optimizer = new GradientDescent(gradient, updater).setStepSize(stepSize)
.setNumIterations(numIterations)
@@ -76,10 +82,11 @@ object LinearRegressionWithSGD {
/**
* Train a Linear Regression model given an RDD of (label, features) pairs. We run a fixed number
* of iterations of gradient descent using the specified step size. Each iteration uses
- * `miniBatchFraction` fraction of the data to calculate the gradient. The weights used in
- * gradient descent are initialized using the initial weights provided.
+ * `miniBatchFraction` fraction of the data to calculate a stochastic gradient. The weights used
+ * in gradient descent are initialized using the initial weights provided.
*
- * @param input RDD of (label, array of features) pairs.
+ * @param input RDD of (label, array of features) pairs. Each pair describes a row of the data
+ * matrix A as well as the corresponding right hand side label y.
* @param numIterations Number of iterations of gradient descent to run.
* @param stepSize Step size to be used for each iteration of gradient descent.
* @param miniBatchFraction Fraction of data to be used per iteration.
@@ -101,9 +108,10 @@ object LinearRegressionWithSGD {
/**
* Train a LinearRegression model given an RDD of (label, features) pairs. We run a fixed number
* of iterations of gradient descent using the specified step size. Each iteration uses
- * `miniBatchFraction` fraction of the data to calculate the gradient.
+ * `miniBatchFraction` fraction of the data to calculate a stochastic gradient.
*
- * @param input RDD of (label, array of features) pairs.
+ * @param input RDD of (label, array of features) pairs. Each pair describes a row of the data
+ * matrix A as well as the corresponding right hand side label y.
* @param numIterations Number of iterations of gradient descent to run.
* @param stepSize Step size to be used for each iteration of gradient descent.
* @param miniBatchFraction Fraction of data to be used per iteration.
@@ -121,9 +129,10 @@ object LinearRegressionWithSGD {
/**
* Train a LinearRegression model given an RDD of (label, features) pairs. We run a fixed number
* of iterations of gradient descent using the specified step size. We use the entire data set to
- * update the gradient in each iteration.
+ * compute the true gradient in each iteration.
*
- * @param input RDD of (label, array of features) pairs.
+ * @param input RDD of (label, array of features) pairs. Each pair describes a row of the data
+ * matrix A as well as the corresponding right hand side label y.
* @param stepSize Step size to be used for each iteration of Gradient Descent.
* @param numIterations Number of iterations of gradient descent to run.
* @return a LinearRegressionModel which has the weights and offset from training.
@@ -140,9 +149,10 @@ object LinearRegressionWithSGD {
/**
* Train a LinearRegression model given an RDD of (label, features) pairs. We run a fixed number
* of iterations of gradient descent using a step size of 1.0. We use the entire data set to
- * update the gradient in each iteration.
+ * compute the true gradient in each iteration.
*
- * @param input RDD of (label, array of features) pairs.
+ * @param input RDD of (label, array of features) pairs. Each pair describes a row of the data
+ * matrix A as well as the corresponding right hand side label y.
* @param numIterations Number of iterations of gradient descent to run.
* @return a LinearRegressionModel which has the weights and offset from training.
*/
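Likewise, a short illustrative call of the simplest overload above (sc and the points are assumptions): full-batch gradient descent with step size 1.0 on the mean squared error.

import org.apache.spark.mllib.regression.{LabeledPoint, LinearRegressionWithSGD}

val points = sc.parallelize(Seq(
  LabeledPoint(3.0, Array(1.0, 2.0)),
  LabeledPoint(5.0, Array(2.0, 3.0))))
val model = LinearRegressionWithSGD.train(points, 200)  // 200 iterations
println("weights: " + model.weights.mkString(", "))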
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
index 0c0e67fb7b..c504d3d40c 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
@@ -44,6 +44,11 @@ class RidgeRegressionModel(
/**
* Train a regression model with L2-regularization using Stochastic Gradient Descent.
+ * This solves the L2-regularized least squares regression formulation
+ * f(weights) = 1/n ||A weights-y||^2 + regParam/2 ||weights||^2
+ * Here the data matrix has n rows, and the input RDD holds the set of rows of A, each with
+ * its corresponding right hand side label y.
+ * See also the documentation for the precise formulation.
*/
class RidgeRegressionWithSGD private (
var stepSize: Double,
@@ -53,7 +58,7 @@ class RidgeRegressionWithSGD private (
extends GeneralizedLinearAlgorithm[RidgeRegressionModel]
with Serializable {
- val gradient = new SquaredGradient()
+ val gradient = new LeastSquaresGradient()
val updater = new SquaredL2Updater()
@transient val optimizer = new GradientDescent(gradient, updater).setStepSize(stepSize)
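Wiring in SquaredL2Updater is what turns the plain least-squares objective into the ridge formulation. A small standalone check of one update step (illustrative values), matching the rewritten update in Updater.scala: for f(w) = L(w) + regParam/2 ||w||^2, a step of size g gives w_new = w - g (gradLoss + regParam w) = (1 - g regParam) w - g gradLoss.

import org.jblas.DoubleMatrix

val w        = new DoubleMatrix(2, 1, 1.0, -2.0)  // current weights
val gradLoss = new DoubleMatrix(2, 1, 0.5, 0.5)   // gradient of the loss L(w)
val g = 0.1                                       // this iteration's step size
val regParam = 0.2
// shrink w by (1 - g*regParam), then subtract the step for the loss gradient
val wNew = w.mul(1.0 - g * regParam).sub(gradLoss.mul(g))
// regularization value reported alongside the new weights: regParam/2 * ||wNew||^2
val regVal = 0.5 * regParam * math.pow(wNew.norm2, 2.0)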
@@ -114,8 +119,8 @@ object RidgeRegressionWithSGD {
/**
* Train a RidgeRegression model given an RDD of (label, features) pairs. We run a fixed number
* of iterations of gradient descent using the specified step size. Each iteration uses
- * `miniBatchFraction` fraction of the data to calculate the gradient. The weights used in
- * gradient descent are initialized using the initial weights provided.
+ * `miniBatchFraction` fraction of the data to calculate a stochastic gradient. The weights used
+ * in gradient descent are initialized using the initial weights provided.
*
* @param input RDD of (label, array of features) pairs.
* @param numIterations Number of iterations of gradient descent to run.
@@ -141,7 +146,7 @@ object RidgeRegressionWithSGD {
/**
* Train a RidgeRegression model given an RDD of (label, features) pairs. We run a fixed number
* of iterations of gradient descent using the specified step size. Each iteration uses
- * `miniBatchFraction` fraction of the data to calculate the gradient.
+ * `miniBatchFraction` fraction of the data to calculate a stochastic gradient.
*
* @param input RDD of (label, array of features) pairs.
* @param numIterations Number of iterations of gradient descent to run.
@@ -163,7 +168,7 @@ object RidgeRegressionWithSGD {
/**
* Train a RidgeRegression model given an RDD of (label, features) pairs. We run a fixed number
* of iterations of gradient descent using the specified step size. We use the entire data set to
- * update the gradient in each iteration.
+ * compute the true gradient in each iteration.
*
* @param input RDD of (label, array of features) pairs.
* @param stepSize Step size to be used for each iteration of Gradient Descent.
@@ -184,7 +189,7 @@ object RidgeRegressionWithSGD {
/**
* Train a RidgeRegression model given an RDD of (label, features) pairs. We run a fixed number
* of iterations of gradient descent using a step size of 1.0. We use the entire data set to
- * update the gradient in each iteration.
+ * compute the true gradient in each iteration.
*
* @param input RDD of (label, array of features) pairs.
* @param numIterations Number of iterations of gradient descent to run.