From a48296f4fe513b63041f1a26231cfe152b69657f Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 14 Mar 2016 12:46:53 -0700 Subject: [SPARK-13686][MLLIB][STREAMING] Add a constructor parameter `reqParam` to (Streaming)LinearRegressionWithSGD ## What changes were proposed in this pull request? `LinearRegressionWithSGD` and `StreamingLinearRegressionWithSGD` does not have `regParam` as their constructor arguments. They just depends on GradientDescent's default reqParam values. To be consistent with other algorithms, we had better add them. The same default value is used. ## How was this patch tested? Pass the existing unit test. Author: Dongjoon Hyun Closes #11527 from dongjoon-hyun/SPARK-13686. --- .../apache/spark/mllib/regression/LinearRegression.scala | 8 +++++--- .../regression/StreamingLinearRegressionWithSGD.scala | 16 +++++++++++++--- project/MimaExcludes.scala | 3 +++ 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala index 7da82c862a..e754e74492 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala @@ -89,6 +89,7 @@ object LinearRegressionModel extends Loader[LinearRegressionModel] { class LinearRegressionWithSGD private[mllib] ( private var stepSize: Double, private var numIterations: Int, + private var regParam: Double, private var miniBatchFraction: Double) extends GeneralizedLinearAlgorithm[LinearRegressionModel] with Serializable { @@ -98,6 +99,7 @@ class LinearRegressionWithSGD private[mllib] ( override val optimizer = new GradientDescent(gradient, updater) .setStepSize(stepSize) .setNumIterations(numIterations) + .setRegParam(regParam) .setMiniBatchFraction(miniBatchFraction) /** @@ -105,7 +107,7 @@ class LinearRegressionWithSGD private[mllib] ( * numIterations: 100, miniBatchFraction: 1.0}. */ @Since("0.8.0") - def this() = this(1.0, 100, 1.0) + def this() = this(1.0, 100, 0.0, 1.0) override protected[mllib] def createModel(weights: Vector, intercept: Double) = { new LinearRegressionModel(weights, intercept) @@ -141,7 +143,7 @@ object LinearRegressionWithSGD { stepSize: Double, miniBatchFraction: Double, initialWeights: Vector): LinearRegressionModel = { - new LinearRegressionWithSGD(stepSize, numIterations, miniBatchFraction) + new LinearRegressionWithSGD(stepSize, numIterations, 0.0, miniBatchFraction) .run(input, initialWeights) } @@ -163,7 +165,7 @@ object LinearRegressionWithSGD { numIterations: Int, stepSize: Double, miniBatchFraction: Double): LinearRegressionModel = { - new LinearRegressionWithSGD(stepSize, numIterations, miniBatchFraction).run(input) + new LinearRegressionWithSGD(stepSize, numIterations, 0.0, miniBatchFraction).run(input) } /** diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala index fe2a46b9ee..e8f4422fd4 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala @@ -43,6 +43,7 @@ import org.apache.spark.mllib.linalg.Vector class StreamingLinearRegressionWithSGD private[mllib] ( private var stepSize: Double, private var numIterations: Int, + private var regParam: Double, private var miniBatchFraction: Double) extends StreamingLinearAlgorithm[LinearRegressionModel, LinearRegressionWithSGD] with Serializable { @@ -54,10 +55,10 @@ class StreamingLinearRegressionWithSGD private[mllib] ( * (see `StreamingLinearAlgorithm`) */ @Since("1.1.0") - def this() = this(0.1, 50, 1.0) + def this() = this(0.1, 50, 0.0, 1.0) @Since("1.1.0") - val algorithm = new LinearRegressionWithSGD(stepSize, numIterations, miniBatchFraction) + val algorithm = new LinearRegressionWithSGD(stepSize, numIterations, regParam, miniBatchFraction) protected var model: Option[LinearRegressionModel] = None @@ -71,8 +72,17 @@ class StreamingLinearRegressionWithSGD private[mllib] ( } /** - * Set the number of iterations of gradient descent to run per update. Default: 50. + * Set the regularization parameter. Default: 0.0. */ + @Since("2.0.0") + def setRegParam(regParam: Double): this.type = { + this.algorithm.optimizer.setRegParam(regParam) + this + } + + /** + * Set the number of iterations of gradient descent to run per update. Default: 50. + */ @Since("1.1.0") def setNumIterations(numIterations: Int): this.type = { this.algorithm.optimizer.setNumIterations(numIterations) diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index faa52bf18c..a9973bc24c 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -318,6 +318,9 @@ object MimaExcludes { ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.mllib.evaluation.MultilabelMetrics.this"), ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.predictions"), ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.predictions") + ) ++ Seq( + // [SPARK-13686][MLLIB][STREAMING] Add a constructor parameter `reqParam` to (Streaming)LinearRegressionWithSGD + ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.mllib.regression.LinearRegressionWithSGD.this") ) case v if v.startsWith("1.6") => Seq( -- cgit v1.2.3