aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDongjoon Hyun <dongjoon@apache.org>2016-03-14 12:46:53 -0700
committerXiangrui Meng <meng@databricks.com>2016-03-14 12:46:53 -0700
commita48296f4fe513b63041f1a26231cfe152b69657f (patch)
tree5a92d8270743f14b222b01b9e191f299b7d03e5e
parent23385e853e7ca54332c6098cf83da7d0723546fe (diff)
downloadspark-a48296f4fe513b63041f1a26231cfe152b69657f.tar.gz
spark-a48296f4fe513b63041f1a26231cfe152b69657f.tar.bz2
spark-a48296f4fe513b63041f1a26231cfe152b69657f.zip
[SPARK-13686][MLLIB][STREAMING] Add a constructor parameter `regParam` to (Streaming)LinearRegressionWithSGD
## What changes were proposed in this pull request? `LinearRegressionWithSGD` and `StreamingLinearRegressionWithSGD` do not have `regParam` as a constructor argument. They just depend on GradientDescent's default `regParam` value. To be consistent with other algorithms, we had better add it. The same default value is used. ## How was this patch tested? Pass the existing unit tests. Author: Dongjoon Hyun <dongjoon@apache.org> Closes #11527 from dongjoon-hyun/SPARK-13686.
-rw-r--r--mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala8
-rw-r--r--mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala16
-rw-r--r--project/MimaExcludes.scala3
3 files changed, 21 insertions, 6 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
index 7da82c862a..e754e74492 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
@@ -89,6 +89,7 @@ object LinearRegressionModel extends Loader[LinearRegressionModel] {
class LinearRegressionWithSGD private[mllib] (
private var stepSize: Double,
private var numIterations: Int,
+ private var regParam: Double,
private var miniBatchFraction: Double)
extends GeneralizedLinearAlgorithm[LinearRegressionModel] with Serializable {
@@ -98,6 +99,7 @@ class LinearRegressionWithSGD private[mllib] (
override val optimizer = new GradientDescent(gradient, updater)
.setStepSize(stepSize)
.setNumIterations(numIterations)
+ .setRegParam(regParam)
.setMiniBatchFraction(miniBatchFraction)
/**
@@ -105,7 +107,7 @@ class LinearRegressionWithSGD private[mllib] (
* numIterations: 100, miniBatchFraction: 1.0}.
*/
@Since("0.8.0")
- def this() = this(1.0, 100, 1.0)
+ def this() = this(1.0, 100, 0.0, 1.0)
override protected[mllib] def createModel(weights: Vector, intercept: Double) = {
new LinearRegressionModel(weights, intercept)
@@ -141,7 +143,7 @@ object LinearRegressionWithSGD {
stepSize: Double,
miniBatchFraction: Double,
initialWeights: Vector): LinearRegressionModel = {
- new LinearRegressionWithSGD(stepSize, numIterations, miniBatchFraction)
+ new LinearRegressionWithSGD(stepSize, numIterations, 0.0, miniBatchFraction)
.run(input, initialWeights)
}
@@ -163,7 +165,7 @@ object LinearRegressionWithSGD {
numIterations: Int,
stepSize: Double,
miniBatchFraction: Double): LinearRegressionModel = {
- new LinearRegressionWithSGD(stepSize, numIterations, miniBatchFraction).run(input)
+ new LinearRegressionWithSGD(stepSize, numIterations, 0.0, miniBatchFraction).run(input)
}
/**
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala
index fe2a46b9ee..e8f4422fd4 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala
@@ -43,6 +43,7 @@ import org.apache.spark.mllib.linalg.Vector
class StreamingLinearRegressionWithSGD private[mllib] (
private var stepSize: Double,
private var numIterations: Int,
+ private var regParam: Double,
private var miniBatchFraction: Double)
extends StreamingLinearAlgorithm[LinearRegressionModel, LinearRegressionWithSGD]
with Serializable {
@@ -54,10 +55,10 @@ class StreamingLinearRegressionWithSGD private[mllib] (
* (see `StreamingLinearAlgorithm`)
*/
@Since("1.1.0")
- def this() = this(0.1, 50, 1.0)
+ def this() = this(0.1, 50, 0.0, 1.0)
@Since("1.1.0")
- val algorithm = new LinearRegressionWithSGD(stepSize, numIterations, miniBatchFraction)
+ val algorithm = new LinearRegressionWithSGD(stepSize, numIterations, regParam, miniBatchFraction)
protected var model: Option[LinearRegressionModel] = None
@@ -71,8 +72,17 @@ class StreamingLinearRegressionWithSGD private[mllib] (
}
/**
- * Set the number of iterations of gradient descent to run per update. Default: 50.
+ * Set the regularization parameter. Default: 0.0.
*/
+ @Since("2.0.0")
+ def setRegParam(regParam: Double): this.type = {
+ this.algorithm.optimizer.setRegParam(regParam)
+ this
+ }
+
+ /**
+ * Set the number of iterations of gradient descent to run per update. Default: 50.
+ */
@Since("1.1.0")
def setNumIterations(numIterations: Int): this.type = {
this.algorithm.optimizer.setNumIterations(numIterations)
diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala
index faa52bf18c..a9973bc24c 100644
--- a/project/MimaExcludes.scala
+++ b/project/MimaExcludes.scala
@@ -318,6 +318,9 @@ object MimaExcludes {
ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.mllib.evaluation.MultilabelMetrics.this"),
ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.predictions"),
ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.predictions")
+ ) ++ Seq(
+ // [SPARK-13686][MLLIB][STREAMING] Add a constructor parameter `regParam` to (Streaming)LinearRegressionWithSGD
+ ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.mllib.regression.LinearRegressionWithSGD.this")
)
case v if v.startsWith("1.6") =>
Seq(