aboutsummaryrefslogtreecommitdiff
path: root/mllib/src/main
diff options
context:
space:
mode:
author: DB Tsai <dbtsai@alpinenow.com> 2014-08-11 19:49:29 -0700
committer: Xiangrui Meng <meng@databricks.com> 2014-08-11 19:49:29 -0700
commit: 6fab941b65f0cb6c9b32e0f8290d76889cda6a87 (patch)
tree: 21a68ffda086cac8cc263a9493539bde5dc2fa61 /mllib/src/main
parent: 32638b5e74e02410831b391f555223f90c830498 (diff)
downloadspark-6fab941b65f0cb6c9b32e0f8290d76889cda6a87.tar.gz
spark-6fab941b65f0cb6c9b32e0f8290d76889cda6a87.tar.bz2
spark-6fab941b65f0cb6c9b32e0f8290d76889cda6a87.zip
[SPARK-2934][MLlib] Adding LogisticRegressionWithLBFGS Interface
for training with LBFGS Optimizer which will converge faster than SGD.
Author: DB Tsai <dbtsai@alpinenow.com>
Closes #1862 from dbtsai/dbtsai-lbfgs-lor and squashes the following commits:
aa84b81 [DB Tsai] small change
f852bcd [DB Tsai] Remove duplicate method
f119fdc [DB Tsai] Formatting
97776aa [DB Tsai] address more feedback
85b4a91 [DB Tsai] address feedback
3cf50c2 [DB Tsai] LogisticRegressionWithLBFGS interface
Diffstat (limited to 'mllib/src/main')
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala | 51
1 file changed, 50 insertions, 1 deletion
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
index 2242329b79..31d474a20f 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
@@ -101,7 +101,7 @@ class LogisticRegressionWithSGD private (
}
/**
- * Top-level methods for calling Logistic Regression.
+ * Top-level methods for calling Logistic Regression using Stochastic Gradient Descent.
* NOTE: Labels used in Logistic Regression should be {0, 1}
*/
object LogisticRegressionWithSGD {
@@ -188,3 +188,52 @@ object LogisticRegressionWithSGD {
train(input, numIterations, 1.0, 1.0)
}
}
+
+/**
+ * Train a classification model for Logistic Regression using Limited-memory BFGS.
+ * NOTE: Labels used in Logistic Regression should be {0, 1}
+ */
+class LogisticRegressionWithLBFGS private (
+ private var convergenceTol: Double,
+ private var maxNumIterations: Int,
+ private var regParam: Double)
+ extends GeneralizedLinearAlgorithm[LogisticRegressionModel] with Serializable {
+
+ /**
+ * Construct with defaults: convergenceTol = 1E-4, maxNumIterations = 100, regParam = 0.0.
+ */
+ def this() = this(1E-4, 100, 0.0)
+
+ private val gradient = new LogisticGradient()
+ private val updater = new SimpleUpdater()
+ // Build a new LBFGS on every call so later setter calls (which mutate the parameters) take effect.
+ override def optimizer = new LBFGS(gradient, updater)
+ .setNumCorrections(10)
+ .setConvergenceTol(convergenceTol)
+ .setMaxNumIterations(maxNumIterations)
+ .setRegParam(regParam)
+
+ override protected val validators = List(DataValidators.binaryLabelValidator)
+
+ /**
+ * Set the convergence tolerance of iterations for L-BFGS. Default 1E-4.
+ * A smaller value leads to higher accuracy at the cost of more iterations.
+ */
+ def setConvergenceTol(convergenceTol: Double): this.type = {
+ this.convergenceTol = convergenceTol
+ this
+ }
+
+ /**
+ * Set the maximum number of iterations for L-BFGS (stored as maxNumIterations). Default 100.
+ */
+ def setNumIterations(numIterations: Int): this.type = {
+ this.maxNumIterations = numIterations
+ this
+ }
+
+ override protected def createModel(weights: Vector, intercept: Double) = {
+ new LogisticRegressionModel(weights, intercept)
+ }
+
+}