[SPARK-2163] class LBFGS optimize with Double tolerance instead of Int

https://issues.apache.org/jira/browse/SPARK-2163 This pull request includes the change for **[SPARK-2163]**: * Changed the convergence tolerance parameter from type `Int` to type `Double`. * Added types for vars in `class LBFGS`, making the style consistent with `class GradientDescent`. * Added associated test to check that optimizing via `class LBFGS` produces the same results as via calling `runLBFGS` from `object LBFGS`. This is a very minor change but it will solve the problem in my implementation of a regression model for count data, where I make use of LBFGS for parameter estimation. Author: Gang Bai <me@baigang.net> Closes #1104 from BaiGang/fix_int_tol and squashes the following commits: cecf02c [Gang Bai] Changed setConvergenceTol'' to specify tolerance with a parameter of type Double. For the reason and the problem caused by an Int parameter, please check https://issues.apache.org/jira/browse/SPARK-2163. Added a test in LBFGSSuite for validating that optimizing via class LBFGS produces the same results as calling runLBFGS from object LBFGS. Keep the indentations and styles correct.
author: Gang Bai <me@baigang.net> 2014-06-20 08:52:20 -0700
committer: Xiangrui Meng <meng@databricks.com> 2014-06-20 08:52:20 -0700
commit: d484ddeff1440d8e14e05c3cd7e7a18746f1a586 (patch)
tree: 8b4287272892ed3864c7e13341de076774c63043 /mllib
parent: 2f6a835e1a039a0b1ba6e184b3350444b70f91df (diff)
download: spark-d484ddeff1440d8e14e05c3cd7e7a18746f1a586.tar.gz
spark-d484ddeff1440d8e14e05c3cd7e7a18746f1a586.tar.bz2
spark-d484ddeff1440d8e14e05c3cd7e7a18746f1a586.zip
2 files changed, 35 insertions, 1 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala
index 8f187c9df5..7bbed9c8fd 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala
@@ -60,7 +60,7 @@ class LBFGS(private var gradient: Gradient, private var updater: Updater)
    * Set the convergence tolerance of iterations for L-BFGS. Default 1E-4.
    * Smaller value will lead to higher accuracy with the cost of more iterations.
    */
-  def setConvergenceTol(tolerance: Int): this.type = {
+  def setConvergenceTol(tolerance: Double): this.type = {
     this.convergenceTol = tolerance
     this
   }
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/optimization/LBFGSSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/optimization/LBFGSSuite.scala
index 4b1850659a..fe7a9033cd 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/optimization/LBFGSSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/optimization/LBFGSSuite.scala
@@ -195,4 +195,38 @@ class LBFGSSuite extends FunSuite with LocalSparkContext with Matchers {
     assert(lossLBFGS3.length == 6)
     assert((lossLBFGS3(4) - lossLBFGS3(5)) / lossLBFGS3(4) < convergenceTol)
   }
+
+  test("Optimize via class LBFGS.") {
+    val regParam = 0.2
+
+    // Prepare another non-zero weights to compare the loss in the first iteration.
+    val initialWeightsWithIntercept = Vectors.dense(0.3, 0.12)
+    val convergenceTol = 1e-12
+    val maxNumIterations = 10
+
+    val lbfgsOptimizer = new LBFGS(gradient, squaredL2Updater)
+      .setNumCorrections(numCorrections)
+      .setConvergenceTol(convergenceTol)
+      .setMaxNumIterations(maxNumIterations)
+      .setRegParam(regParam)
+
+    val weightLBFGS = lbfgsOptimizer.optimize(dataRDD, initialWeightsWithIntercept)
+
+    val numGDIterations = 50
+    val stepSize = 1.0
+    val (weightGD, _) = GradientDescent.runMiniBatchSGD(
+      dataRDD,
+      gradient,
+      squaredL2Updater,
+      stepSize,
+      numGDIterations,
+      regParam,
+      miniBatchFrac,
+      initialWeightsWithIntercept)
+
+    // for class LBFGS and the optimize method, we only look at the weights
+    assert(compareDouble(weightLBFGS(0), weightGD(0), 0.02) &&
+      compareDouble(weightLBFGS(1), weightGD(1), 0.02),
+      "The weight differences between LBFGS and GD should be within 2%.")
+  }
 }
author	Gang Bai <me@baigang.net>	2014-06-20 08:52:20 -0700
committer	Xiangrui Meng <meng@databricks.com>	2014-06-20 08:52:20 -0700
commit	d484ddeff1440d8e14e05c3cd7e7a18746f1a586 (patch)
tree	8b4287272892ed3864c7e13341de076774c63043 /mllib
parent	2f6a835e1a039a0b1ba6e184b3350444b70f91df (diff)
download	spark-d484ddeff1440d8e14e05c3cd7e7a18746f1a586.tar.gz spark-d484ddeff1440d8e14e05c3cd7e7a18746f1a586.tar.bz2 spark-d484ddeff1440d8e14e05c3cd7e7a18746f1a586.zip