diff options
author | Gang Bai <me@baigang.net> | 2014-06-20 08:52:20 -0700 |
---|---|---|
committer | Xiangrui Meng <meng@databricks.com> | 2014-06-20 08:52:20 -0700 |
commit | d484ddeff1440d8e14e05c3cd7e7a18746f1a586 (patch) | |
tree | 8b4287272892ed3864c7e13341de076774c63043 /mllib | |
parent | 2f6a835e1a039a0b1ba6e184b3350444b70f91df (diff) | |
download | spark-d484ddeff1440d8e14e05c3cd7e7a18746f1a586.tar.gz spark-d484ddeff1440d8e14e05c3cd7e7a18746f1a586.tar.bz2 spark-d484ddeff1440d8e14e05c3cd7e7a18746f1a586.zip |
[SPARK-2163] class LBFGS optimize with Double tolerance instead of Int
https://issues.apache.org/jira/browse/SPARK-2163
This pull request includes the change for **[SPARK-2163]**:
* Changed the convergence tolerance parameter from type `Int` to type `Double`.
* Added types for vars in `class LBFGS`, making the style consistent with `class GradientDescent`.
* Added associated test to check that optimizing via `class LBFGS` produces the same results as via calling `runLBFGS` from `object LBFGS`.
This is a very minor change but it will solve the problem in my implementation of a regression model for count data, where I make use of LBFGS for parameter estimation.
Author: Gang Bai <me@baigang.net>
Closes #1104 from BaiGang/fix_int_tol and squashes the following commits:
cecf02c [Gang Bai] Changed setConvergenceTol'' to specify tolerance with a parameter of type Double. For the reason and the problem caused by an Int parameter, please check https://issues.apache.org/jira/browse/SPARK-2163. Added a test in LBFGSSuite for validating that optimizing via class LBFGS produces the same results as calling runLBFGS from object LBFGS. Keep the indentations and styles correct.
Diffstat (limited to 'mllib')
-rw-r--r-- | mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala | 2 | ||||
-rw-r--r-- | mllib/src/test/scala/org/apache/spark/mllib/optimization/LBFGSSuite.scala | 34 |
2 files changed, 35 insertions, 1 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala index 8f187c9df5..7bbed9c8fd 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala @@ -60,7 +60,7 @@ class LBFGS(private var gradient: Gradient, private var updater: Updater) * Set the convergence tolerance of iterations for L-BFGS. Default 1E-4. * Smaller value will lead to higher accuracy with the cost of more iterations. */ - def setConvergenceTol(tolerance: Int): this.type = { + def setConvergenceTol(tolerance: Double): this.type = { this.convergenceTol = tolerance this } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/optimization/LBFGSSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/optimization/LBFGSSuite.scala index 4b1850659a..fe7a9033cd 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/optimization/LBFGSSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/optimization/LBFGSSuite.scala @@ -195,4 +195,38 @@ class LBFGSSuite extends FunSuite with LocalSparkContext with Matchers { assert(lossLBFGS3.length == 6) assert((lossLBFGS3(4) - lossLBFGS3(5)) / lossLBFGS3(4) < convergenceTol) } + + test("Optimize via class LBFGS.") { + val regParam = 0.2 + + // Prepare another non-zero weights to compare the loss in the first iteration. + val initialWeightsWithIntercept = Vectors.dense(0.3, 0.12) + val convergenceTol = 1e-12 + val maxNumIterations = 10 + + val lbfgsOptimizer = new LBFGS(gradient, squaredL2Updater) + .setNumCorrections(numCorrections) + .setConvergenceTol(convergenceTol) + .setMaxNumIterations(maxNumIterations) + .setRegParam(regParam) + + val weightLBFGS = lbfgsOptimizer.optimize(dataRDD, initialWeightsWithIntercept) + + val numGDIterations = 50 + val stepSize = 1.0 + val (weightGD, _) = GradientDescent.runMiniBatchSGD( + dataRDD, + gradient, + squaredL2Updater, + stepSize, + numGDIterations, + regParam, + miniBatchFrac, + initialWeightsWithIntercept) + + // for class LBFGS and the optimize method, we only look at the weights + assert(compareDouble(weightLBFGS(0), weightGD(0), 0.02) && + compareDouble(weightLBFGS(1), weightGD(1), 0.02), + "The weight differences between LBFGS and GD should be within 2%.") + } } |