about summary refs log tree commit diff
path: root/mllib
diff options
context:
space:
mode:
authorGang Bai <me@baigang.net>2014-06-20 08:52:20 -0700
committerXiangrui Meng <meng@databricks.com>2014-06-20 08:52:20 -0700
commitd484ddeff1440d8e14e05c3cd7e7a18746f1a586 (patch)
tree8b4287272892ed3864c7e13341de076774c63043 /mllib
parent2f6a835e1a039a0b1ba6e184b3350444b70f91df (diff)
downloadspark-d484ddeff1440d8e14e05c3cd7e7a18746f1a586.tar.gz
spark-d484ddeff1440d8e14e05c3cd7e7a18746f1a586.tar.bz2
spark-d484ddeff1440d8e14e05c3cd7e7a18746f1a586.zip
[SPARK-2163] class LBFGS optimize with Double tolerance instead of Int
https://issues.apache.org/jira/browse/SPARK-2163 This pull request includes the change for **[SPARK-2163]**: * Changed the convergence tolerance parameter from type `Int` to type `Double`. * Added types for vars in `class LBFGS`, making the style consistent with `class GradientDescent`. * Added associated test to check that optimizing via `class LBFGS` produces the same results as via calling `runLBFGS` from `object LBFGS`. This is a very minor change but it will solve the problem in my implementation of a regression model for count data, where I make use of LBFGS for parameter estimation. Author: Gang Bai <me@baigang.net> Closes #1104 from BaiGang/fix_int_tol and squashes the following commits: cecf02c [Gang Bai] Changed `setConvergenceTol` to specify tolerance with a parameter of type Double. For the reason and the problem caused by an Int parameter, please check https://issues.apache.org/jira/browse/SPARK-2163. Added a test in LBFGSSuite for validating that optimizing via class LBFGS produces the same results as calling runLBFGS from object LBFGS. Keep the indentations and styles correct.
Diffstat (limited to 'mllib')
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala      |  2
-rw-r--r--  mllib/src/test/scala/org/apache/spark/mllib/optimization/LBFGSSuite.scala | 34
2 files changed, 35 insertions(+), 1 deletion(-)
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala
index 8f187c9df5..7bbed9c8fd 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala
@@ -60,7 +60,7 @@ class LBFGS(private var gradient: Gradient, private var updater: Updater)
* Set the convergence tolerance of iterations for L-BFGS. Default 1E-4.
* Smaller value will lead to higher accuracy with the cost of more iterations.
*/
- def setConvergenceTol(tolerance: Int): this.type = {
+ def setConvergenceTol(tolerance: Double): this.type = {
this.convergenceTol = tolerance
this
}
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/optimization/LBFGSSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/optimization/LBFGSSuite.scala
index 4b1850659a..fe7a9033cd 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/optimization/LBFGSSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/optimization/LBFGSSuite.scala
@@ -195,4 +195,38 @@ class LBFGSSuite extends FunSuite with LocalSparkContext with Matchers {
assert(lossLBFGS3.length == 6)
assert((lossLBFGS3(4) - lossLBFGS3(5)) / lossLBFGS3(4) < convergenceTol)
}
+
+ test("Optimize via class LBFGS.") {
+ val regParam = 0.2
+
+ // Prepare another non-zero weights to compare the loss in the first iteration.
+ val initialWeightsWithIntercept = Vectors.dense(0.3, 0.12)
+ val convergenceTol = 1e-12
+ val maxNumIterations = 10
+
+ val lbfgsOptimizer = new LBFGS(gradient, squaredL2Updater)
+ .setNumCorrections(numCorrections)
+ .setConvergenceTol(convergenceTol)
+ .setMaxNumIterations(maxNumIterations)
+ .setRegParam(regParam)
+
+ val weightLBFGS = lbfgsOptimizer.optimize(dataRDD, initialWeightsWithIntercept)
+
+ val numGDIterations = 50
+ val stepSize = 1.0
+ val (weightGD, _) = GradientDescent.runMiniBatchSGD(
+ dataRDD,
+ gradient,
+ squaredL2Updater,
+ stepSize,
+ numGDIterations,
+ regParam,
+ miniBatchFrac,
+ initialWeightsWithIntercept)
+
+ // for class LBFGS and the optimize method, we only look at the weights
+ assert(compareDouble(weightLBFGS(0), weightGD(0), 0.02) &&
+ compareDouble(weightLBFGS(1), weightGD(1), 0.02),
+ "The weight differences between LBFGS and GD should be within 2%.")
+ }
}