about summary refs log tree commit diff
path: root/mllib
diff options
context:
space:
mode:
author: Feynman Liang <fliang@databricks.com> 2015-08-25 11:58:47 -0700
committer: Joseph K. Bradley <joseph@databricks.com> 2015-08-25 11:58:47 -0700
commit881208a8e849facf54166bdd69d3634407f952e7 (patch)
tree14cb4cf6e9f781c486463cb98e5eed36216c49c9 /mllib
parentb37f0cc1b4c064d6f09edb161250fa8b783de52a (diff)
downloadspark-881208a8e849facf54166bdd69d3634407f952e7.tar.gz
spark-881208a8e849facf54166bdd69d3634407f952e7.tar.bz2
spark-881208a8e849facf54166bdd69d3634407f952e7.zip
[SPARK-10230] [MLLIB] Rename optimizeAlpha to optimizeDocConcentration
See [discussion](https://github.com/apache/spark/pull/8254#discussion_r37837770) CC jkbradley Author: Feynman Liang <fliang@databricks.com> Closes #8422 from feynmanliang/SPARK-10230.
Diffstat (limited to 'mllib')
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala | 16
-rw-r--r-- mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala | 2
2 files changed, 9 insertions, 9 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala
index 5c2aae6403..38486e949b 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala
@@ -258,7 +258,7 @@ final class OnlineLDAOptimizer extends LDAOptimizer {
private var tau0: Double = 1024
private var kappa: Double = 0.51
private var miniBatchFraction: Double = 0.05
- private var optimizeAlpha: Boolean = false
+ private var optimizeDocConcentration: Boolean = false
// internal data structure
private var docs: RDD[(Long, Vector)] = null
@@ -335,20 +335,20 @@ final class OnlineLDAOptimizer extends LDAOptimizer {
}
/**
- * Optimize alpha, indicates whether alpha (Dirichlet parameter for document-topic distribution)
- * will be optimized during training.
+ * Optimize docConcentration, indicates whether docConcentration (Dirichlet parameter for
+ * document-topic distribution) will be optimized during training.
*/
@Since("1.5.0")
- def getOptimzeAlpha: Boolean = this.optimizeAlpha
+ def getOptimizeDocConcentration: Boolean = this.optimizeDocConcentration
/**
- * Sets whether to optimize alpha parameter during training.
+ * Sets whether to optimize docConcentration parameter during training.
*
* Default: false
*/
@Since("1.5.0")
- def setOptimzeAlpha(optimizeAlpha: Boolean): this.type = {
- this.optimizeAlpha = optimizeAlpha
+ def setOptimizeDocConcentration(optimizeDocConcentration: Boolean): this.type = {
+ this.optimizeDocConcentration = optimizeDocConcentration
this
}
@@ -458,7 +458,7 @@ final class OnlineLDAOptimizer extends LDAOptimizer {
// Note that this is an optimization to avoid batch.count
updateLambda(batchResult, (miniBatchFraction * corpusSize).ceil.toInt)
- if (optimizeAlpha) updateAlpha(gammat)
+ if (optimizeDocConcentration) updateAlpha(gammat)
this
}
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala
index 8a714f9b79..746a76a7e5 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala
@@ -423,7 +423,7 @@ class LDASuite extends SparkFunSuite with MLlibTestSparkContext {
val k = 2
val docs = sc.parallelize(toyData)
val op = new OnlineLDAOptimizer().setMiniBatchFraction(1).setTau0(1024).setKappa(0.51)
- .setGammaShape(100).setOptimzeAlpha(true).setSampleWithReplacement(false)
+ .setGammaShape(100).setOptimizeDocConcentration(true).setSampleWithReplacement(false)
val lda = new LDA().setK(k)
.setDocConcentration(1D / k)
.setTopicConcentration(0.01)