aboutsummaryrefslogtreecommitdiff
path: root/mllib
diff options
context:
space:
mode:
authorwm624@hotmail.com <wm624@hotmail.com>2017-01-12 22:27:57 -0800
committerYanbo Liang <ybliang8@gmail.com>2017-01-12 22:27:57 -0800
commit7f24a0b6c32c56a38cf879d953bbd523922ab9c9 (patch)
treed60ea1d9a8fcf309fb5c938452ac7018fbc5dd38 /mllib
parent3356b8b6a9184fcab8d0fe993f3545c3beaa4d99 (diff)
downloadspark-7f24a0b6c32c56a38cf879d953bbd523922ab9c9.tar.gz
spark-7f24a0b6c32c56a38cf879d953bbd523922ab9c9.tar.bz2
spark-7f24a0b6c32c56a38cf879d953bbd523922ab9c9.zip
[SPARK-19142][SPARKR] spark.kmeans should take seed, initSteps, and tol as parameters
## What changes were proposed in this pull request? spark.kmeans doesn't have an interface to set initSteps, seed, and tol. Because the Spark KMeans algorithm doesn't take the same set of parameters as R's kmeans, we should maintain a different interface in spark.kmeans. Add the missing parameters and the corresponding documentation. Modify the existing unit tests to take the additional parameters. Author: wm624@hotmail.com <wm624@hotmail.com> Closes #16523 from wangmiao1981/kmeans.
Diffstat (limited to 'mllib')
-rw-r--r--mllib/src/main/scala/org/apache/spark/ml/r/KMeansWrapper.scala9
1 files changed, 8 insertions, 1 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/ml/r/KMeansWrapper.scala b/mllib/src/main/scala/org/apache/spark/ml/r/KMeansWrapper.scala
index ea9458525a..a1fefd31c0 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/r/KMeansWrapper.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/r/KMeansWrapper.scala
@@ -68,7 +68,10 @@ private[r] object KMeansWrapper extends MLReadable[KMeansWrapper] {
formula: String,
k: Int,
maxIter: Int,
- initMode: String): KMeansWrapper = {
+ initMode: String,
+ seed: String,
+ initSteps: Int,
+ tol: Double): KMeansWrapper = {
val rFormula = new RFormula()
.setFormula(formula)
@@ -87,6 +90,10 @@ private[r] object KMeansWrapper extends MLReadable[KMeansWrapper] {
.setMaxIter(maxIter)
.setInitMode(initMode)
.setFeaturesCol(rFormula.getFeaturesCol)
+ .setInitSteps(initSteps)
+ .setTol(tol)
+
+ if (seed != null && seed.length > 0) kMeans.setSeed(seed.toInt)
val pipeline = new Pipeline()
.setStages(Array(rFormulaModel, kMeans))