From 7f24a0b6c32c56a38cf879d953bbd523922ab9c9 Mon Sep 17 00:00:00 2001 From: "wm624@hotmail.com" Date: Thu, 12 Jan 2017 22:27:57 -0800 Subject: [SPARK-19142][SPARKR] spark.kmeans should take seed, initSteps, and tol as parameters ## What changes were proposed in this pull request? spark.kmeans doesn't have an interface to set initSteps, seed and tol. As the Spark KMeans algorithm doesn't take the same set of parameters as R kmeans, we should maintain a different interface in spark.kmeans. Add the missing parameters and the corresponding documentation. Modify the existing unit tests to take the additional parameters. Author: wm624@hotmail.com Closes #16523 from wangmiao1981/kmeans. --- mllib/src/main/scala/org/apache/spark/ml/r/KMeansWrapper.scala | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'mllib') diff --git a/mllib/src/main/scala/org/apache/spark/ml/r/KMeansWrapper.scala b/mllib/src/main/scala/org/apache/spark/ml/r/KMeansWrapper.scala index ea9458525a..a1fefd31c0 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/r/KMeansWrapper.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/r/KMeansWrapper.scala @@ -68,7 +68,10 @@ private[r] object KMeansWrapper extends MLReadable[KMeansWrapper] { formula: String, k: Int, maxIter: Int, - initMode: String): KMeansWrapper = { + initMode: String, + seed: String, + initSteps: Int, + tol: Double): KMeansWrapper = { val rFormula = new RFormula() .setFormula(formula) @@ -87,6 +90,10 @@ private[r] object KMeansWrapper extends MLReadable[KMeansWrapper] { .setMaxIter(maxIter) .setInitMode(initMode) .setFeaturesCol(rFormula.getFeaturesCol) + .setInitSteps(initSteps) + .setTol(tol) + + if (seed != null && seed.length > 0) kMeans.setSeed(seed.toInt) val pipeline = new Pipeline() .setStages(Array(rFormulaModel, kMeans)) -- cgit v1.2.3