diff options
author | Yanbo Liang <ybliang8@gmail.com> | 2016-09-11 13:47:13 +0100 |
---|---|---|
committer | Sean Owen <sowen@cloudera.com> | 2016-09-11 13:47:13 +0100 |
commit | 883c7631847a95684534222c1b6cfed8e62710c8 (patch) | |
tree | b7b007aa8e0891b507a87539b4221e711ce0c9fd /mllib | |
parent | c76baff0cc4775c2191d075cc9a8176e4915fec8 (diff) | |
download | spark-883c7631847a95684534222c1b6cfed8e62710c8.tar.gz spark-883c7631847a95684534222c1b6cfed8e62710c8.tar.bz2 spark-883c7631847a95684534222c1b6cfed8e62710c8.zip |
[SPARK-17389][FOLLOW-UP][ML] Change KMeans k-means|| default init steps from 5 to 2.
## What changes were proposed in this pull request?
#14956 reduced default k-means|| init steps to 2 from 5 only for spark.mllib package, we should also do same change for spark.ml and PySpark.
## How was this patch tested?
Existing tests.
Author: Yanbo Liang <ybliang8@gmail.com>
Closes #15050 from yanboliang/spark-17389.
Diffstat (limited to 'mllib')
-rw-r--r-- | mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala | 4 | ||||
-rw-r--r-- | mllib/src/test/scala/org/apache/spark/ml/clustering/KMeansSuite.scala | 2 |
2 files changed, 3 insertions, 3 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala index 6c46be7196..b04e82838e 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala @@ -69,7 +69,7 @@ private[clustering] trait KMeansParams extends Params with HasMaxIter with HasFe /** * Param for the number of steps for the k-means|| initialization mode. This is an advanced - * setting -- the default of 5 is almost always enough. Must be > 0. Default: 5. + * setting -- the default of 2 is almost always enough. Must be > 0. Default: 2. * @group expertParam */ @Since("1.5.0") @@ -262,7 +262,7 @@ class KMeans @Since("1.5.0") ( k -> 2, maxIter -> 20, initMode -> MLlibKMeans.K_MEANS_PARALLEL, - initSteps -> 5, + initSteps -> 2, tol -> 1e-4) @Since("1.5.0") diff --git a/mllib/src/test/scala/org/apache/spark/ml/clustering/KMeansSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/clustering/KMeansSuite.scala index 88f31a1cd2..c9ba5a288a 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/clustering/KMeansSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/clustering/KMeansSuite.scala @@ -45,7 +45,7 @@ class KMeansSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultR assert(kmeans.getPredictionCol === "prediction") assert(kmeans.getMaxIter === 20) assert(kmeans.getInitMode === MLlibKMeans.K_MEANS_PARALLEL) - assert(kmeans.getInitSteps === 5) + assert(kmeans.getInitSteps === 2) assert(kmeans.getTol === 1e-4) } |