aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala 6
-rw-r--r-- mllib/src/test/scala/org/apache/spark/mllib/clustering/PowerIterationClusteringSuite.scala 10
2 files changed, 6 insertions, 10 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
index de9fa4aebf..23141aaf42 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
@@ -51,10 +51,10 @@ class KMeans private (
/**
* Constructs a KMeans instance with default parameters: {k: 2, maxIterations: 20, runs: 1,
- * initializationMode: "k-means||", initializationSteps: 5, epsilon: 1e-4, seed: random}.
+ * initializationMode: "k-means||", initializationSteps: 2, epsilon: 1e-4, seed: random}.
*/
@Since("0.8.0")
- def this() = this(2, 20, 1, KMeans.K_MEANS_PARALLEL, 5, 1e-4, Utils.random.nextLong())
+ def this() = this(2, 20, 1, KMeans.K_MEANS_PARALLEL, 2, 1e-4, Utils.random.nextLong())
/**
* Number of clusters to create (k).
@@ -134,7 +134,7 @@ class KMeans private (
/**
* Set the number of steps for the k-means|| initialization mode. This is an advanced
- * setting -- the default of 5 is almost always enough. Default: 5.
+ * setting -- the default of 2 is almost always enough. Default: 2.
*/
@Since("0.8.0")
def setInitializationSteps(initializationSteps: Int): this.type = {
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/clustering/PowerIterationClusteringSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/clustering/PowerIterationClusteringSuite.scala
index 3d81d375c7..b33b86b39a 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/clustering/PowerIterationClusteringSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/clustering/PowerIterationClusteringSuite.scala
@@ -49,7 +49,7 @@ class PowerIterationClusteringSuite extends SparkFunSuite with MLlibTestSparkCon
val r1 = 1.0
val n1 = 10
val r2 = 4.0
- val n2 = 40
+ val n2 = 10
val n = n1 + n2
val points = genCircle(r1, n1) ++ genCircle(r2, n2)
val similarities = for (i <- 1 until n; j <- 0 until i) yield {
@@ -83,7 +83,7 @@ class PowerIterationClusteringSuite extends SparkFunSuite with MLlibTestSparkCon
val r1 = 1.0
val n1 = 10
val r2 = 4.0
- val n2 = 40
+ val n2 = 10
val n = n1 + n2
val points = genCircle(r1, n1) ++ genCircle(r2, n2)
val similarities = for (i <- 1 until n; j <- 0 until i) yield {
@@ -91,11 +91,7 @@ class PowerIterationClusteringSuite extends SparkFunSuite with MLlibTestSparkCon
}
val edges = similarities.flatMap { case (i, j, s) =>
- if (i != j) {
- Seq(Edge(i, j, s), Edge(j, i, s))
- } else {
- None
- }
+ Seq(Edge(i, j, s), Edge(j, i, s))
}
val graph = Graph.fromEdges(sc.parallelize(edges, 2), 0.0)