about | summary | refs | log | tree | commit | diff
path: root/mllib/src/test
diff options
context:
space:
mode:
author: Sean Owen <sowen@cloudera.com> 2016-09-11 08:00:55 +0100
committer: Sean Owen <sowen@cloudera.com> 2016-09-11 08:00:55 +0100
commit: 29ba9578f44c7caa8451386cee1f03f4e0ed8fc7 (patch)
tree: c1268207d0c7997161d632eb46dd7bf00f88668f /mllib/src/test
parent: 71b7d42f5fa8d3a891ce831eedb308d1f407dd7e (diff)
download: spark-29ba9578f44c7caa8451386cee1f03f4e0ed8fc7.tar.gz
spark-29ba9578f44c7caa8451386cee1f03f4e0ed8fc7.tar.bz2
spark-29ba9578f44c7caa8451386cee1f03f4e0ed8fc7.zip
[SPARK-17389][ML][MLLIB] KMeans speedup with better choice of k-means|| init steps = 2
## What changes were proposed in this pull request?

Reduce default k-means|| init steps to 2 from 5. See JIRA for discussion. See also https://github.com/apache/spark/pull/14948

## How was this patch tested?

Existing tests.

Author: Sean Owen <sowen@cloudera.com>

Closes #14956 from srowen/SPARK-17389.2.
Diffstat (limited to 'mllib/src/test')
-rw-r--r-- mllib/src/test/scala/org/apache/spark/mllib/clustering/PowerIterationClusteringSuite.scala 10
1 file changed, 3 insertions, 7 deletions
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/clustering/PowerIterationClusteringSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/clustering/PowerIterationClusteringSuite.scala
index 3d81d375c7..b33b86b39a 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/clustering/PowerIterationClusteringSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/clustering/PowerIterationClusteringSuite.scala
@@ -49,7 +49,7 @@ class PowerIterationClusteringSuite extends SparkFunSuite with MLlibTestSparkCon
val r1 = 1.0
val n1 = 10
val r2 = 4.0
- val n2 = 40
+ val n2 = 10
val n = n1 + n2
val points = genCircle(r1, n1) ++ genCircle(r2, n2)
val similarities = for (i <- 1 until n; j <- 0 until i) yield {
@@ -83,7 +83,7 @@ class PowerIterationClusteringSuite extends SparkFunSuite with MLlibTestSparkCon
val r1 = 1.0
val n1 = 10
val r2 = 4.0
- val n2 = 40
+ val n2 = 10
val n = n1 + n2
val points = genCircle(r1, n1) ++ genCircle(r2, n2)
val similarities = for (i <- 1 until n; j <- 0 until i) yield {
@@ -91,11 +91,7 @@ class PowerIterationClusteringSuite extends SparkFunSuite with MLlibTestSparkCon
}
val edges = similarities.flatMap { case (i, j, s) =>
- if (i != j) {
- Seq(Edge(i, j, s), Edge(j, i, s))
- } else {
- None
- }
+ Seq(Edge(i, j, s), Edge(j, i, s))
}
val graph = Graph.fromEdges(sc.parallelize(edges, 2), 0.0)