aboutsummaryrefslogtreecommitdiff
path: root/mllib
diff options
context:
space:
mode:
author: Yuhao Yang <hhbyyh@gmail.com> 2015-03-12 15:17:46 -0700
committer: Xiangrui Meng <meng@databricks.com> 2015-03-12 15:17:46 -0700
commit: fb4787c9531be5dd9e512e79ff4ff45d24eb370d (patch)
tree: 08f91decc8b9dbc10df36a763e1022782e542cc9 /mllib
parent: 8f1bc7989b13c42aec7679b7c417175527b76419 (diff)
downloadspark-fb4787c9531be5dd9e512e79ff4ff45d24eb370d.tar.gz
spark-fb4787c9531be5dd9e512e79ff4ff45d24eb370d.tar.bz2
spark-fb4787c9531be5dd9e512e79ff4ff45d24eb370d.zip
[SPARK-6268][MLlib] KMeans parameter getter methods
jira: https://issues.apache.org/jira/browse/SPARK-6268 KMeans has many setters for parameters. It should have matching getters. Author: Yuhao Yang <hhbyyh@gmail.com> Closes #4974 from hhbyyh/get4Kmeans and squashes the following commits: f44d4dc [Yuhao Yang] add experimental to getRuns f94a3d7 [Yuhao Yang] add get for KMeans
Diffstat (limited to 'mllib')
-rw-r--r--mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala37
1 files changed, 37 insertions, 0 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
index 11633e8242..e41f941fd2 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
@@ -52,12 +52,22 @@ class KMeans private (
*/
def this() = this(2, 20, 1, KMeans.K_MEANS_PARALLEL, 5, 1e-4, Utils.random.nextLong())
+ /**
+ * Number of clusters to create (k).
+ */
+ def getK: Int = k
+
/** Set the number of clusters to create (k). Default: 2. */
def setK(k: Int): this.type = {
this.k = k
this
}
+ /**
+ * Maximum number of iterations to run.
+ */
+ def getMaxIterations: Int = maxIterations
+
/** Set maximum number of iterations to run. Default: 20. */
def setMaxIterations(maxIterations: Int): this.type = {
this.maxIterations = maxIterations
@@ -65,6 +75,11 @@ class KMeans private (
}
/**
+ * The initialization algorithm. This can be either "random" or "k-means||".
+ */
+ def getInitializationMode: String = initializationMode
+
+ /**
* Set the initialization algorithm. This can be either "random" to choose random points as
* initial cluster centers, or "k-means||" to use a parallel variant of k-means++
* (Bahmani et al., Scalable K-Means++, VLDB 2012). Default: k-means||.
@@ -79,6 +94,13 @@ class KMeans private (
/**
* :: Experimental ::
+ * Number of runs of the algorithm to execute in parallel.
+ */
+ @Experimental
+ def getRuns: Int = runs
+
+ /**
+ * :: Experimental ::
* Set the number of runs of the algorithm to execute in parallel. We initialize the algorithm
* this many times with random starting conditions (configured by the initialization mode), then
* return the best clustering found over any run. Default: 1.
@@ -93,6 +115,11 @@ class KMeans private (
}
/**
+ * Number of steps for the k-means|| initialization mode.
+ */
+ def getInitializationSteps: Int = initializationSteps
+
+ /**
* Set the number of steps for the k-means|| initialization mode. This is an advanced
* setting -- the default of 5 is almost always enough. Default: 5.
*/
@@ -105,6 +132,11 @@ class KMeans private (
}
/**
+ * The distance threshold within which we consider centers to have converged.
+ */
+ def getEpsilon: Double = epsilon
+
+ /**
* Set the distance threshold within which we consider centers to have converged.
* If all centers move less than this Euclidean distance, we stop iterating one run.
*/
@@ -113,6 +145,11 @@ class KMeans private (
this
}
+ /**
+ * The random seed for cluster initialization.
+ */
+ def getSeed: Long = seed
+
/** Set the random seed for cluster initialization. */
def setSeed(seed: Long): this.type = {
this.seed = seed