[SPARK-6268][MLlib] KMeans parameter getter methods

jira: https://issues.apache.org/jira/browse/SPARK-6268

KMeans has many setters for parameters. It should have matching getters.

Author: Yuhao Yang <hhbyyh@gmail.com>

Closes #4974 from hhbyyh/get4Kmeans and squashes the following commits:

f44d4dc [Yuhao Yang] add experimental to getRuns
f94a3d7 [Yuhao Yang] add get for KMeans
author: Yuhao Yang <hhbyyh@gmail.com> 2015-03-12 15:17:46 -0700
committer: Xiangrui Meng <meng@databricks.com> 2015-03-12 15:17:46 -0700
commit: fb4787c9531be5dd9e512e79ff4ff45d24eb370d (patch)
tree: 08f91decc8b9dbc10df36a763e1022782e542cc9 /mllib
parent: 8f1bc7989b13c42aec7679b7c417175527b76419 (diff)
download: spark-fb4787c9531be5dd9e512e79ff4ff45d24eb370d.tar.gz
spark-fb4787c9531be5dd9e512e79ff4ff45d24eb370d.tar.bz2
spark-fb4787c9531be5dd9e512e79ff4ff45d24eb370d.zip
1 file changed, 37 insertions, 0 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
index 11633e8242..e41f941fd2 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
@@ -52,12 +52,22 @@ class KMeans private (
    */
   def this() = this(2, 20, 1, KMeans.K_MEANS_PARALLEL, 5, 1e-4, Utils.random.nextLong())
 
+  /**
+   * Number of clusters to create (k).
+   */
+  def getK: Int = k
+
   /** Set the number of clusters to create (k). Default: 2. */
   def setK(k: Int): this.type = {
     this.k = k
     this
   }
 
+  /**
+   * Maximum number of iterations to run.
+   */
+  def getMaxIterations: Int = maxIterations
+
   /** Set maximum number of iterations to run. Default: 20. */
   def setMaxIterations(maxIterations: Int): this.type = {
     this.maxIterations = maxIterations
@@ -65,6 +75,11 @@ class KMeans private (
   }
 
   /**
+   * The initialization algorithm. This can be either "random" or "k-means||".
+   */
+  def getInitializationMode: String = initializationMode
+
+  /**
    * Set the initialization algorithm. This can be either "random" to choose random points as
    * initial cluster centers, or "k-means||" to use a parallel variant of k-means++
    * (Bahmani et al., Scalable K-Means++, VLDB 2012). Default: k-means||.
@@ -79,6 +94,13 @@ class KMeans private (
 
   /**
    * :: Experimental ::
+   * Number of runs of the algorithm to execute in parallel.
+   */
+  @Experimental
+  def getRuns: Int = runs
+
+  /**
+   * :: Experimental ::
    * Set the number of runs of the algorithm to execute in parallel. We initialize the algorithm
    * this many times with random starting conditions (configured by the initialization mode), then
    * return the best clustering found over any run. Default: 1.
@@ -93,6 +115,11 @@ class KMeans private (
   }
 
   /**
+   * Number of steps for the k-means|| initialization mode.
+   */
+  def getInitializationSteps: Int = initializationSteps
+
+  /**
    * Set the number of steps for the k-means|| initialization mode. This is an advanced
    * setting -- the default of 5 is almost always enough. Default: 5.
    */
@@ -105,6 +132,11 @@ class KMeans private (
   }
 
   /**
+   * The distance threshold within which we consider centers to have converged.
+   */
+  def getEpsilon: Double = epsilon
+
+  /**
    * Set the distance threshold within which we've consider centers to have converged.
    * If all centers move less than this Euclidean distance, we stop iterating one run.
    */
@@ -113,6 +145,11 @@ class KMeans private (
     this
   }
 
+  /**
+   * The random seed for cluster initialization.
+   */
+  def getSeed: Long = seed
+
   /** Set the random seed for cluster initialization. */
   def setSeed(seed: Long): this.type = {
     this.seed = seed
author	Yuhao Yang <hhbyyh@gmail.com>	2015-03-12 15:17:46 -0700
committer	Xiangrui Meng <meng@databricks.com>	2015-03-12 15:17:46 -0700
commit	fb4787c9531be5dd9e512e79ff4ff45d24eb370d (patch)
tree	08f91decc8b9dbc10df36a763e1022782e542cc9 /mllib
parent	8f1bc7989b13c42aec7679b7c417175527b76419 (diff)
download	spark-fb4787c9531be5dd9e512e79ff4ff45d24eb370d.tar.gz spark-fb4787c9531be5dd9e512e79ff4ff45d24eb370d.tar.bz2 spark-fb4787c9531be5dd9e512e79ff4ff45d24eb370d.zip