diff options
Diffstat (limited to 'mllib')
-rw-r--r-- | mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala | 37 |
1 files changed, 37 insertions, 0 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala index 11633e8242..e41f941fd2 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala @@ -52,12 +52,22 @@ class KMeans private ( */ def this() = this(2, 20, 1, KMeans.K_MEANS_PARALLEL, 5, 1e-4, Utils.random.nextLong()) + /** + * Number of clusters to create (k). + */ + def getK: Int = k + /** Set the number of clusters to create (k). Default: 2. */ def setK(k: Int): this.type = { this.k = k this } + /** + * Maximum number of iterations to run. + */ + def getMaxIterations: Int = maxIterations + /** Set maximum number of iterations to run. Default: 20. */ def setMaxIterations(maxIterations: Int): this.type = { this.maxIterations = maxIterations @@ -65,6 +75,11 @@ class KMeans private ( } /** + * The initialization algorithm. This can be either "random" or "k-means||". + */ + def getInitializationMode: String = initializationMode + + /** * Set the initialization algorithm. This can be either "random" to choose random points as * initial cluster centers, or "k-means||" to use a parallel variant of k-means++ * (Bahmani et al., Scalable K-Means++, VLDB 2012). Default: k-means||. @@ -79,6 +94,13 @@ class KMeans private ( /** * :: Experimental :: + * Number of runs of the algorithm to execute in parallel. + */ + @Experimental + def getRuns: Int = runs + + /** + * :: Experimental :: * Set the number of runs of the algorithm to execute in parallel. We initialize the algorithm * this many times with random starting conditions (configured by the initialization mode), then * return the best clustering found over any run. Default: 1. 
@@ -93,6 +115,11 @@ class KMeans private ( } /** + * Number of steps for the k-means|| initialization mode + */ + def getInitializationSteps: Int = initializationSteps + + /** * Set the number of steps for the k-means|| initialization mode. This is an advanced * setting -- the default of 5 is almost always enough. Default: 5. */ @@ -105,6 +132,11 @@ class KMeans private ( } /** + * The distance threshold within which we consider centers to have converged. + */ + def getEpsilon: Double = epsilon + + /** * Set the distance threshold within which we've consider centers to have converged. * If all centers move less than this Euclidean distance, we stop iterating one run. */ @@ -113,6 +145,11 @@ class KMeans private ( this } + /** + * The random seed for cluster initialization. + */ + def getSeed: Long = seed + /** Set the random seed for cluster initialization. */ def setSeed(seed: Long): this.type = { this.seed = seed |