aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Shally Sangal <shallysangal@gmail.com> 2016-04-05 10:41:59 -0700
committer: Joseph K. Bradley <joseph@databricks.com> 2016-04-05 10:41:59 -0700
commit: d35690158810465809679ef39548e1400b38d448 (patch)
tree: 294c86dded53ec0dd7f5138ebf4754e298a1a714
parent: 78071736799b6c86b5c01b27395f4ab87075342b (diff)
downloadspark-d35690158810465809679ef39548e1400b38d448.tar.gz
spark-d35690158810465809679ef39548e1400b38d448.tar.bz2
spark-d35690158810465809679ef39548e1400b38d448.zip
[SPARK-14284][ML] KMeansSummary deprecating size; adding clusterSizes
## What changes were proposed in this pull request?

KMeansSummary class: deprecated size and added clusterSizes

Author: Shally Sangal <shallysangal@gmail.com>

Closes #12084 from shallys/master.
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala 3
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/r/KMeansWrapper.scala 2
2 files changed, 3 insertions, 2 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala
index 38428826a8..a8beef8b12 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala
@@ -299,7 +299,8 @@ class KMeansSummary private[clustering] (
* Size of each cluster.
*/
@Since("2.0.0")
- lazy val size: Array[Int] = cluster.rdd.map {
+ lazy val clusterSizes: Array[Int] = cluster.rdd.map {
case Row(clusterIdx: Int) => (clusterIdx, 1)
}.reduceByKey(_ + _).collect().sortBy(_._1).map(_._2)
+
}
diff --git a/mllib/src/main/scala/org/apache/spark/ml/r/KMeansWrapper.scala b/mllib/src/main/scala/org/apache/spark/ml/r/KMeansWrapper.scala
index d3a0df4063..ed735a4ea3 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/r/KMeansWrapper.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/r/KMeansWrapper.scala
@@ -37,7 +37,7 @@ private[r] class KMeansWrapper private (
lazy val k: Int = kMeansModel.getK
- lazy val size: Array[Int] = kMeansModel.summary.size
+ lazy val size: Array[Int] = kMeansModel.summary.clusterSizes
lazy val cluster: DataFrame = kMeansModel.summary.cluster