diff options
-rw-r--r-- | mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala | 3 | ||||
-rw-r--r-- | mllib/src/main/scala/org/apache/spark/ml/r/KMeansWrapper.scala | 2 |
2 files changed, 3 insertions, 2 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala
index 38428826a8..a8beef8b12 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala
@@ -299,7 +299,8 @@ class KMeansSummary private[clustering] (
    * Size of each cluster.
    */
   @Since("2.0.0")
-  lazy val size: Array[Int] = cluster.rdd.map {
+  lazy val clusterSizes: Array[Int] = cluster.rdd.map {
     case Row(clusterIdx: Int) => (clusterIdx, 1)
   }.reduceByKey(_ + _).collect().sortBy(_._1).map(_._2)
+
 }
diff --git a/mllib/src/main/scala/org/apache/spark/ml/r/KMeansWrapper.scala b/mllib/src/main/scala/org/apache/spark/ml/r/KMeansWrapper.scala
index d3a0df4063..ed735a4ea3 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/r/KMeansWrapper.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/r/KMeansWrapper.scala
@@ -37,7 +37,7 @@ private[r] class KMeansWrapper private (
 
   lazy val k: Int = kMeansModel.getK
 
-  lazy val size: Array[Int] = kMeansModel.summary.size
+  lazy val size: Array[Int] = kMeansModel.summary.clusterSizes
 
   lazy val cluster: DataFrame = kMeansModel.summary.cluster
 