aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Yu ISHIKAWA <yuu.ishikawa@gmail.com> 2015-11-10 16:42:28 -0800
committer Davies Liu <davies.liu@gmail.com> 2015-11-10 16:42:28 -0800
commit c0e48dfa611fa5d94132af7e6f6731f60ab833da (patch)
tree 76f616fe866b8344b8a8a30ff9426e36d486135c
parent a3989058c0938c8c59c278e7d1a766701cfa255b (diff)
download spark-c0e48dfa611fa5d94132af7e6f6731f60ab833da.tar.gz
spark-c0e48dfa611fa5d94132af7e6f6731f60ab833da.tar.bz2
spark-c0e48dfa611fa5d94132af7e6f6731f60ab833da.zip
[SPARK-11566] [MLLIB] [PYTHON] Refactoring GaussianMixtureModel.gaussians in Python
cc jkbradley Author: Yu ISHIKAWA <yuu.ishikawa@gmail.com> Closes #9534 from yu-iskw/SPARK-11566.
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/api/python/GaussianMixtureModelWrapper.scala 21
-rw-r--r-- python/pyspark/mllib/clustering.py 2
2 files changed, 7 insertions, 16 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/GaussianMixtureModelWrapper.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/GaussianMixtureModelWrapper.scala
index 0ec88ef77d..6a3b20c88d 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/GaussianMixtureModelWrapper.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/GaussianMixtureModelWrapper.scala
@@ -17,14 +17,11 @@
package org.apache.spark.mllib.api.python
-import java.util.{List => JList}
-
-import scala.collection.JavaConverters._
-import scala.collection.mutable.ArrayBuffer
+import scala.collection.JavaConverters
import org.apache.spark.SparkContext
-import org.apache.spark.mllib.linalg.{Vector, Vectors, Matrix}
import org.apache.spark.mllib.clustering.GaussianMixtureModel
+import org.apache.spark.mllib.linalg.{Vector, Vectors}
/**
* Wrapper around GaussianMixtureModel to provide helper methods in Python
@@ -36,17 +33,11 @@ private[python] class GaussianMixtureModelWrapper(model: GaussianMixtureModel) {
/**
* Returns gaussians as a List of Vectors and Matrices corresponding each MultivariateGaussian
*/
- val gaussians: JList[Object] = {
- val modelGaussians = model.gaussians
- var i = 0
- var mu = ArrayBuffer.empty[Vector]
- var sigma = ArrayBuffer.empty[Matrix]
- while (i < k) {
- mu += modelGaussians(i).mu
- sigma += modelGaussians(i).sigma
- i += 1
+ val gaussians: Array[Byte] = {
+ val modelGaussians = model.gaussians.map { gaussian =>
+ Array[Any](gaussian.mu, gaussian.sigma)
}
- List(mu.toArray, sigma.toArray).map(_.asInstanceOf[Object]).asJava
+ SerDe.dumps(JavaConverters.seqAsJavaListConverter(modelGaussians).asJava)
}
def save(sc: SparkContext, path: String): Unit = model.save(sc, path)
diff --git a/python/pyspark/mllib/clustering.py b/python/pyspark/mllib/clustering.py
index 1fa061dc2d..c9e6f1dec6 100644
--- a/python/pyspark/mllib/clustering.py
+++ b/python/pyspark/mllib/clustering.py
@@ -266,7 +266,7 @@ class GaussianMixtureModel(JavaModelWrapper, JavaSaveable, JavaLoader):
"""
return [
MultivariateGaussian(gaussian[0], gaussian[1])
- for gaussian in zip(*self.call("gaussians"))]
+ for gaussian in self.call("gaussians")]
@property
@since('1.4.0')