aboutsummaryrefslogtreecommitdiff
path: root/docs/mllib-clustering.md
diff options
context:
space:
mode:
authorMechCoder <manojkumarsivaraj334@gmail.com>2015-03-25 14:45:23 -0700
committerXiangrui Meng <meng@databricks.com>2015-03-25 14:45:23 -0700
commit4fc4d0369e8240defe0ee83252426402f1a28a36 (patch)
tree0d4187756c9caf831a890fcf612b373642f5a92f /docs/mllib-clustering.md
parent435337381f093f95248c8f0204e60c0b366edc81 (diff)
downloadspark-4fc4d0369e8240defe0ee83252426402f1a28a36.tar.gz
spark-4fc4d0369e8240defe0ee83252426402f1a28a36.tar.bz2
spark-4fc4d0369e8240defe0ee83252426402f1a28a36.zip
[SPARK-5987] [MLlib] Save/load for GaussianMixtureModels
Should be self explanatory. Author: MechCoder <manojkumarsivaraj334@gmail.com> Closes #4986 from MechCoder/spark-5987 and squashes the following commits: 7d2cd56 [MechCoder] Iterate over dataframe in a better way e7a14cb [MechCoder] Minor 33c84f9 [MechCoder] Store as Array[Data] instead of Data[Array] 505bd57 [MechCoder] Rebased over master and used MatrixUDT 7422bb4 [MechCoder] Store sigmas as Array[Double] instead of Array[Array[Double]] b9794e4 [MechCoder] Minor cb77095 [MechCoder] [SPARK-5987] Save/load for GaussianMixtureModels
Diffstat (limited to 'docs/mllib-clustering.md')
-rw-r--r--docs/mllib-clustering.md8
1 files changed, 8 insertions, 0 deletions
diff --git a/docs/mllib-clustering.md b/docs/mllib-clustering.md
index 0b6db4fcb7..f5aa15b7d9 100644
--- a/docs/mllib-clustering.md
+++ b/docs/mllib-clustering.md
@@ -173,6 +173,7 @@ to the algorithm. We then output the parameters of the mixture model.
{% highlight scala %}
import org.apache.spark.mllib.clustering.GaussianMixture
+import org.apache.spark.mllib.clustering.GaussianMixtureModel
import org.apache.spark.mllib.linalg.Vectors
// Load and parse the data
@@ -182,6 +183,10 @@ val parsedData = data.map(s => Vectors.dense(s.trim.split(' ').map(_.toDouble)))
// Cluster the data into two classes using GaussianMixture
val gmm = new GaussianMixture().setK(2).run(parsedData)
+// Save and load model
+gmm.save(sc, "myGMMModel")
+val sameModel = GaussianMixtureModel.load(sc, "myGMMModel")
+
// output parameters of max-likelihood model
for (i <- 0 until gmm.k) {
println("weight=%f\nmu=%s\nsigma=\n%s\n" format
@@ -231,6 +236,9 @@ public class GaussianMixtureExample {
// Cluster the data into two classes using GaussianMixture
GaussianMixtureModel gmm = new GaussianMixture().setK(2).run(parsedData.rdd());
+ // Save and load GaussianMixtureModel
+ gmm.save(sc, "myGMMModel")
+ GaussianMixtureModel sameModel = GaussianMixtureModel.load(sc, "myGMMModel")
// Output the parameters of the mixture model
for(int j=0; j<gmm.k(); j++) {
System.out.println("weight=%f\nmu=%s\nsigma=\n%s\n",