[SPARK-7949] [MLLIB] [DOC] update document with some missing save/load

add save load for examples: KMeansModel PowerIterationClusteringModel Word2VecModel IsotonicRegressionModel Author: Yuhao Yang <hhbyyh@gmail.com> Closes #6498 from hhbyyh/docSaveLoad and squashes the following commits: 7f9f06d [Yuhao Yang] add missing imports c604cad [Yuhao Yang] Merge remote-tracking branch 'upstream/master' into docSaveLoad 1dd77cc [Yuhao Yang] update document with some missing save/load
author: Yuhao Yang <hhbyyh@gmail.com> 2015-05-31 11:51:49 -0700
committer: Joseph K. Bradley <joseph@databricks.com> 2015-05-31 11:51:49 -0700
commit: 0674700303da3e4737d73f5fabd2a925ec712f63 (patch)
tree: accc878afed083e7603a6ff11ad7f5be71e45555 /docs/mllib-clustering.md
parent: e1067d0ad1c32c678c23d76d7653b51770795831 (diff)
download: spark-0674700303da3e4737d73f5fabd2a925ec712f63.tar.gz
spark-0674700303da3e4737d73f5fabd2a925ec712f63.tar.bz2
spark-0674700303da3e4737d73f5fabd2a925ec712f63.zip
1 files changed, 24 insertions, 4 deletions
diff --git a/docs/mllib-clustering.md b/docs/mllib-clustering.md
index f41ca70952..dac22f736e 100644
--- a/docs/mllib-clustering.md
+++ b/docs/mllib-clustering.md
@@ -47,7 +47,7 @@ Set Sum of Squared Error (WSSSE). You can reduce this error measure by increasin
 optimal *k* is usually one where there is an "elbow" in the WSSSE graph.
 
 {% highlight scala %}
-import org.apache.spark.mllib.clustering.KMeans
+import org.apache.spark.mllib.clustering.{KMeans, KMeansModel}
 import org.apache.spark.mllib.linalg.Vectors
 
 // Load and parse the data
@@ -62,6 +62,10 @@ val clusters = KMeans.train(parsedData, numClusters, numIterations)
 // Evaluate clustering by computing Within Set Sum of Squared Errors
 val WSSSE = clusters.computeCost(parsedData)
 println("Within Set Sum of Squared Errors = " + WSSSE)
+
+// Save and load model
+clusters.save(sc, "myModelPath")
+val sameModel = KMeansModel.load(sc, "myModelPath")
 {% endhighlight %}
 </div>
 
@@ -110,6 +114,10 @@ public class KMeansExample {
     // Evaluate clustering by computing Within Set Sum of Squared Errors
     double WSSSE = clusters.computeCost(parsedData.rdd());
     System.out.println("Within Set Sum of Squared Errors = " + WSSSE);
+
+    // Save and load model
+    clusters.save(sc.sc(), "myModelPath");
+    KMeansModel sameModel = KMeansModel.load(sc.sc(), "myModelPath");
   }
 }
 {% endhighlight %}
@@ -124,7 +132,7 @@ Within Set Sum of Squared Error (WSSSE). You can reduce this error measure by in
 fact the optimal *k* is usually one where there is an "elbow" in the WSSSE graph.
 
 {% highlight python %}
-from pyspark.mllib.clustering import KMeans
+from pyspark.mllib.clustering import KMeans, KMeansModel
 from numpy import array
 from math import sqrt
 
@@ -143,6 +151,10 @@ def error(point):
 
 WSSSE = parsedData.map(lambda point: error(point)).reduce(lambda x, y: x + y)
 print("Within Set Sum of Squared Error = " + str(WSSSE))
+
+# Save and load model
+clusters.save(sc, "myModelPath")
+sameModel = KMeansModel.load(sc, "myModelPath")
 {% endhighlight %}
 </div>
 
@@ -312,12 +324,12 @@ Calling `PowerIterationClustering.run` returns a
 which contains the computed clustering assignments.
 
 {% highlight scala %}
-import org.apache.spark.mllib.clustering.PowerIterationClustering
+import org.apache.spark.mllib.clustering.{PowerIterationClustering, PowerIterationClusteringModel}
 import org.apache.spark.mllib.linalg.Vectors
 
 val similarities: RDD[(Long, Long, Double)] = ...
 
-val pic = new PowerIteartionClustering()
+val pic = new PowerIterationClustering()
   .setK(3)
   .setMaxIterations(20)
 val model = pic.run(similarities)
@@ -325,6 +337,10 @@ val model = pic.run(similarities)
 model.assignments.foreach { a =>
   println(s"${a.id} -> ${a.cluster}")
 }
+
+// Save and load model
+model.save(sc, "myModelPath")
+val sameModel = PowerIterationClusteringModel.load(sc, "myModelPath")
 {% endhighlight %}
 
 A full example that produces the experiment described in the PIC paper can be found under
@@ -360,6 +376,10 @@ PowerIterationClusteringModel model = pic.run(similarities);
 for (PowerIterationClustering.Assignment a: model.assignments().toJavaRDD().collect()) {
   System.out.println(a.id() + " -> " + a.cluster());
 }
+
+// Save and load model
+model.save(sc.sc(), "myModelPath");
+PowerIterationClusteringModel sameModel = PowerIterationClusteringModel.load(sc.sc(), "myModelPath");
 {% endhighlight %}
 </div>
author	Yuhao Yang <hhbyyh@gmail.com>	2015-05-31 11:51:49 -0700
committer	Joseph K. Bradley <joseph@databricks.com>	2015-05-31 11:51:49 -0700
commit	0674700303da3e4737d73f5fabd2a925ec712f63 (patch)
tree	accc878afed083e7603a6ff11ad7f5be71e45555 /docs/mllib-clustering.md
parent	e1067d0ad1c32c678c23d76d7653b51770795831 (diff)
download	spark-0674700303da3e4737d73f5fabd2a925ec712f63.tar.gz spark-0674700303da3e4737d73f5fabd2a925ec712f63.tar.bz2 spark-0674700303da3e4737d73f5fabd2a925ec712f63.zip