From d20559b157743981b9c09e286f2aaff8cbefab59 Mon Sep 17 00:00:00 2001
From: "Joseph K. Bradley" <joseph@databricks.com>
Date: Wed, 25 Feb 2015 16:13:17 -0800
Subject: [SPARK-5974] [SPARK-5980] [mllib] [python] [docs] Update ML guide
 with save/load, Python GBT

* Add GradientBoostedTrees Python examples to ML guide
  * I ran these in the pyspark shell, and they worked.
* Add save/load to examples in ML guide
* Added note to python docs about predict,transform not working within RDD actions,transformations in some cases (See SPARK-5981)

CC: mengxr

Author: Joseph K. Bradley <joseph@databricks.com>

Closes #4750 from jkbradley/SPARK-5974 and squashes the following commits:

c410e38 [Joseph K. Bradley] Added note to LabeledPoint about attributes
bcae18b [Joseph K. Bradley] Added import of models for save/load examples in ml guide.  Fixed line length for tree.py, feature.py (but not other ML Pyspark files yet).
6d81c3e [Joseph K. Bradley] completed python GBT examples
9903309 [Joseph K. Bradley] Added note to python docs about predict,transform not working within RDD actions,transformations in some cases
c7dfad8 [Joseph K. Bradley] Added model save/load to ML guide.  Added GBT examples to ML guide
---
 docs/mllib-naive-bayes.md | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'docs/mllib-naive-bayes.md')
diff --git a/docs/mllib-naive-bayes.md b/docs/mllib-naive-bayes.md
index d5b044d94f..81173255b5 100644
--- a/docs/mllib-naive-bayes.md
+++ b/docs/mllib-naive-bayes.md
@@ -37,7 +37,7 @@ smoothing parameter `lambda` as input, and output a
 can be used for evaluation and prediction.
 
 {% highlight scala %}
-import org.apache.spark.mllib.classification.NaiveBayes
+import org.apache.spark.mllib.classification.{NaiveBayes, NaiveBayesModel}
 import org.apache.spark.mllib.linalg.Vectors
 import org.apache.spark.mllib.regression.LabeledPoint
 
@@ -55,6 +55,9 @@ val model = NaiveBayes.train(training, lambda = 1.0)
 
 val predictionAndLabel = test.map(p => (model.predict(p.features), p.label))
 val accuracy = 1.0 * predictionAndLabel.filter(x => x._1 == x._2).count() / test.count()
+
+model.save("myModelPath")
+val sameModel = NaiveBayesModel.load("myModelPath")
 {% endhighlight %}
 </div>
 
@@ -93,6 +96,9 @@ double accuracy = predictionAndLabel.filter(new Function<Tuple2<Double, Double>,
       return pl._1().equals(pl._2());
     }
   }).count() / (double) test.count();
+
+model.save("myModelPath");
+NaiveBayesModel sameModel = NaiveBayesModel.load("myModelPath");
 {% endhighlight %}
 </div>
 
@@ -105,6 +111,8 @@ smoothing parameter `lambda` as input, and output a
 [NaiveBayesModel](api/python/pyspark.mllib.classification.NaiveBayesModel-class.html), which can be
 used for evaluation and prediction.
 
+Note that the Python API does not yet support model save/load but will in the future.
+
 <!-- TODO: Make Python's example consistent with Scala's and Java's. -->
 {% highlight python %}
 from pyspark.mllib.regression import LabeledPoint
-- 
cgit v1.2.3