aboutsummaryrefslogtreecommitdiff
path: root/docs/mllib-naive-bayes.md
diff options
context:
space:
mode:
authorMechCoder <manojkumarsivaraj334@gmail.com>2015-03-01 16:28:15 -0800
committerXiangrui Meng <meng@databricks.com>2015-03-01 16:28:15 -0800
commit3f00bb3ef1384fabf86a68180d40a1a515f6f5e3 (patch)
tree565f37f071ee807bf114bb9a6b329f4a7351d7ef /docs/mllib-naive-bayes.md
parentaedbbaa3dda9cbc154cd52c07f6d296b972b0eb2 (diff)
downloadspark-3f00bb3ef1384fabf86a68180d40a1a515f6f5e3.tar.gz
spark-3f00bb3ef1384fabf86a68180d40a1a515f6f5e3.tar.bz2
spark-3f00bb3ef1384fabf86a68180d40a1a515f6f5e3.zip
[SPARK-6083] [MLLib] [DOC] Make Python API example consistent in NaiveBayes
Author: MechCoder <manojkumarsivaraj334@gmail.com> Closes #4834 from MechCoder/spark-6083 and squashes the following commits: 1cdd7b5 [MechCoder] Add parse function 65bbbe9 [MechCoder] [SPARK-6083] Make Python API example consistent in NaiveBayes
Diffstat (limited to 'docs/mllib-naive-bayes.md')
-rw-r--r--docs/mllib-naive-bayes.md26
1 files changed, 16 insertions, 10 deletions
diff --git a/docs/mllib-naive-bayes.md b/docs/mllib-naive-bayes.md
index 5224a0b49a..55b8f2ce6c 100644
--- a/docs/mllib-naive-bayes.md
+++ b/docs/mllib-naive-bayes.md
@@ -115,22 +115,28 @@ used for evaluation and prediction.
Note that the Python API does not yet support model save/load but will in the future.
-<!-- TODO: Make Python's example consistent with Scala's and Java's. -->
{% highlight python %}
-from pyspark.mllib.regression import LabeledPoint
from pyspark.mllib.classification import NaiveBayes
+from pyspark.mllib.linalg import Vectors
+from pyspark.mllib.regression import LabeledPoint
+
+def parseLine(line):
+ parts = line.split(',')
+ label = float(parts[0])
+ features = Vectors.dense([float(x) for x in parts[1].split(' ')])
+ return LabeledPoint(label, features)
+
+data = sc.textFile('data/mllib/sample_naive_bayes_data.txt').map(parseLine)
-# an RDD of LabeledPoint
-data = sc.parallelize([
- LabeledPoint(0.0, [0.0, 0.0])
- ... # more labeled points
-])
+# Split data approximately into training (60%) and test (40%)
+training, test = data.randomSplit([0.6, 0.4], seed = 0)
# Train a naive Bayes model.
-model = NaiveBayes.train(data, 1.0)
+model = NaiveBayes.train(training, 1.0)
-# Make prediction.
-prediction = model.predict([0.0, 0.0])
+# Make prediction and test accuracy.
+predictionAndLabel = test.map(lambda p : (model.predict(p.features), p.label))
+accuracy = 1.0 * predictionAndLabel.filter(lambda (x, v): x == v).count() / test.count()
{% endhighlight %}
</div>