aboutsummaryrefslogtreecommitdiff
path: root/examples/src/main/python/mllib
diff options
context:
space:
mode:
Diffstat (limited to 'examples/src/main/python/mllib')
-rw-r--r-- examples/src/main/python/mllib/naive_bayes_example.py 13
1 file changed, 4 insertions, 9 deletions
diff --git a/examples/src/main/python/mllib/naive_bayes_example.py b/examples/src/main/python/mllib/naive_bayes_example.py
index 35724f7d6a..749353b20e 100644
--- a/examples/src/main/python/mllib/naive_bayes_example.py
+++ b/examples/src/main/python/mllib/naive_bayes_example.py
@@ -29,15 +29,9 @@ import shutil
from pyspark import SparkContext
# $example on$
from pyspark.mllib.classification import NaiveBayes, NaiveBayesModel
-from pyspark.mllib.linalg import Vectors
-from pyspark.mllib.regression import LabeledPoint
+from pyspark.mllib.util import MLUtils
-def parseLine(line):
- parts = line.split(',')
- label = float(parts[0])
- features = Vectors.dense([float(x) for x in parts[1].split(' ')])
- return LabeledPoint(label, features)
# $example off$
if __name__ == "__main__":
@@ -45,10 +39,11 @@ if __name__ == "__main__":
sc = SparkContext(appName="PythonNaiveBayesExample")
# $example on$
- data = sc.textFile('data/mllib/sample_naive_bayes_data.txt').map(parseLine)
+ # Load and parse the data file.
+ data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")
# Split data approximately into training (60%) and test (40%)
- training, test = data.randomSplit([0.6, 0.4], seed=0)
+ training, test = data.randomSplit([0.6, 0.4])
# Train a naive Bayes model.
model = NaiveBayes.train(training, 1.0)