about | summary | refs | log | tree | commit | diff
path: root/examples/src/main/python
diff options
context:
space:
mode:
author: wm624@hotmail.com <wm624@hotmail.com> 2016-05-27 20:59:24 -0500
committer: Sean Owen <sowen@cloudera.com> 2016-05-27 20:59:24 -0500
commit: 5d4dafe8fdea49dcbd6b0e4c23e3791fa30c8911 (patch)
tree: 57f130594c229600e6f392c8f1b76012a5bd5ddd /examples/src/main/python
parent: 4a2fb8b87ca4517e0f4a1d7a1a1b3c08c1c1294d (diff)
downloadspark-5d4dafe8fdea49dcbd6b0e4c23e3791fa30c8911.tar.gz
spark-5d4dafe8fdea49dcbd6b0e4c23e3791fa30c8911.tar.bz2
spark-5d4dafe8fdea49dcbd6b0e4c23e3791fa30c8911.zip
[SPARK-15449][MLLIB][EXAMPLE] Wrong Data Format - Documentation Issue
## What changes were proposed in this pull request? (Please fill in changes proposed in this fix) In the MLlib naive Bayes example, the Scala and Python examples do not use LIBSVM-format data, but the Java example does. This change updates the Scala and Python examples to load the same LIBSVM data file as the Java example. ## How was this patch tested? Manual tests. Author: wm624@hotmail.com <wm624@hotmail.com> Closes #13301 from wangmiao1981/example.
Diffstat (limited to 'examples/src/main/python')
-rw-r--r--examples/src/main/python/mllib/naive_bayes_example.py13
1 files changed, 4 insertions, 9 deletions
diff --git a/examples/src/main/python/mllib/naive_bayes_example.py b/examples/src/main/python/mllib/naive_bayes_example.py
index 35724f7d6a..749353b20e 100644
--- a/examples/src/main/python/mllib/naive_bayes_example.py
+++ b/examples/src/main/python/mllib/naive_bayes_example.py
@@ -29,15 +29,9 @@ import shutil
from pyspark import SparkContext
# $example on$
from pyspark.mllib.classification import NaiveBayes, NaiveBayesModel
-from pyspark.mllib.linalg import Vectors
-from pyspark.mllib.regression import LabeledPoint
+from pyspark.mllib.util import MLUtils
-def parseLine(line):
- parts = line.split(',')
- label = float(parts[0])
- features = Vectors.dense([float(x) for x in parts[1].split(' ')])
- return LabeledPoint(label, features)
# $example off$
if __name__ == "__main__":
@@ -45,10 +39,11 @@ if __name__ == "__main__":
sc = SparkContext(appName="PythonNaiveBayesExample")
# $example on$
- data = sc.textFile('data/mllib/sample_naive_bayes_data.txt').map(parseLine)
+ # Load and parse the data file.
+ data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")
# Split data approximately into training (60%) and test (40%)
- training, test = data.randomSplit([0.6, 0.4], seed=0)
+ training, test = data.randomSplit([0.6, 0.4])
# Train a naive Bayes model.
model = NaiveBayes.train(training, 1.0)