about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--data/mllib/sample_naive_bayes_data.txt12
-rw-r--r--examples/src/main/java/org/apache/spark/examples/mllib/JavaNaiveBayesExample.java4
-rw-r--r--examples/src/main/python/mllib/naive_bayes_example.py13
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/mllib/NaiveBayesExample.scala14
4 files changed, 10 insertions, 33 deletions
diff --git a/data/mllib/sample_naive_bayes_data.txt b/data/mllib/sample_naive_bayes_data.txt
deleted file mode 100644
index bd22bea3a5..0000000000
--- a/data/mllib/sample_naive_bayes_data.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-0,1 0 0
-0,2 0 0
-0,3 0 0
-0,4 0 0
-1,0 1 0
-1,0 2 0
-1,0 3 0
-1,0 4 0
-2,0 0 1
-2,0 0 2
-2,0 0 3
-2,0 0 4
\ No newline at end of file
diff --git a/examples/src/main/java/org/apache/spark/examples/mllib/JavaNaiveBayesExample.java b/examples/src/main/java/org/apache/spark/examples/mllib/JavaNaiveBayesExample.java
index 2b17dbb963..f4ec04b0c6 100644
--- a/examples/src/main/java/org/apache/spark/examples/mllib/JavaNaiveBayesExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/mllib/JavaNaiveBayesExample.java
@@ -36,9 +36,9 @@ public class JavaNaiveBayesExample {
SparkConf sparkConf = new SparkConf().setAppName("JavaNaiveBayesExample");
JavaSparkContext jsc = new JavaSparkContext(sparkConf);
// $example on$
- String path = "data/mllib/sample_naive_bayes_data.txt";
+ String path = "data/mllib/sample_libsvm_data.txt";
JavaRDD<LabeledPoint> inputData = MLUtils.loadLibSVMFile(jsc.sc(), path).toJavaRDD();
- JavaRDD<LabeledPoint>[] tmp = inputData.randomSplit(new double[]{0.6, 0.4}, 12345);
+ JavaRDD<LabeledPoint>[] tmp = inputData.randomSplit(new double[]{0.6, 0.4});
JavaRDD<LabeledPoint> training = tmp[0]; // training set
JavaRDD<LabeledPoint> test = tmp[1]; // test set
final NaiveBayesModel model = NaiveBayes.train(training.rdd(), 1.0);
diff --git a/examples/src/main/python/mllib/naive_bayes_example.py b/examples/src/main/python/mllib/naive_bayes_example.py
index 35724f7d6a..749353b20e 100644
--- a/examples/src/main/python/mllib/naive_bayes_example.py
+++ b/examples/src/main/python/mllib/naive_bayes_example.py
@@ -29,15 +29,9 @@ import shutil
from pyspark import SparkContext
# $example on$
from pyspark.mllib.classification import NaiveBayes, NaiveBayesModel
-from pyspark.mllib.linalg import Vectors
-from pyspark.mllib.regression import LabeledPoint
+from pyspark.mllib.util import MLUtils
-def parseLine(line):
- parts = line.split(',')
- label = float(parts[0])
- features = Vectors.dense([float(x) for x in parts[1].split(' ')])
- return LabeledPoint(label, features)
# $example off$
if __name__ == "__main__":
@@ -45,10 +39,11 @@ if __name__ == "__main__":
sc = SparkContext(appName="PythonNaiveBayesExample")
# $example on$
- data = sc.textFile('data/mllib/sample_naive_bayes_data.txt').map(parseLine)
+ # Load and parse the data file.
+ data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")
# Split data approximately into training (60%) and test (40%)
- training, test = data.randomSplit([0.6, 0.4], seed=0)
+ training, test = data.randomSplit([0.6, 0.4])
# Train a naive Bayes model.
model = NaiveBayes.train(training, 1.0)
diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/NaiveBayesExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/NaiveBayesExample.scala
index 0187ad603a..b321d8e127 100644
--- a/examples/src/main/scala/org/apache/spark/examples/mllib/NaiveBayesExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/mllib/NaiveBayesExample.scala
@@ -21,8 +21,7 @@ package org.apache.spark.examples.mllib
import org.apache.spark.{SparkConf, SparkContext}
// $example on$
import org.apache.spark.mllib.classification.{NaiveBayes, NaiveBayesModel}
-import org.apache.spark.mllib.linalg.Vectors
-import org.apache.spark.mllib.regression.LabeledPoint
+import org.apache.spark.mllib.util.MLUtils
// $example off$
object NaiveBayesExample {
@@ -31,16 +30,11 @@ object NaiveBayesExample {
val conf = new SparkConf().setAppName("NaiveBayesExample")
val sc = new SparkContext(conf)
// $example on$
- val data = sc.textFile("data/mllib/sample_naive_bayes_data.txt")
- val parsedData = data.map { line =>
- val parts = line.split(',')
- LabeledPoint(parts(0).toDouble, Vectors.dense(parts(1).split(' ').map(_.toDouble)))
- }
+ // Load and parse the data file.
+ val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")
// Split data into training (60%) and test (40%).
- val splits = parsedData.randomSplit(Array(0.6, 0.4), seed = 11L)
- val training = splits(0)
- val test = splits(1)
+ val Array(training, test) = data.randomSplit(Array(0.6, 0.4))
val model = NaiveBayes.train(training, lambda = 1.0, modelType = "multinomial")