Response to comments from Reynold, Ameet and Evan

* Arguments renamed according to Ameet's suggestion
* Using DoubleMatrix instead of Array[Double] in computation
* Removed arguments C (kinds of label) and D (dimension of feature vector) from NaiveBayes.train()
* Replaced reduceByKey with foldByKey to avoid modifying original input data
author: Lian, Cheng <rhythm.mail@gmail.com> 2013-12-30 22:46:32 +0800
committer: Lian, Cheng <rhythm.mail@gmail.com> 2013-12-30 22:46:32 +0800
commit: 6d0e2e86dfbca88abc847d3babac2d1f82d61aaf (patch)
tree: 982302a5b1b2485ad08b992d9468e2b7c9eb4cc9 /mllib/src/test
parent: f150b6e76c56ed6f604e6dbda7bce6b6278929fb (diff)
download: spark-6d0e2e86dfbca88abc847d3babac2d1f82d61aaf.tar.gz
spark-6d0e2e86dfbca88abc847d3babac2d1f82d61aaf.tar.bz2
spark-6d0e2e86dfbca88abc847d3babac2d1f82d61aaf.zip
1 file changed, 16 insertions, 16 deletions
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala
index a2821347a7..18575f410c 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala
@@ -38,20 +38,20 @@ object NaiveBayesSuite {
 
   // Generate input of the form Y = (weightMatrix*x).argmax()
   def generateNaiveBayesInput(
-      weightPerLabel: Array[Double],          // 1XC
-      weightsMatrix: Array[Array[Double]],    // CXD
+      pi: Array[Double],            // 1XC
+      theta: Array[Array[Double]],  // CXD
       nPoints: Int,
       seed: Int): Seq[LabeledPoint] = {
-    val D = weightsMatrix(0).length
+    val D = theta(0).length
     val rnd = new Random(seed)
 
-    val _weightPerLabel = weightPerLabel.map(math.pow(math.E, _))
-    val _weightMatrix = weightsMatrix.map(row => row.map(math.pow(math.E, _)))
+    val _pi = pi.map(math.pow(math.E, _))
+    val _theta = theta.map(row => row.map(math.pow(math.E, _)))
 
     for (i <- 0 until nPoints) yield {
-      val y = calcLabel(rnd.nextDouble(), _weightPerLabel)
+      val y = calcLabel(rnd.nextDouble(), _pi)
       val xi = Array.tabulate[Double](D) { j =>
-        if (rnd.nextDouble() < _weightMatrix(y)(j)) 1 else 0
+        if (rnd.nextDouble() < _theta(y)(j)) 1 else 0
       }
 
       LabeledPoint(y, xi)
@@ -83,20 +83,20 @@ class NaiveBayesSuite extends FunSuite with BeforeAndAfterAll {
   test("Naive Bayes") {
     val nPoints = 10000
 
-    val weightPerLabel = Array(math.log(0.5), math.log(0.3), math.log(0.2))
-    val weightsMatrix = Array(
-      Array(math.log(0.91), math.log(0.03), math.log(0.03), math.log(0.03)), // label 0
-      Array(math.log(0.03), math.log(0.91), math.log(0.03), math.log(0.03)), // label 1
-      Array(math.log(0.03), math.log(0.03), math.log(0.91), math.log(0.03))  // label 2
-    )
+    val pi = Array(0.5, 0.3, 0.2).map(math.log)
+    val theta = Array(
+      Array(0.91, 0.03, 0.03, 0.03), // label 0
+      Array(0.03, 0.91, 0.03, 0.03), // label 1
+      Array(0.03, 0.03, 0.91, 0.03)  // label 2
+    ).map(_.map(math.log))
 
-    val testData = NaiveBayesSuite.generateNaiveBayesInput(weightPerLabel, weightsMatrix, nPoints, 42)
+    val testData = NaiveBayesSuite.generateNaiveBayesInput(pi, theta, nPoints, 42)
     val testRDD = sc.parallelize(testData, 2)
     testRDD.cache()
 
-    val model = NaiveBayes.train(3, 4, testRDD)
+    val model = NaiveBayes.train(testRDD)
 
-    val validationData = NaiveBayesSuite.generateNaiveBayesInput(weightPerLabel, weightsMatrix, nPoints, 17)
+    val validationData = NaiveBayesSuite.generateNaiveBayesInput(pi, theta, nPoints, 17)
     val validationRDD = sc.parallelize(validationData, 2)
 
     // Test prediction on RDD.
author	Lian, Cheng <rhythm.mail@gmail.com>	2013-12-30 22:46:32 +0800
committer	Lian, Cheng <rhythm.mail@gmail.com>	2013-12-30 22:46:32 +0800
commit	6d0e2e86dfbca88abc847d3babac2d1f82d61aaf (patch)
tree	982302a5b1b2485ad08b992d9468e2b7c9eb4cc9 /mllib/src/test
parent	f150b6e76c56ed6f604e6dbda7bce6b6278929fb (diff)
download	spark-6d0e2e86dfbca88abc847d3babac2d1f82d61aaf.tar.gz spark-6d0e2e86dfbca88abc847d3babac2d1f82d61aaf.tar.bz2 spark-6d0e2e86dfbca88abc847d3babac2d1f82d61aaf.zip