diff options
author | Xiangrui Meng <meng@databricks.com> | 2014-08-19 21:01:23 -0700 |
---|---|---|
committer | Xiangrui Meng <meng@databricks.com> | 2014-08-19 21:01:23 -0700 |
commit | 068b6fe6a10eb1c6b2102d88832203267f030e85 (patch) | |
tree | eb12c866970102b636d0edb80351bee0b6cb7b28 /mllib/src/test | |
parent | 0e3ab94d413fd70fff748fded42ab5e2ebd66fcc (diff) | |
download | spark-068b6fe6a10eb1c6b2102d88832203267f030e85.tar.gz spark-068b6fe6a10eb1c6b2102d88832203267f030e85.tar.bz2 spark-068b6fe6a10eb1c6b2102d88832203267f030e85.zip |
[SPARK-3130][MLLIB] detect negative values in naive Bayes
because NB treats feature values as term frequencies. jkbradley
Author: Xiangrui Meng <meng@databricks.com>
Closes #2038 from mengxr/nb-neg and squashes the following commits:
52c37c3 [Xiangrui Meng] address comments
65f892d [Xiangrui Meng] detect negative values in nb
Diffstat (limited to 'mllib/src/test')
-rw-r--r-- | mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala | 28 |
1 files changed, 28 insertions, 0 deletions
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala index 06cdd04f5f..80989bc074 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala @@ -21,6 +21,7 @@ import scala.util.Random import org.scalatest.FunSuite +import org.apache.spark.SparkException import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.util.{LocalClusterSparkContext, LocalSparkContext} @@ -95,6 +96,33 @@ class NaiveBayesSuite extends FunSuite with LocalSparkContext { // Test prediction on Array. validatePrediction(validationData.map(row => model.predict(row.features)), validationData) } + + test("detect negative values") { + val dense = Seq( + LabeledPoint(1.0, Vectors.dense(1.0)), + LabeledPoint(0.0, Vectors.dense(-1.0)), + LabeledPoint(1.0, Vectors.dense(1.0)), + LabeledPoint(1.0, Vectors.dense(0.0))) + intercept[SparkException] { + NaiveBayes.train(sc.makeRDD(dense, 2)) + } + val sparse = Seq( + LabeledPoint(1.0, Vectors.sparse(1, Array(0), Array(1.0))), + LabeledPoint(0.0, Vectors.sparse(1, Array(0), Array(-1.0))), + LabeledPoint(1.0, Vectors.sparse(1, Array(0), Array(1.0))), + LabeledPoint(1.0, Vectors.sparse(1, Array.empty, Array.empty))) + intercept[SparkException] { + NaiveBayes.train(sc.makeRDD(sparse, 2)) + } + val nan = Seq( + LabeledPoint(1.0, Vectors.sparse(1, Array(0), Array(1.0))), + LabeledPoint(0.0, Vectors.sparse(1, Array(0), Array(Double.NaN))), + LabeledPoint(1.0, Vectors.sparse(1, Array(0), Array(1.0))), + LabeledPoint(1.0, Vectors.sparse(1, Array.empty, Array.empty))) + intercept[SparkException] { + NaiveBayes.train(sc.makeRDD(nan, 2)) + } + } } class NaiveBayesClusterSuite extends FunSuite with LocalClusterSparkContext { |