Fix dot-product shape bug in Python NaiveBayesModel.predict

Without a `transpose()` on `self.theta`, `_dot(x, self.theta)` raises *ValueError: matrices are not aligned*. The former test case simply ignored this situation: it used two classes and two features, so the square `self.theta` masked the shape mismatch. Author: Xusen Yin <yinxusen@gmail.com> Closes #463 from yinxusen/python-naive-bayes and squashes the following commits: fcbe3bc [Xusen Yin] fix bugs of dot in python
author: Xusen Yin <yinxusen@gmail.com> 2014-04-22 11:06:18 -0700
committer: Patrick Wendell <pwendell@gmail.com> 2014-04-22 11:06:18 -0700
commit: c919798f0912dc03c8365b9a384d9ee6d5b25c51 (patch)
tree: 386eac712d26333b20a3950a551b6906a972824a
parent: 0f87e6ad4366a8c453a7415bc89399030003c264 (diff)
download: spark-c919798f0912dc03c8365b9a384d9ee6d5b25c51.tar.gz
spark-c919798f0912dc03c8365b9a384d9ee6d5b25c51.tar.bz2
spark-c919798f0912dc03c8365b9a384d9ee6d5b25c51.zip
2 files changed, 5 insertions, 5 deletions
diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py
index 3a23e0801f..c5844597c9 100644
--- a/python/pyspark/mllib/classification.py
+++ b/python/pyspark/mllib/classification.py
@@ -154,7 +154,7 @@ class NaiveBayesModel(object):
 
     def predict(self, x):
         """Return the most likely class for a data vector x"""
-        return self.labels[numpy.argmax(self.pi + _dot(x, self.theta))]
+        return self.labels[numpy.argmax(self.pi + _dot(x, self.theta.transpose()))]
 
 class NaiveBayes(object):
     @classmethod
diff --git a/python/pyspark/mllib/tests.py b/python/pyspark/mllib/tests.py
index d4771d779f..1ee96bb4af 100644
--- a/python/pyspark/mllib/tests.py
+++ b/python/pyspark/mllib/tests.py
@@ -104,10 +104,10 @@ class ListTests(PySparkTestCase):
     def test_classification(self):
         from pyspark.mllib.classification import LogisticRegressionWithSGD, SVMWithSGD, NaiveBayes
         data = [
-            LabeledPoint(0.0, [1, 0]),
-            LabeledPoint(1.0, [0, 1]),
-            LabeledPoint(0.0, [2, 0]),
-            LabeledPoint(1.0, [0, 2])
+            LabeledPoint(0.0, [1, 0, 0]),
+            LabeledPoint(1.0, [0, 1, 1]),
+            LabeledPoint(0.0, [2, 0, 0]),
+            LabeledPoint(1.0, [0, 2, 1])
         ]
         rdd = self.sc.parallelize(data)
         features = [p.features.tolist() for p in data]
author	Xusen Yin <yinxusen@gmail.com>	2014-04-22 11:06:18 -0700
committer	Patrick Wendell <pwendell@gmail.com>	2014-04-22 11:06:18 -0700
commit	c919798f0912dc03c8365b9a384d9ee6d5b25c51 (patch)
tree	386eac712d26333b20a3950a551b6906a972824a
parent	0f87e6ad4366a8c453a7415bc89399030003c264 (diff)
download	spark-c919798f0912dc03c8365b9a384d9ee6d5b25c51.tar.gz spark-c919798f0912dc03c8365b9a384d9ee6d5b25c51.tar.bz2 spark-c919798f0912dc03c8365b9a384d9ee6d5b25c51.zip