about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- python/pyspark/mllib/classification.py 2
-rw-r--r-- python/pyspark/mllib/tests.py 8
2 files changed, 5 insertions, 5 deletions
diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py
index 3a23e0801f..c5844597c9 100644
--- a/python/pyspark/mllib/classification.py
+++ b/python/pyspark/mllib/classification.py
@@ -154,7 +154,7 @@ class NaiveBayesModel(object):
def predict(self, x):
"""Return the most likely class for a data vector x"""
- return self.labels[numpy.argmax(self.pi + _dot(x, self.theta))]
+ return self.labels[numpy.argmax(self.pi + _dot(x, self.theta.transpose()))]
class NaiveBayes(object):
@classmethod
diff --git a/python/pyspark/mllib/tests.py b/python/pyspark/mllib/tests.py
index d4771d779f..1ee96bb4af 100644
--- a/python/pyspark/mllib/tests.py
+++ b/python/pyspark/mllib/tests.py
@@ -104,10 +104,10 @@ class ListTests(PySparkTestCase):
def test_classification(self):
from pyspark.mllib.classification import LogisticRegressionWithSGD, SVMWithSGD, NaiveBayes
data = [
- LabeledPoint(0.0, [1, 0]),
- LabeledPoint(1.0, [0, 1]),
- LabeledPoint(0.0, [2, 0]),
- LabeledPoint(1.0, [0, 2])
+ LabeledPoint(0.0, [1, 0, 0]),
+ LabeledPoint(1.0, [0, 1, 1]),
+ LabeledPoint(0.0, [2, 0, 0]),
+ LabeledPoint(1.0, [0, 2, 1])
]
rdd = self.sc.parallelize(data)
features = [p.features.tolist() for p in data]