aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark/mllib/tests.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/pyspark/mllib/tests.py')
-rw-r--r--python/pyspark/mllib/tests.py11
1 files changed, 7 insertions, 4 deletions
diff --git a/python/pyspark/mllib/tests.py b/python/pyspark/mllib/tests.py
index 9d1e5be637..6f3ec8ac94 100644
--- a/python/pyspark/mllib/tests.py
+++ b/python/pyspark/mllib/tests.py
@@ -39,6 +39,7 @@ except:
class VectorTests(unittest.TestCase):
+
def test_serialize(self):
sv = SparseVector(4, {1: 1, 3: 2})
dv = array([1., 2., 3., 4.])
@@ -81,6 +82,7 @@ class VectorTests(unittest.TestCase):
class ListTests(PySparkTestCase):
+
"""
Test MLlib algorithms on plain lists, to make sure they're passed through
as NumPy arrays.
@@ -128,7 +130,7 @@ class ListTests(PySparkTestCase):
self.assertTrue(nb_model.predict(features[2]) <= 0)
self.assertTrue(nb_model.predict(features[3]) > 0)
- categoricalFeaturesInfo = {0: 3} # feature 0 has 3 categories
+ categoricalFeaturesInfo = {0: 3} # feature 0 has 3 categories
dt_model = \
DecisionTree.trainClassifier(rdd, numClasses=2,
categoricalFeaturesInfo=categoricalFeaturesInfo)
@@ -168,7 +170,7 @@ class ListTests(PySparkTestCase):
self.assertTrue(rr_model.predict(features[2]) <= 0)
self.assertTrue(rr_model.predict(features[3]) > 0)
- categoricalFeaturesInfo = {0: 2} # feature 0 has 2 categories
+ categoricalFeaturesInfo = {0: 2} # feature 0 has 2 categories
dt_model = \
DecisionTree.trainRegressor(rdd, categoricalFeaturesInfo=categoricalFeaturesInfo)
self.assertTrue(dt_model.predict(features[0]) <= 0)
@@ -179,6 +181,7 @@ class ListTests(PySparkTestCase):
@unittest.skipIf(not _have_scipy, "SciPy not installed")
class SciPyTests(PySparkTestCase):
+
"""
Test both vector operations and MLlib algorithms with SciPy sparse matrices,
if SciPy is available.
@@ -276,7 +279,7 @@ class SciPyTests(PySparkTestCase):
self.assertTrue(nb_model.predict(features[2]) <= 0)
self.assertTrue(nb_model.predict(features[3]) > 0)
- categoricalFeaturesInfo = {0: 3} # feature 0 has 3 categories
+ categoricalFeaturesInfo = {0: 3} # feature 0 has 3 categories
dt_model = DecisionTree.trainClassifier(rdd, numClasses=2,
categoricalFeaturesInfo=categoricalFeaturesInfo)
self.assertTrue(dt_model.predict(features[0]) <= 0)
@@ -315,7 +318,7 @@ class SciPyTests(PySparkTestCase):
self.assertTrue(rr_model.predict(features[2]) <= 0)
self.assertTrue(rr_model.predict(features[3]) > 0)
- categoricalFeaturesInfo = {0: 2} # feature 0 has 2 categories
+ categoricalFeaturesInfo = {0: 2} # feature 0 has 2 categories
dt_model = DecisionTree.trainRegressor(rdd, categoricalFeaturesInfo=categoricalFeaturesInfo)
self.assertTrue(dt_model.predict(features[0]) <= 0)
self.assertTrue(dt_model.predict(features[1]) > 0)