about | summary | refs | log | tree | commit | diff
path: root/python/pyspark/mllib
diff options
context:
space:
mode:
author: Gábor Lipták <gliptak@gmail.com> 2016-01-19 14:06:53 -0800
committer: Xiangrui Meng <meng@databricks.com> 2016-01-19 14:06:53 -0800
commit: c6f971b4aeca7265ab374fa46c5c452461d9b6a7 (patch)
tree: bc328578e9976f621b9be5862bd84b5461d755f5 /python/pyspark/mllib
parent: c78e2080e00a73159ab749691ad634fa6c0a2302 (diff)
download: spark-c6f971b4aeca7265ab374fa46c5c452461d9b6a7.tar.gz
spark-c6f971b4aeca7265ab374fa46c5c452461d9b6a7.tar.bz2
spark-c6f971b4aeca7265ab374fa46c5c452461d9b6a7.zip
[SPARK-11295] Add packages to JUnit output for Python tests
SPARK-11295 Add packages to JUnit output for Python tests

This improves grouping/display of test case results.

Author: Gábor Lipták <gliptak@gmail.com>

Closes #9263 from gliptak/SPARK-11295.
Diffstat (limited to 'python/pyspark/mllib')
-rw-r--r-- python/pyspark/mllib/tests.py 24
1 files changed, 14 insertions, 10 deletions
diff --git a/python/pyspark/mllib/tests.py b/python/pyspark/mllib/tests.py
index 32ed48e103..ea7d297cba 100644
--- a/python/pyspark/mllib/tests.py
+++ b/python/pyspark/mllib/tests.py
@@ -77,21 +77,24 @@ except:
pass
ser = PickleSerializer()
-sc = SparkContext('local[4]', "MLlib tests")
class MLlibTestCase(unittest.TestCase):
def setUp(self):
- self.sc = sc
+ self.sc = SparkContext('local[4]', "MLlib tests")
+
+ def tearDown(self):
+ self.sc.stop()
class MLLibStreamingTestCase(unittest.TestCase):
def setUp(self):
- self.sc = sc
+ self.sc = SparkContext('local[4]', "MLlib tests")
self.ssc = StreamingContext(self.sc, 1.0)
def tearDown(self):
self.ssc.stop(False)
+ self.sc.stop()
@staticmethod
def _eventually(condition, timeout=30.0, catch_assertions=False):
@@ -1166,7 +1169,7 @@ class StreamingKMeansTest(MLLibStreamingTestCase):
clusterWeights=[1.0, 1.0, 1.0, 1.0])
predict_data = [[[1.5, 1.5]], [[-1.5, 1.5]], [[-1.5, -1.5]], [[1.5, -1.5]]]
- predict_data = [sc.parallelize(batch, 1) for batch in predict_data]
+ predict_data = [self.sc.parallelize(batch, 1) for batch in predict_data]
predict_stream = self.ssc.queueStream(predict_data)
predict_val = stkm.predictOn(predict_stream)
@@ -1197,7 +1200,7 @@ class StreamingKMeansTest(MLLibStreamingTestCase):
# classification based in the initial model would have been 0
# proving that the model is updated.
batches = [[[-0.5], [0.6], [0.8]], [[0.2], [-0.1], [0.3]]]
- batches = [sc.parallelize(batch) for batch in batches]
+ batches = [self.sc.parallelize(batch) for batch in batches]
input_stream = self.ssc.queueStream(batches)
predict_results = []
@@ -1230,7 +1233,7 @@ class LinearDataGeneratorTests(MLlibTestCase):
self.assertEqual(len(point.features), 3)
linear_data = LinearDataGenerator.generateLinearRDD(
- sc=sc, nexamples=6, nfeatures=2, eps=0.1,
+ sc=self.sc, nexamples=6, nfeatures=2, eps=0.1,
nParts=2, intercept=0.0).collect()
self.assertEqual(len(linear_data), 6)
for point in linear_data:
@@ -1406,7 +1409,7 @@ class StreamingLinearRegressionWithTests(MLLibStreamingTestCase):
for i in range(10):
batch = LinearDataGenerator.generateLinearInput(
0.0, [10.0, 10.0], xMean, xVariance, 100, 42 + i, 0.1)
- batches.append(sc.parallelize(batch))
+ batches.append(self.sc.parallelize(batch))
input_stream = self.ssc.queueStream(batches)
slr.trainOn(input_stream)
@@ -1430,7 +1433,7 @@ class StreamingLinearRegressionWithTests(MLLibStreamingTestCase):
for i in range(10):
batch = LinearDataGenerator.generateLinearInput(
0.0, [10.0], [0.0], [1.0 / 3.0], 100, 42 + i, 0.1)
- batches.append(sc.parallelize(batch))
+ batches.append(self.sc.parallelize(batch))
model_weights = []
input_stream = self.ssc.queueStream(batches)
@@ -1463,7 +1466,7 @@ class StreamingLinearRegressionWithTests(MLLibStreamingTestCase):
0.0, [10.0, 10.0], [0.0, 0.0], [1.0 / 3.0, 1.0 / 3.0],
100, 42 + i, 0.1)
batches.append(
- sc.parallelize(batch).map(lambda lp: (lp.label, lp.features)))
+ self.sc.parallelize(batch).map(lambda lp: (lp.label, lp.features)))
input_stream = self.ssc.queueStream(batches)
output_stream = slr.predictOnValues(input_stream)
@@ -1494,7 +1497,7 @@ class StreamingLinearRegressionWithTests(MLLibStreamingTestCase):
for i in range(10):
batch = LinearDataGenerator.generateLinearInput(
0.0, [10.0], [0.0], [1.0 / 3.0], 100, 42 + i, 0.1)
- batches.append(sc.parallelize(batch))
+ batches.append(self.sc.parallelize(batch))
predict_batches = [
b.map(lambda lp: (lp.label, lp.features)) for b in batches]
@@ -1580,6 +1583,7 @@ class ALSTests(MLlibTestCase):
if __name__ == "__main__":
+ from pyspark.mllib.tests import *
if not _have_scipy:
print("NOTE: Skipping SciPy tests as it does not seem to be installed")
if xmlrunner: