diff options
author | Gábor Lipták <gliptak@gmail.com> | 2016-01-20 11:11:10 -0800 |
---|---|---|
committer | Xiangrui Meng <meng@databricks.com> | 2016-01-20 11:11:10 -0800 |
commit | 9bb35c5b59e58dbebbdc6856d611bff73dd35a91 (patch) | |
tree | 87163ccf9190707d865209e8598ab955d5eb5976 /python/pyspark/mllib/tests.py | |
parent | 9376ae723e4ec0515120c488541617a0538f8879 (diff) | |
download | spark-9bb35c5b59e58dbebbdc6856d611bff73dd35a91.tar.gz spark-9bb35c5b59e58dbebbdc6856d611bff73dd35a91.tar.bz2 spark-9bb35c5b59e58dbebbdc6856d611bff73dd35a91.zip |
[SPARK-11295][PYSPARK] Add packages to JUnit output for Python tests
This is #9263 from gliptak (improving grouping/display of test case results) with a small fix to the bisecting k-means unit test.
Author: Gábor Lipták <gliptak@gmail.com>
Author: Xiangrui Meng <meng@databricks.com>
Closes #10850 from mengxr/SPARK-11295.
Diffstat (limited to 'python/pyspark/mllib/tests.py')
-rw-r--r-- | python/pyspark/mllib/tests.py | 26 |
1 file changed, 15 insertions, 11 deletions
diff --git a/python/pyspark/mllib/tests.py b/python/pyspark/mllib/tests.py index 32ed48e103..79ce4959c9 100644 --- a/python/pyspark/mllib/tests.py +++ b/python/pyspark/mllib/tests.py @@ -77,21 +77,24 @@ except: pass ser = PickleSerializer() -sc = SparkContext('local[4]', "MLlib tests") class MLlibTestCase(unittest.TestCase): def setUp(self): - self.sc = sc + self.sc = SparkContext('local[4]', "MLlib tests") + + def tearDown(self): + self.sc.stop() class MLLibStreamingTestCase(unittest.TestCase): def setUp(self): - self.sc = sc + self.sc = SparkContext('local[4]', "MLlib tests") self.ssc = StreamingContext(self.sc, 1.0) def tearDown(self): self.ssc.stop(False) + self.sc.stop() @staticmethod def _eventually(condition, timeout=30.0, catch_assertions=False): @@ -423,7 +426,7 @@ class ListTests(MLlibTestCase): from pyspark.mllib.clustering import BisectingKMeans data = array([0.0, 0.0, 1.0, 1.0, 9.0, 8.0, 8.0, 9.0]).reshape(4, 2) bskm = BisectingKMeans() - model = bskm.train(sc.parallelize(data, 2), k=4) + model = bskm.train(self.sc.parallelize(data, 2), k=4) p = array([0.0, 0.0]) rdd_p = self.sc.parallelize([p]) self.assertEqual(model.predict(p), model.predict(rdd_p).first()) @@ -1166,7 +1169,7 @@ class StreamingKMeansTest(MLLibStreamingTestCase): clusterWeights=[1.0, 1.0, 1.0, 1.0]) predict_data = [[[1.5, 1.5]], [[-1.5, 1.5]], [[-1.5, -1.5]], [[1.5, -1.5]]] - predict_data = [sc.parallelize(batch, 1) for batch in predict_data] + predict_data = [self.sc.parallelize(batch, 1) for batch in predict_data] predict_stream = self.ssc.queueStream(predict_data) predict_val = stkm.predictOn(predict_stream) @@ -1197,7 +1200,7 @@ class StreamingKMeansTest(MLLibStreamingTestCase): # classification based in the initial model would have been 0 # proving that the model is updated. 
batches = [[[-0.5], [0.6], [0.8]], [[0.2], [-0.1], [0.3]]] - batches = [sc.parallelize(batch) for batch in batches] + batches = [self.sc.parallelize(batch) for batch in batches] input_stream = self.ssc.queueStream(batches) predict_results = [] @@ -1230,7 +1233,7 @@ class LinearDataGeneratorTests(MLlibTestCase): self.assertEqual(len(point.features), 3) linear_data = LinearDataGenerator.generateLinearRDD( - sc=sc, nexamples=6, nfeatures=2, eps=0.1, + sc=self.sc, nexamples=6, nfeatures=2, eps=0.1, nParts=2, intercept=0.0).collect() self.assertEqual(len(linear_data), 6) for point in linear_data: @@ -1406,7 +1409,7 @@ class StreamingLinearRegressionWithTests(MLLibStreamingTestCase): for i in range(10): batch = LinearDataGenerator.generateLinearInput( 0.0, [10.0, 10.0], xMean, xVariance, 100, 42 + i, 0.1) - batches.append(sc.parallelize(batch)) + batches.append(self.sc.parallelize(batch)) input_stream = self.ssc.queueStream(batches) slr.trainOn(input_stream) @@ -1430,7 +1433,7 @@ class StreamingLinearRegressionWithTests(MLLibStreamingTestCase): for i in range(10): batch = LinearDataGenerator.generateLinearInput( 0.0, [10.0], [0.0], [1.0 / 3.0], 100, 42 + i, 0.1) - batches.append(sc.parallelize(batch)) + batches.append(self.sc.parallelize(batch)) model_weights = [] input_stream = self.ssc.queueStream(batches) @@ -1463,7 +1466,7 @@ class StreamingLinearRegressionWithTests(MLLibStreamingTestCase): 0.0, [10.0, 10.0], [0.0, 0.0], [1.0 / 3.0, 1.0 / 3.0], 100, 42 + i, 0.1) batches.append( - sc.parallelize(batch).map(lambda lp: (lp.label, lp.features))) + self.sc.parallelize(batch).map(lambda lp: (lp.label, lp.features))) input_stream = self.ssc.queueStream(batches) output_stream = slr.predictOnValues(input_stream) @@ -1494,7 +1497,7 @@ class StreamingLinearRegressionWithTests(MLLibStreamingTestCase): for i in range(10): batch = LinearDataGenerator.generateLinearInput( 0.0, [10.0], [0.0], [1.0 / 3.0], 100, 42 + i, 0.1) - batches.append(sc.parallelize(batch)) + 
batches.append(self.sc.parallelize(batch)) predict_batches = [ b.map(lambda lp: (lp.label, lp.features)) for b in batches] @@ -1580,6 +1583,7 @@ class ALSTests(MLlibTestCase): if __name__ == "__main__": + from pyspark.mllib.tests import * if not _have_scipy: print("NOTE: Skipping SciPy tests as it does not seem to be installed") if xmlrunner: |