author    Josh Rosen <joshrosen@databricks.com>  2015-06-26 08:12:22 -0700
committer Davies Liu <davies@databricks.com>     2015-06-26 08:12:22 -0700
commit    41afa16500e682475eaa80e31c0434b7ab66abcb (patch)
tree      298d6e9e0739fa91a99422c1308eb15ff6ad12f3 /python
parent    37bf76a2de2143ec6348a3d43b782227849520cc (diff)
[SPARK-8652] [PYSPARK] Check return value for all uses of doctest.testmod()
This patch addresses a critical issue in the PySpark tests: several of our Python modules' `__main__` methods call `doctest.testmod()` in order to run doctests but forget to check and handle its return value. As a result, some PySpark test failures can go unnoticed because they will not fail the build.

Fortunately, there was only one test failure which was masked by this bug: a `pyspark.profiler` doctest was failing due to changes in RDD pipelining.

Author: Josh Rosen <joshrosen@databricks.com>

Closes #7032 from JoshRosen/testmod-fix and squashes the following commits:

60dbdc0 [Josh Rosen] Account for int vs. long formatting change in Python 3
8b8d80a [Josh Rosen] Fix failing test.
e6423f9 [Josh Rosen] Check return code for all uses of doctest.testmod().
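The guard applied in each module below follows the same shape: unpack the (failed, attempted) tuple that doctest.testmod() returns and exit non-zero on failure, so the build notices. A minimal, self-contained sketch of that pattern (the `double` function and its doctest are illustrative, not taken from Spark):

    import doctest


    def double(x):
        """Double x.

        >>> double(4)
        8
        """
        return 2 * x


    if __name__ == "__main__":
        # doctest.testmod() returns a (failed, attempted) named tuple; if the
        # result is discarded, doctest failures never affect the exit status.
        (failure_count, test_count) = doctest.testmod()
        if failure_count:
            exit(-1)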
Diffstat (limited to 'python')
-rw-r--r--  python/pyspark/accumulators.py    | 4 +++-
-rw-r--r--  python/pyspark/broadcast.py       | 4 +++-
-rw-r--r--  python/pyspark/heapq3.py          | 5 +++--
-rw-r--r--  python/pyspark/profiler.py        | 8 ++++++--
-rw-r--r--  python/pyspark/serializers.py     | 8 +++++---
-rw-r--r--  python/pyspark/shuffle.py         | 4 +++-
-rw-r--r--  python/pyspark/streaming/util.py  | 4 +++-
7 files changed, 26 insertions(+), 11 deletions(-)
diff --git a/python/pyspark/accumulators.py b/python/pyspark/accumulators.py
index adca90ddaf..6ef8cf53cc 100644
--- a/python/pyspark/accumulators.py
+++ b/python/pyspark/accumulators.py
@@ -264,4 +264,6 @@ def _start_update_server():
 
 if __name__ == "__main__":
     import doctest
-    doctest.testmod()
+    (failure_count, test_count) = doctest.testmod()
+    if failure_count:
+        exit(-1)
diff --git a/python/pyspark/broadcast.py b/python/pyspark/broadcast.py
index 3de4615428..663c9abe08 100644
--- a/python/pyspark/broadcast.py
+++ b/python/pyspark/broadcast.py
@@ -115,4 +115,6 @@ class Broadcast(object):
 
 if __name__ == "__main__":
     import doctest
-    doctest.testmod()
+    (failure_count, test_count) = doctest.testmod()
+    if failure_count:
+        exit(-1)
diff --git a/python/pyspark/heapq3.py b/python/pyspark/heapq3.py
index 4ef2afe035..b27e91a4cc 100644
--- a/python/pyspark/heapq3.py
+++ b/python/pyspark/heapq3.py
@@ -883,6 +883,7 @@ except ImportError:
 
 
 if __name__ == "__main__":
-
     import doctest
-    print(doctest.testmod())
+    (failure_count, test_count) = doctest.testmod()
+    if failure_count:
+        exit(-1)
diff --git a/python/pyspark/profiler.py b/python/pyspark/profiler.py
index d18daaabfc..44d17bd629 100644
--- a/python/pyspark/profiler.py
+++ b/python/pyspark/profiler.py
@@ -90,9 +90,11 @@ class Profiler(object):
     >>> sc = SparkContext('local', 'test', conf=conf, profiler_cls=MyCustomProfiler)
     >>> sc.parallelize(range(1000)).map(lambda x: 2 * x).take(10)
     [0, 2, 4, 6, 8, 10, 12, 14, 16, 18]
+    >>> sc.parallelize(range(1000)).count()
+    1000
     >>> sc.show_profiles()
     My custom profiles for RDD:1
-    My custom profiles for RDD:2
+    My custom profiles for RDD:3
     >>> sc.stop()
     """
 
@@ -169,4 +171,6 @@ class BasicProfiler(Profiler):
 
 if __name__ == "__main__":
     import doctest
-    doctest.testmod()
+    (failure_count, test_count) = doctest.testmod()
+    if failure_count:
+        exit(-1)
diff --git a/python/pyspark/serializers.py b/python/pyspark/serializers.py
index 7f9d0a338d..411b4dbf48 100644
--- a/python/pyspark/serializers.py
+++ b/python/pyspark/serializers.py
@@ -44,8 +44,8 @@ which contains two batches of two objects:
 
 >>> rdd.glom().collect()
 [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11], [12, 13, 14, 15]]
->>> rdd._jrdd.count()
-8L
+>>> int(rdd._jrdd.count())
+8
 >>> sc.stop()
 """
 
@@ -556,4 +556,6 @@ def write_with_length(obj, stream):
 
 if __name__ == '__main__':
     import doctest
-    doctest.testmod()
+    (failure_count, test_count) = doctest.testmod()
+    if failure_count:
+        exit(-1)
diff --git a/python/pyspark/shuffle.py b/python/pyspark/shuffle.py
index 67752c0d15..8fb71bac64 100644
--- a/python/pyspark/shuffle.py
+++ b/python/pyspark/shuffle.py
@@ -838,4 +838,6 @@ class ExternalGroupBy(ExternalMerger):
 
 if __name__ == "__main__":
     import doctest
-    doctest.testmod()
+    (failure_count, test_count) = doctest.testmod()
+    if failure_count:
+        exit(-1)
diff --git a/python/pyspark/streaming/util.py b/python/pyspark/streaming/util.py
index 34291f30a5..a9bfec2aab 100644
--- a/python/pyspark/streaming/util.py
+++ b/python/pyspark/streaming/util.py
@@ -125,4 +125,6 @@ def rddToFileName(prefix, suffix, timestamp):
 
 if __name__ == "__main__":
     import doctest
-    doctest.testmod()
+    (failure_count, test_count) = doctest.testmod()
+    if failure_count:
+        exit(-1)