aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark/tests.py
diff options
context:
space:
mode:
authorYandu Oppacher <yandu.oppacher@jadedpixel.com>2015-01-28 13:48:06 -0800
committerJosh Rosen <joshrosen@databricks.com>2015-01-28 13:48:06 -0800
commit3bead67d5926a2a798ca0e2bc71e747380493787 (patch)
treece36d8e926702f8da17b9f43c49e1682d1e9fccb /python/pyspark/tests.py
parenta731314c319a6f265060e05267844069027804fd (diff)
downloadspark-3bead67d5926a2a798ca0e2bc71e747380493787.tar.gz
spark-3bead67d5926a2a798ca0e2bc71e747380493787.tar.bz2
spark-3bead67d5926a2a798ca0e2bc71e747380493787.zip
[SPARK-4387][PySpark] Refactoring python profiling code to make it extensible
This PR is based on #3255 , fix conflicts and code style. Closes #3255. Author: Yandu Oppacher <yandu.oppacher@jadedpixel.com> Author: Davies Liu <davies@databricks.com> Closes #3901 from davies/refactor-python-profile-code and squashes the following commits: b4a9306 [Davies Liu] fix tests 4b79ce8 [Davies Liu] add docstring for profiler_cls 2700e47 [Davies Liu] use BasicProfiler as default 349e341 [Davies Liu] more refactor 6a5d4df [Davies Liu] refactor and fix tests 31bf6b6 [Davies Liu] fix code style 0864b5d [Yandu Oppacher] Remove unused method 76a6c37 [Yandu Oppacher] Added a profile collector to accumulate the profilers per stage 9eefc36 [Yandu Oppacher] Fix doc 9ace076 [Yandu Oppacher] Refactor of profiler, and moved tests around 8739aff [Yandu Oppacher] Code review fixes 9bda3ec [Yandu Oppacher] Refactor profiler code
Diffstat (limited to 'python/pyspark/tests.py')
-rw-r--r-- python/pyspark/tests.py | 40
1 file changed, 31 insertions(+), 9 deletions(-)
diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py
index e694ffcff5..081a77fbb0 100644
--- a/python/pyspark/tests.py
+++ b/python/pyspark/tests.py
@@ -53,6 +53,7 @@ from pyspark.shuffle import Aggregator, InMemoryMerger, ExternalMerger, External
from pyspark.sql import SQLContext, IntegerType, Row, ArrayType, StructType, StructField, \
UserDefinedType, DoubleType
from pyspark import shuffle
+from pyspark.profiler import BasicProfiler
_have_scipy = False
_have_numpy = False
@@ -743,16 +744,12 @@ class ProfilerTests(PySparkTestCase):
self.sc = SparkContext('local[4]', class_name, conf=conf)
def test_profiler(self):
+ self.do_computation()
- def heavy_foo(x):
- for i in range(1 << 20):
- x = 1
- rdd = self.sc.parallelize(range(100))
- rdd.foreach(heavy_foo)
- profiles = self.sc._profile_stats
- self.assertEqual(1, len(profiles))
- id, acc, _ = profiles[0]
- stats = acc.value
+ profilers = self.sc.profiler_collector.profilers
+ self.assertEqual(1, len(profilers))
+ id, profiler, _ = profilers[0]
+ stats = profiler.stats()
self.assertTrue(stats is not None)
width, stat_list = stats.get_print_list([])
func_names = [func_name for fname, n, func_name in stat_list]
@@ -763,6 +760,31 @@ class ProfilerTests(PySparkTestCase):
self.sc.dump_profiles(d)
self.assertTrue("rdd_%d.pstats" % id in os.listdir(d))
+ def test_custom_profiler(self):
+ class TestCustomProfiler(BasicProfiler):
+ def show(self, id):
+ self.result = "Custom formatting"
+
+ self.sc.profiler_collector.profiler_cls = TestCustomProfiler
+
+ self.do_computation()
+
+ profilers = self.sc.profiler_collector.profilers
+ self.assertEqual(1, len(profilers))
+ _, profiler, _ = profilers[0]
+ self.assertTrue(isinstance(profiler, TestCustomProfiler))
+
+ self.sc.show_profiles()
+ self.assertEqual("Custom formatting", profiler.result)
+
+ def do_computation(self):
+ def heavy_foo(x):
+ for i in range(1 << 20):
+ x = 1
+
+ rdd = self.sc.parallelize(range(100))
+ rdd.foreach(heavy_foo)
+
class ExamplePointUDT(UserDefinedType):
"""