[SPARK-4387][PySpark] Refactoring python profiling code to make it extensible

This PR is based on #3255 , fix conflicts and code style. Closes #3255. Author: Yandu Oppacher <yandu.oppacher@jadedpixel.com> Author: Davies Liu <davies@databricks.com> Closes #3901 from davies/refactor-python-profile-code and squashes the following commits: b4a9306 [Davies Liu] fix tests 4b79ce8 [Davies Liu] add docstring for profiler_cls 2700e47 [Davies Liu] use BasicProfiler as default 349e341 [Davies Liu] more refactor 6a5d4df [Davies Liu] refactor and fix tests 31bf6b6 [Davies Liu] fix code style 0864b5d [Yandu Oppacher] Remove unused method 76a6c37 [Yandu Oppacher] Added a profile collector to accumulate the profilers per stage 9eefc36 [Yandu Oppacher] Fix doc 9ace076 [Yandu Oppacher] Refactor of profiler, and moved tests around 8739aff [Yandu Oppacher] Code review fixes 9bda3ec [Yandu Oppacher] Refactor profiler code
author: Yandu Oppacher <yandu.oppacher@jadedpixel.com> 2015-01-28 13:48:06 -0800
committer: Josh Rosen <joshrosen@databricks.com> 2015-01-28 13:48:06 -0800
commit: 3bead67d5926a2a798ca0e2bc71e747380493787 (patch)
tree: ce36d8e926702f8da17b9f43c49e1682d1e9fccb /python/pyspark/tests.py
parent: a731314c319a6f265060e05267844069027804fd (diff)
download: spark-3bead67d5926a2a798ca0e2bc71e747380493787.tar.gz
spark-3bead67d5926a2a798ca0e2bc71e747380493787.tar.bz2
spark-3bead67d5926a2a798ca0e2bc71e747380493787.zip
1 files changed, 31 insertions, 9 deletions
diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py
index e694ffcff5..081a77fbb0 100644
--- a/python/pyspark/tests.py
+++ b/python/pyspark/tests.py
@@ -53,6 +53,7 @@ from pyspark.shuffle import Aggregator, InMemoryMerger, ExternalMerger, External
 from pyspark.sql import SQLContext, IntegerType, Row, ArrayType, StructType, StructField, \
     UserDefinedType, DoubleType
 from pyspark import shuffle
+from pyspark.profiler import BasicProfiler
 
 _have_scipy = False
 _have_numpy = False
@@ -743,16 +744,12 @@ class ProfilerTests(PySparkTestCase):
         self.sc = SparkContext('local[4]', class_name, conf=conf)
 
     def test_profiler(self):
+        self.do_computation()
 
-        def heavy_foo(x):
-            for i in range(1 << 20):
-                x = 1
-        rdd = self.sc.parallelize(range(100))
-        rdd.foreach(heavy_foo)
-        profiles = self.sc._profile_stats
-        self.assertEqual(1, len(profiles))
-        id, acc, _ = profiles[0]
-        stats = acc.value
+        profilers = self.sc.profiler_collector.profilers
+        self.assertEqual(1, len(profilers))
+        id, profiler, _ = profilers[0]
+        stats = profiler.stats()
         self.assertTrue(stats is not None)
         width, stat_list = stats.get_print_list([])
         func_names = [func_name for fname, n, func_name in stat_list]
@@ -763,6 +760,31 @@ class ProfilerTests(PySparkTestCase):
         self.sc.dump_profiles(d)
         self.assertTrue("rdd_%d.pstats" % id in os.listdir(d))
 
+    def test_custom_profiler(self):
+        class TestCustomProfiler(BasicProfiler):
+            def show(self, id):
+                self.result = "Custom formatting"
+
+        self.sc.profiler_collector.profiler_cls = TestCustomProfiler
+
+        self.do_computation()
+
+        profilers = self.sc.profiler_collector.profilers
+        self.assertEqual(1, len(profilers))
+        _, profiler, _ = profilers[0]
+        self.assertTrue(isinstance(profiler, TestCustomProfiler))
+
+        self.sc.show_profiles()
+        self.assertEqual("Custom formatting", profiler.result)
+
+    def do_computation(self):
+        def heavy_foo(x):
+            for i in range(1 << 20):
+                x = 1
+
+        rdd = self.sc.parallelize(range(100))
+        rdd.foreach(heavy_foo)
+
 
 class ExamplePointUDT(UserDefinedType):
     """
author	Yandu Oppacher <yandu.oppacher@jadedpixel.com>	2015-01-28 13:48:06 -0800
committer	Josh Rosen <joshrosen@databricks.com>	2015-01-28 13:48:06 -0800
commit	3bead67d5926a2a798ca0e2bc71e747380493787 (patch)
tree	ce36d8e926702f8da17b9f43c49e1682d1e9fccb /python/pyspark/tests.py
parent	a731314c319a6f265060e05267844069027804fd (diff)
download	spark-3bead67d5926a2a798ca0e2bc71e747380493787.tar.gz spark-3bead67d5926a2a798ca0e2bc71e747380493787.tar.bz2 spark-3bead67d5926a2a798ca0e2bc71e747380493787.zip