author    Ewen Cheslack-Postava <me@ewencp.org>    2013-10-22 00:22:37 -0700
committer Ewen Cheslack-Postava <me@ewencp.org>    2013-10-22 00:22:37 -0700
commit    56d230e614d7d03a0c53e262071ab388abddd97f (patch)
tree      0c1f4f2282f3495deeaba8f73864b77c8049c1aa /python
parent    39d2e9b293768b6eeb291313ee94f3e02a0ad522 (diff)
Add classmethod to SparkContext to set system properties.
Add a new classmethod to SparkContext for setting system properties, as is possible in Scala/Java. Unlike the Java/Scala implementations, Python has no access to System until the JVM bridge is created. Since SparkContext handles that, move the initialization of the JVM connection into a separate classmethod that can safely be called repeatedly, as long as the same instance (or no instance) is provided.
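
A minimal usage sketch of the new classmethod (the property value and the master/jobName arguments are illustrative, not part of the patch):

    from pyspark import SparkContext

    # Properties must be set before the context is created; the call
    # transparently launches the Py4J gateway if it is not up yet.
    SparkContext.setSystemProperty("spark.executor.memory", "2g")

    sc = SparkContext("local", "ExampleApp")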
Diffstat (limited to 'python')
-rw-r--r--  python/pyspark/context.py  41
1 file changed, 29 insertions(+), 12 deletions(-)
diff --git a/python/pyspark/context.py b/python/pyspark/context.py
index 597110321a..22f5d92a3b 100644
--- a/python/pyspark/context.py
+++ b/python/pyspark/context.py
@@ -49,6 +49,7 @@ class SparkContext(object):
 
     _lock = Lock()
     _python_includes = None # zip and egg files that need to be added to PYTHONPATH
+
     def __init__(self, master, jobName, sparkHome=None, pyFiles=None,
             environment=None, batchSize=1024):
         """
@@ -67,18 +68,8 @@ class SparkContext(object):
         Java object. Set 1 to disable batching or -1 to use an
         unlimited batch size.
         """
-        with SparkContext._lock:
-            if SparkContext._active_spark_context:
-                raise ValueError("Cannot run multiple SparkContexts at once")
-            else:
-                SparkContext._active_spark_context = self
-                if not SparkContext._gateway:
-                    SparkContext._gateway = launch_gateway()
-                    SparkContext._jvm = SparkContext._gateway.jvm
-                    SparkContext._writeIteratorToPickleFile = \
-                        SparkContext._jvm.PythonRDD.writeIteratorToPickleFile
-                    SparkContext._takePartition = \
-                        SparkContext._jvm.PythonRDD.takePartition
+        SparkContext._ensure_initialized()
+
         self.master = master
         self.jobName = jobName
         self.sparkHome = sparkHome or None # None becomes null in Py4J
@@ -119,6 +110,32 @@ class SparkContext(object):
         self._temp_dir = \
             self._jvm.org.apache.spark.util.Utils.createTempDir(local_dir).getAbsolutePath()
 
+    @classmethod
+    def _ensure_initialized(cls, instance=None):
+        with SparkContext._lock:
+            if not SparkContext._gateway:
+                SparkContext._gateway = launch_gateway()
+                SparkContext._jvm = SparkContext._gateway.jvm
+                SparkContext._writeIteratorToPickleFile = \
+                    SparkContext._jvm.PythonRDD.writeIteratorToPickleFile
+                SparkContext._takePartition = \
+                    SparkContext._jvm.PythonRDD.takePartition
+
+            if instance:
+                if SparkContext._active_spark_context and SparkContext._active_spark_context != instance:
+                    raise ValueError("Cannot run multiple SparkContexts at once")
+                else:
+                    SparkContext._active_spark_context = instance
+
+    @classmethod
+    def setSystemProperty(cls, key, value):
+        """
+        Set a system property, such as spark.executor.memory. This must be
+        invoked before instantiating SparkContext.
+        """
+        SparkContext._ensure_initialized()
+        SparkContext._jvm.java.lang.System.setProperty(key, value)
+
     @property
     def defaultParallelism(self):
         """