author    Ewen Cheslack-Postava <me@ewencp.org>    2013-10-22 00:22:37 -0700
committer Ewen Cheslack-Postava <me@ewencp.org>    2013-10-22 00:22:37 -0700
commit    56d230e614d7d03a0c53e262071ab388abddd97f (patch)
tree      0c1f4f2282f3495deeaba8f73864b77c8049c1aa /python
parent    39d2e9b293768b6eeb291313ee94f3e02a0ad522 (diff)
Add classmethod to SparkContext to set system properties.
Add a new classmethod to SparkContext for setting system properties, as is possible in Scala/Java. Unlike the Java/Scala implementations, Python has no access to System until the JVM bridge is created. Since SparkContext handles that, move the initialization of the JVM connection into a separate classmethod that can safely be called repeatedly, as long as the same instance (or no instance) is provided.
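
A minimal usage sketch of the new classmethod (the property value and the master/jobName arguments are illustrative, not part of the patch):

    from pyspark import SparkContext

    # Properties must be set before the context is created; the call
    # transparently launches the Py4J gateway if it is not up yet.
    SparkContext.setSystemProperty("spark.executor.memory", "2g")

    sc = SparkContext("local", "ExampleApp")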
Diffstat (limited to 'python')
-rw-r--r--  python/pyspark/context.py  41
1 file changed, 29 insertions(+), 12 deletions(-)
diff --git a/python/pyspark/context.py b/python/pyspark/context.py
index 597110321a..22f5d92a3b 100644
--- a/python/pyspark/context.py
+++ b/python/pyspark/context.py
@@ -49,6 +49,7 @@ class SparkContext(object):
 
     _lock = Lock()
     _python_includes = None # zip and egg files that need to be added to PYTHONPATH
+
     def __init__(self, master, jobName, sparkHome=None, pyFiles=None,
             environment=None, batchSize=1024):
         """
@@ -67,18 +68,8 @@ class SparkContext(object):
         Java object. Set 1 to disable batching or -1 to use an
         unlimited batch size.
         """
-        with SparkContext._lock:
-            if SparkContext._active_spark_context:
-                raise ValueError("Cannot run multiple SparkContexts at once")
-            else:
-                SparkContext._active_spark_context = self
-                if not SparkContext._gateway:
-                    SparkContext._gateway = launch_gateway()
-                    SparkContext._jvm = SparkContext._gateway.jvm
-                    SparkContext._writeIteratorToPickleFile = \
-                        SparkContext._jvm.PythonRDD.writeIteratorToPickleFile
-                    SparkContext._takePartition = \
-                        SparkContext._jvm.PythonRDD.takePartition
+        SparkContext._ensure_initialized()
+
         self.master = master
         self.jobName = jobName
         self.sparkHome = sparkHome or None # None becomes null in Py4J
@@ -119,6 +110,32 @@ class SparkContext(object):
         self._temp_dir = \
             self._jvm.org.apache.spark.util.Utils.createTempDir(local_dir).getAbsolutePath()
 
+    @classmethod
+    def _ensure_initialized(cls, instance=None):
+        with SparkContext._lock:
+            if not SparkContext._gateway:
+                SparkContext._gateway = launch_gateway()
+                SparkContext._jvm = SparkContext._gateway.jvm
+                SparkContext._writeIteratorToPickleFile = \
+                    SparkContext._jvm.PythonRDD.writeIteratorToPickleFile
+                SparkContext._takePartition = \
+                    SparkContext._jvm.PythonRDD.takePartition
+
+            if instance:
+                if SparkContext._active_spark_context and SparkContext._active_spark_context != instance:
+                    raise ValueError("Cannot run multiple SparkContexts at once")
+                else:
+                    SparkContext._active_spark_context = instance
+
+    @classmethod
+    def setSystemProperty(cls, key, value):
+        """
+        Set a system property, such as spark.executor.memory. This must be
+        invoked before instantiating SparkContext.
+        """
+        SparkContext._ensure_initialized()
+        SparkContext._jvm.java.lang.System.setProperty(key, value)
+
     @property
     def defaultParallelism(self):
         """