diff options
author | Ewen Cheslack-Postava <me@ewencp.org> | 2013-10-22 00:22:37 -0700 |
---|---|---|
committer | Ewen Cheslack-Postava <me@ewencp.org> | 2013-10-22 00:22:37 -0700 |
commit | 56d230e614d7d03a0c53e262071ab388abddd97f (patch) | |
tree | 0c1f4f2282f3495deeaba8f73864b77c8049c1aa | |
parent | 39d2e9b293768b6eeb291313ee94f3e02a0ad522 (diff) | |
download | spark-56d230e614d7d03a0c53e262071ab388abddd97f.tar.gz spark-56d230e614d7d03a0c53e262071ab388abddd97f.tar.bz2 spark-56d230e614d7d03a0c53e262071ab388abddd97f.zip |
Add classmethod to SparkContext to set system properties.
Add a new classmethod to SparkContext to set system properties, as is
possible in Scala/Java. Unlike the Java/Scala implementations, there's
no access to System until the JVM bridge is created. Since
SparkContext handles that, move the initialization of the JVM
connection to a separate classmethod that can safely be called
repeatedly as long as the same instance (or no instance) is provided.
-rw-r--r-- | python/pyspark/context.py | 41 |
1 file changed, 29 insertions, 12 deletions
diff --git a/python/pyspark/context.py b/python/pyspark/context.py index 597110321a..22f5d92a3b 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -49,6 +49,7 @@ class SparkContext(object): _lock = Lock() _python_includes = None # zip and egg files that need to be added to PYTHONPATH + def __init__(self, master, jobName, sparkHome=None, pyFiles=None, environment=None, batchSize=1024): """ @@ -67,18 +68,8 @@ class SparkContext(object): Java object. Set 1 to disable batching or -1 to use an unlimited batch size. """ - with SparkContext._lock: - if SparkContext._active_spark_context: - raise ValueError("Cannot run multiple SparkContexts at once") - else: - SparkContext._active_spark_context = self - if not SparkContext._gateway: - SparkContext._gateway = launch_gateway() - SparkContext._jvm = SparkContext._gateway.jvm - SparkContext._writeIteratorToPickleFile = \ - SparkContext._jvm.PythonRDD.writeIteratorToPickleFile - SparkContext._takePartition = \ - SparkContext._jvm.PythonRDD.takePartition + SparkContext._ensure_initialized() + self.master = master self.jobName = jobName self.sparkHome = sparkHome or None # None becomes null in Py4J @@ -119,6 +110,32 @@ class SparkContext(object): self._temp_dir = \ self._jvm.org.apache.spark.util.Utils.createTempDir(local_dir).getAbsolutePath() + @classmethod + def _ensure_initialized(cls, instance=None): + with SparkContext._lock: + if not SparkContext._gateway: + SparkContext._gateway = launch_gateway() + SparkContext._jvm = SparkContext._gateway.jvm + SparkContext._writeIteratorToPickleFile = \ + SparkContext._jvm.PythonRDD.writeIteratorToPickleFile + SparkContext._takePartition = \ + SparkContext._jvm.PythonRDD.takePartition + + if instance: + if SparkContext._active_spark_context and SparkContext._active_spark_context != instance: + raise ValueError("Cannot run multiple SparkContexts at once") + else: + SparkContext._active_spark_context = instance + + @classmethod + def setSystemProperty(cls, key, value): + """ + Set a system property, such as spark.executor.memory. This must be + invoked before instantiating SparkContext. + """ + SparkContext._ensure_initialized() + SparkContext._jvm.java.lang.System.setProperty(key, value) + @property def defaultParallelism(self): """ |