author     Matei Zaharia <matei@databricks.com>    2013-12-29 14:31:45 -0500
committer  Matei Zaharia <matei@databricks.com>    2013-12-29 14:32:05 -0500
commit     615fb649d66b13371927a051d249433d746c5f19 (patch)
tree       5a3b3487b46517765d31cdc0f2c2f340c714666d /python/pyspark/context.py
parent     cd00225db9b90fc845fd1458831bdd9d014d1bb6 (diff)
Fix some other Python tests due to initializing JVM in a different way
The test in context.py created two different instances of the SparkContext class by copying "globals", so that some tests could have a global "sc" object while others tried initializing their own contexts. This led to two JVM gateways being created, since SparkConf also looked at pyspark.context.SparkContext to get the JVM.
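Roughly, the fix is to hand SparkConf the JVM that SparkContext has already launched instead of letting SparkConf look one up on its own. The snippet below is a simplified sketch of that idea, not the real pyspark.conf.SparkConf; only the _jvm keyword argument is confirmed by this diff.

# Illustrative sketch only -- the real SparkConf lives in pyspark/conf.py.
class SparkConf(object):
    def __init__(self, _jvm=None):
        if _jvm is None:
            # Old path: reach back into pyspark.context for a JVM handle,
            # which could launch a second Py4J gateway when the caller had
            # copied SparkContext's globals instead of sharing the class.
            from pyspark.context import SparkContext
            SparkContext._ensure_initialized()
            _jvm = SparkContext._jvm
        # New path: SparkContext passes in its own gateway's JVM, so only
        # one gateway ever exists per Python process.
        self._jconf = _jvm.SparkConf()
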
Diffstat (limited to 'python/pyspark/context.py')
-rw-r--r--  python/pyspark/context.py | 23
1 file changed, 15 insertions(+), 8 deletions(-)
diff --git a/python/pyspark/context.py b/python/pyspark/context.py
index 97c1526afd..9d75c2b6f1 100644
--- a/python/pyspark/context.py
+++ b/python/pyspark/context.py
@@ -81,7 +81,8 @@ class SparkContext(object):
"""
SparkContext._ensure_initialized(self)
- self.conf = conf or SparkConf()
+ self.environment = environment or {}
+ self.conf = conf or SparkConf(_jvm=self._jvm)
self._batchSize = batchSize # -1 represents an unlimited batch size
self._unbatched_serializer = serializer
if batchSize == 1:
@@ -90,23 +91,30 @@ class SparkContext(object):
             self.serializer = BatchedSerializer(self._unbatched_serializer,
                                                 batchSize)

-        # Set parameters passed directly on our conf; these operations will be no-ops
-        # if the parameters were None
+        # Set parameters passed directly to us on the conf; these operations will be
+        # no-ops if the parameters were None
         self.conf.setMaster(master)
         self.conf.setAppName(appName)
         self.conf.setSparkHome(sparkHome)
-        environment = environment or {}
-        for key, value in environment.iteritems():
-            self.conf.setExecutorEnv(key, value)
+        if environment:
+            for key, value in environment.iteritems():
+                self.conf.setExecutorEnv(key, value)

+        # Check that we have at least the required parameters
         if not self.conf.contains("spark.master"):
             raise Exception("A master URL must be set in your configuration")
         if not self.conf.contains("spark.appName"):
             raise Exception("An application name must be set in your configuration")

+        # Read back our properties from the conf in case we loaded some of them from
+        # the classpath or an external config file
         self.master = self.conf.get("spark.master")
         self.appName = self.conf.get("spark.appName")
         self.sparkHome = self.conf.getOrElse("spark.home", None)
+        for (k, v) in self.conf.getAll():
+            if k.startswith("spark.executorEnv."):
+                varName = k[len("spark.executorEnv."):]
+                self.environment[varName] = v

         # Create the Java SparkContext through Py4J
         self._jsc = self._jvm.JavaSparkContext(self.conf._jconf)
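The read-back loop in the hunk above means executor environment variables round-trip through the conf no matter how they were supplied. A small, hypothetical usage sketch (it assumes a local master and that a Py4J gateway can be launched from this process):

from pyspark.conf import SparkConf
from pyspark.context import SparkContext

# Variables can arrive either through the `environment` argument or as
# spark.executorEnv.* entries set directly on the conf ...
conf = SparkConf().setMaster("local").setAppName("env-demo")
conf.setExecutorEnv("FROM_CONF", "1")
sc = SparkContext(conf=conf, environment={"FROM_ARG": "2"})

# ... and after __init__ both show up in sc.environment, because the
# constructor reads every spark.executorEnv.* key back off the conf.
assert sc.environment["FROM_CONF"] == "1"
assert sc.environment["FROM_ARG"] == "2"
sc.stop()
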
@@ -147,8 +155,7 @@ class SparkContext(object):
             if not SparkContext._gateway:
                 SparkContext._gateway = launch_gateway()
                 SparkContext._jvm = SparkContext._gateway.jvm
-                SparkContext._writeToFile = \
-                    SparkContext._jvm.PythonRDD.writeToFile
+                SparkContext._writeToFile = SparkContext._jvm.PythonRDD.writeToFile

             if instance:
                 if SparkContext._active_spark_context and SparkContext._active_spark_context != instance:
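The last hunk is only a line-join, but _ensure_initialized is the one place the Py4J gateway gets created. A trimmed-down sketch of that lazy, class-level initialization pattern, assuming pyspark's launch_gateway helper (the real method also records _writeToFile and validates the active context, as shown above):

import threading

from pyspark.java_gateway import launch_gateway

class JVMHolder(object):
    # Hypothetical stand-in for SparkContext's class-level gateway state.
    _gateway = None
    _jvm = None
    _lock = threading.Lock()

    @classmethod
    def _ensure_initialized(cls):
        with cls._lock:
            if not cls._gateway:
                # launch_gateway() starts the gateway JVM once per Python
                # process; every later call reuses the same handle.
                cls._gateway = launch_gateway()
                cls._jvm = cls._gateway.jvm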