aboutsummaryrefslogtreecommitdiff
path: root/python
diff options
context:
space:
mode:
author Josh Rosen <joshrosen@eecs.berkeley.edu> 2013-07-28 19:24:23 -0700
committer Matei Zaharia <matei@eecs.berkeley.edu> 2013-07-29 22:08:57 -0700
commit b95732632b5b06d494ebd9e539af136ab3b8490e (patch)
tree 5da7e27ebb421e22d9f415fd86ac13adbe0de191 /python
parent 468a36c00526872396196458fd7875fd06ac7108 (diff)
download spark-b95732632b5b06d494ebd9e539af136ab3b8490e.tar.gz
spark-b95732632b5b06d494ebd9e539af136ab3b8490e.tar.bz2
spark-b95732632b5b06d494ebd9e539af136ab3b8490e.zip
Do not inherit master's PYTHONPATH on workers.
This fixes SPARK-832, an issue where PySpark would not work when the master and workers used different SPARK_HOME paths. This change may potentially break code that relied on the master's PYTHONPATH being used on workers. To have custom PYTHONPATH additions used on the workers, users should set a custom PYTHONPATH in spark-env.sh rather than setting it in the shell.
Diffstat (limited to 'python')
-rw-r--r-- python/pyspark/rdd.py 5
1 files changed, 2 insertions, 3 deletions
diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py
index 8734cacb0b..51c2cb9806 100644
--- a/python/pyspark/rdd.py
+++ b/python/pyspark/rdd.py
@@ -756,9 +756,8 @@ class PipelinedRDD(RDD):
self.ctx._gateway._gateway_client)
self.ctx._pickled_broadcast_vars.clear()
class_manifest = self._prev_jrdd.classManifest()
- env = copy.copy(self.ctx.environment)
- env['PYTHONPATH'] = os.environ.get("PYTHONPATH", "")
- env = MapConverter().convert(env, self.ctx._gateway._gateway_client)
+ env = MapConverter().convert(self.ctx.environment,
+ self.ctx._gateway._gateway_client)
python_rdd = self.ctx._jvm.PythonRDD(self._prev_jrdd.rdd(),
pipe_command, env, self.preservesPartitioning, self.ctx.pythonExec,
broadcast_vars, self.ctx._javaAccumulator, class_manifest)