about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala | 1
-rw-r--r-- python/pyspark/context.py | 6
-rw-r--r-- python/pyspark/rdd.py | 3
-rw-r--r-- resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala | 1
4 files changed, 6 insertions, 5 deletions
diff --git a/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala b/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala
index 0b1cec2df8..a8f732b11f 100644
--- a/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala
@@ -85,6 +85,7 @@ object PythonRunner {
// pass conf spark.pyspark.python to python process, the only way to pass info to
// python process is through environment variable.
sparkConf.get(PYSPARK_PYTHON).foreach(env.put("PYSPARK_PYTHON", _))
+ sys.env.get("PYTHONHASHSEED").foreach(env.put("PYTHONHASHSEED", _))
builder.redirectErrorStream(true) // Ugly but needed for stdout and stderr to synchronize
try {
val process = builder.start()
diff --git a/python/pyspark/context.py b/python/pyspark/context.py
index ac4b2b035f..2961cda553 100644
--- a/python/pyspark/context.py
+++ b/python/pyspark/context.py
@@ -173,10 +173,8 @@ class SparkContext(object):
if k.startswith("spark.executorEnv."):
varName = k[len("spark.executorEnv."):]
self.environment[varName] = v
- if sys.version >= '3.3' and 'PYTHONHASHSEED' not in os.environ:
- # disable randomness of hash of string in worker, if this is not
- # launched by spark-submit
- self.environment["PYTHONHASHSEED"] = "0"
+
+ self.environment["PYTHONHASHSEED"] = os.environ.get("PYTHONHASHSEED", "0")
# Create the Java SparkContext through Py4J
self._jsc = jsc or self._initialize_context(self._conf._jconf)
diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py
index b384b2b507..a5e6e2b054 100644
--- a/python/pyspark/rdd.py
+++ b/python/pyspark/rdd.py
@@ -68,7 +68,8 @@ def portable_hash(x):
>>> portable_hash((None, 1)) & 0xffffffff
219750521
"""
- if sys.version >= '3.3' and 'PYTHONHASHSEED' not in os.environ:
+
+ if sys.version_info >= (3, 2, 3) and 'PYTHONHASHSEED' not in os.environ:
raise Exception("Randomness of hash of string should be disabled via PYTHONHASHSEED")
if x is None:
diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
index fa99cd3b64..e86bd54593 100644
--- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
+++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
@@ -817,6 +817,7 @@ private[spark] class Client(
sys.env.get(envname).foreach(env(envname) = _)
}
}
+ sys.env.get("PYTHONHASHSEED").foreach(env.put("PYTHONHASHSEED", _))
}
sys.env.get(ENV_DIST_CLASSPATH).foreach { dcp =>