aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark/worker.py
diff options
context:
space:
mode:
authorJosh Rosen <joshrosen@eecs.berkeley.edu>2013-01-21 16:42:24 -0800
committerJosh Rosen <joshrosen@eecs.berkeley.edu>2013-01-21 17:34:17 -0800
commitef711902c1f42db14c8ddd524195f0a9efb56e65 (patch)
treee770a7439d3983c13346cbd81aa1eeeef23e2571 /python/pyspark/worker.py
parent506077c9938cd411842fe42404aa6b74b45b23a1 (diff)
downloadspark-ef711902c1f42db14c8ddd524195f0a9efb56e65.tar.gz
spark-ef711902c1f42db14c8ddd524195f0a9efb56e65.tar.bz2
spark-ef711902c1f42db14c8ddd524195f0a9efb56e65.zip
Don't download files to master's working directory.
This should avoid exceptions caused by existing files with different contents. I also removed some unused code.
Diffstat (limited to 'python/pyspark/worker.py')
-rw-r--r--python/pyspark/worker.py3
1 file changed, 3 insertions, 0 deletions
diff --git a/python/pyspark/worker.py b/python/pyspark/worker.py
index b2b9288089..e7bdb7682b 100644
--- a/python/pyspark/worker.py
+++ b/python/pyspark/worker.py
@@ -8,6 +8,7 @@ from base64 import standard_b64decode
from pyspark.accumulators import _accumulatorRegistry
from pyspark.broadcast import Broadcast, _broadcastRegistry
from pyspark.cloudpickle import CloudPickler
+from pyspark.files import SparkFiles
from pyspark.serializers import write_with_length, read_with_length, write_int, \
read_long, read_int, dump_pickle, load_pickle, read_from_pickle_file
@@ -23,6 +24,8 @@ def load_obj():
def main():
split_index = read_int(sys.stdin)
+ spark_files_dir = load_pickle(read_with_length(sys.stdin))
+ SparkFiles._root_directory = spark_files_dir
num_broadcast_variables = read_int(sys.stdin)
for _ in range(num_broadcast_variables):
bid = read_long(sys.stdin)