path: root/python/pyspark/rdd.py
author     Josh Rosen <joshrosen@eecs.berkeley.edu>    2013-02-01 11:48:11 -0800
committer  Josh Rosen <joshrosen@eecs.berkeley.edu>    2013-02-01 11:50:27 -0800
commit     e211f405bcb3cf02c3ae589cf81d9c9dfc70bc03 (patch)
tree       caf8ce5cd59c5d9cfecb5f6d81de4e635c0fa7a1 /python/pyspark/rdd.py
parent     b6a6092177a008cdcd19810d9d3a5715dae927b0 (diff)
Use spark.local.dir for PySpark temp files (SPARK-580).
Diffstat (limited to 'python/pyspark/rdd.py')
-rw-r--r--    python/pyspark/rdd.py    7
1 file changed, 1 insertion(+), 6 deletions(-)
diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py
index d7cad2f372..41ea6e6e14 100644
--- a/python/pyspark/rdd.py
+++ b/python/pyspark/rdd.py
@@ -1,4 +1,3 @@
-import atexit
 from base64 import standard_b64encode as b64enc
 import copy
 from collections import defaultdict
@@ -264,12 +263,8 @@ class RDD(object):
         # Transferring lots of data through Py4J can be slow because
         # socket.readline() is inefficient. Instead, we'll dump the data to a
         # file and read it back.
-        tempFile = NamedTemporaryFile(delete=False)
+        tempFile = NamedTemporaryFile(delete=False, dir=self.ctx._temp_dir)
         tempFile.close()
-        def clean_up_file():
-            try: os.unlink(tempFile.name)
-            except: pass
-        atexit.register(clean_up_file)
         self.ctx._writeIteratorToPickleFile(iterator, tempFile.name)
         # Read the data into Python and deserialize it:
         with open(tempFile.name, 'rb') as tempFile:
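
For context, the change replaces a per-file atexit cleanup hook with a single Spark-managed temporary directory: the intermediate pickle file is now created under self.ctx._temp_dir, which the rest of this commit (not shown in this file's diff) presumably creates under spark.local.dir, so every such file disappears when that one directory is removed at shutdown. Below is a minimal sketch of the same pattern outside of Spark; the SPARK_LOCAL_DIR environment variable, local_dir, and dump_and_reload are stand-ins for illustration, not actual PySpark names.

    import os
    import shutil
    import tempfile

    # Stand-in for spark.local.dir; in Spark this would come from the job's
    # configuration rather than an environment variable.
    local_dir = os.environ.get("SPARK_LOCAL_DIR", tempfile.gettempdir())

    # One directory per "context"; every intermediate file lands inside it.
    temp_dir = tempfile.mkdtemp(prefix="spark-", dir=local_dir)

    def dump_and_reload(items):
        # Create the file inside the managed directory instead of the system
        # default, so no per-file atexit hook is needed for cleanup.
        temp_file = tempfile.NamedTemporaryFile(delete=False, dir=temp_dir)
        temp_file.close()
        with open(temp_file.name, "wb") as f:
            for item in items:
                f.write(item)
        # Read the data back from the same file.
        with open(temp_file.name, "rb") as f:
            return f.read()

    print(dump_and_reload([b"a", b"b", b"c"]))

    # A single cleanup at shutdown removes every intermediate file at once.
    shutil.rmtree(temp_dir, ignore_errors=True)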