about summary refs log tree commit diff
path: root/python/pyspark/context.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/pyspark/context.py')
-rw-r--r--python/pyspark/context.py7
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/python/pyspark/context.py b/python/pyspark/context.py
index 5c32f8ea1d..7a7f59cb50 100644
--- a/python/pyspark/context.py
+++ b/python/pyspark/context.py
@@ -767,7 +767,7 @@ class SparkContext(object):
SparkContext._next_accum_id += 1
return Accumulator(SparkContext._next_accum_id - 1, value, accum_param)
- def addFile(self, path):
+ def addFile(self, path, recursive=False):
"""
Add a file to be downloaded with this Spark job on every node.
The C{path} passed can be either a local file, a file in HDFS
@@ -778,6 +778,9 @@ class SparkContext(object):
L{SparkFiles.get(fileName)<pyspark.files.SparkFiles.get>} with the
filename to find its download location.
+ A directory can be given if the recursive option is set to True.
+ Currently directories are only supported for Hadoop-supported filesystems.
+
>>> from pyspark import SparkFiles
>>> path = os.path.join(tempdir, "test.txt")
>>> with open(path, "w") as testFile:
@@ -790,7 +793,7 @@ class SparkContext(object):
>>> sc.parallelize([1, 2, 3, 4]).mapPartitions(func).collect()
[100, 200, 300, 400]
"""
- self._jsc.sc().addFile(path)
+ self._jsc.sc().addFile(path, recursive)
def addPyFile(self, path):
"""