diff options
Diffstat (limited to 'python/pyspark/context.py')
-rw-r--r-- | python/pyspark/context.py | 7 |
1 file changed, 5 insertions, 2 deletions
diff --git a/python/pyspark/context.py b/python/pyspark/context.py
index 5c32f8ea1d..7a7f59cb50 100644
--- a/python/pyspark/context.py
+++ b/python/pyspark/context.py
@@ -767,7 +767,7 @@ class SparkContext(object):
         SparkContext._next_accum_id += 1
         return Accumulator(SparkContext._next_accum_id - 1, value, accum_param)
 
-    def addFile(self, path):
+    def addFile(self, path, recursive=False):
         """
         Add a file to be downloaded with this Spark job on every node.
         The C{path} passed can be either a local file, a file in HDFS
@@ -778,6 +778,9 @@ class SparkContext(object):
         L{SparkFiles.get(fileName)<pyspark.files.SparkFiles.get>} with the
         filename to find its download location.
 
+        A directory can be given if the recursive option is set to True.
+        Currently directories are only supported for Hadoop-supported filesystems.
+
         >>> from pyspark import SparkFiles
         >>> path = os.path.join(tempdir, "test.txt")
         >>> with open(path, "w") as testFile:
@@ -790,7 +793,7 @@ class SparkContext(object):
         >>> sc.parallelize([1, 2, 3, 4]).mapPartitions(func).collect()
         [100, 200, 300, 400]
         """
-        self._jsc.sc().addFile(path)
+        self._jsc.sc().addFile(path, recursive)
 
     def addPyFile(self, path):
         """