aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark/context.py
diff options
context:
space:
mode:
author: Kan Zhang <kzhang@apache.org>, 2014-06-03 18:18:25 -0700
committer: Matei Zaharia <matei@databricks.com>, 2014-06-03 18:18:25 -0700
commit: 21e40ed88bf2c205c3d7f947fde5d5a6f3e29f7f (patch)
tree: 64b67ee5a6c6048b274747dfa0769afcd9edc9b0 /python/pyspark/context.py
parent: f4dd665c85713d4c09731080fca58aee0fa2a85a (diff)
download: spark-21e40ed88bf2c205c3d7f947fde5d5a6f3e29f7f.tar.gz
spark-21e40ed88bf2c205c3d7f947fde5d5a6f3e29f7f.tar.bz2
spark-21e40ed88bf2c205c3d7f947fde5d5a6f3e29f7f.zip
[SPARK-1161] Add saveAsPickleFile and SparkContext.pickleFile in Python
Author: Kan Zhang <kzhang@apache.org>
Closes #755 from kanzhang/SPARK-1161 and squashes the following commits:
24ed8a2 [Kan Zhang] [SPARK-1161] Fixing doc tests
44e0615 [Kan Zhang] [SPARK-1161] Adding an optional batchSize with default value 10
d929429 [Kan Zhang] [SPARK-1161] Add saveAsObjectFile and SparkContext.objectFile in Python
Diffstat (limited to 'python/pyspark/context.py')
-rw-r--r-- python/pyspark/context.py 14
1 files changed, 14 insertions, 0 deletions
diff --git a/python/pyspark/context.py b/python/pyspark/context.py
index 9ae9305d4f..211918f5a0 100644
--- a/python/pyspark/context.py
+++ b/python/pyspark/context.py
@@ -271,6 +271,20 @@ class SparkContext(object):
jrdd = readRDDFromFile(self._jsc, tempFile.name, numSlices)
return RDD(jrdd, self, serializer)
+ def pickleFile(self, name, minPartitions=None):
+ """
+ Load an RDD previously saved using the L{RDD.saveAsPickleFile} method.
+
+ >>> tmpFile = NamedTemporaryFile(delete=True)
+ >>> tmpFile.close()
+ >>> sc.parallelize(range(10)).saveAsPickleFile(tmpFile.name, 5)
+ >>> sorted(sc.pickleFile(tmpFile.name, 3).collect())
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+ """
+ minPartitions = minPartitions or self.defaultMinPartitions  # None (or any falsy value, e.g. 0) falls back to the context default
+ return RDD(self._jsc.objectFile(name, minPartitions), self,
+ BatchedSerializer(PickleSerializer()))  # NOTE(review): presumably must match the serializer saveAsPickleFile writes with -- confirm
+
def textFile(self, name, minPartitions=None):
"""
Read a text file from HDFS, a local file system (available on all