diff options
author | Kan Zhang <kzhang@apache.org> | 2014-06-03 18:18:25 -0700 |
---|---|---|
committer | Matei Zaharia <matei@databricks.com> | 2014-06-03 18:18:25 -0700 |
commit | 21e40ed88bf2c205c3d7f947fde5d5a6f3e29f7f (patch) | |
tree | 64b67ee5a6c6048b274747dfa0769afcd9edc9b0 /python/pyspark/context.py | |
parent | f4dd665c85713d4c09731080fca58aee0fa2a85a (diff) | |
download | spark-21e40ed88bf2c205c3d7f947fde5d5a6f3e29f7f.tar.gz spark-21e40ed88bf2c205c3d7f947fde5d5a6f3e29f7f.tar.bz2 spark-21e40ed88bf2c205c3d7f947fde5d5a6f3e29f7f.zip |
[SPARK-1161] Add saveAsPickleFile and SparkContext.pickleFile in Python
Author: Kan Zhang <kzhang@apache.org>
Closes #755 from kanzhang/SPARK-1161 and squashes the following commits:
24ed8a2 [Kan Zhang] [SPARK-1161] Fixing doc tests
44e0615 [Kan Zhang] [SPARK-1161] Adding an optional batchSize with default value 10
d929429 [Kan Zhang] [SPARK-1161] Add saveAsObjectFile and SparkContext.objectFile in Python
Diffstat (limited to 'python/pyspark/context.py')
-rw-r--r-- | python/pyspark/context.py | 14 |
1 files changed, 14 insertions, 0 deletions
diff --git a/python/pyspark/context.py b/python/pyspark/context.py
index 9ae9305d4f..211918f5a0 100644
--- a/python/pyspark/context.py
+++ b/python/pyspark/context.py
@@ -271,6 +271,20 @@ class SparkContext(object):
         jrdd = readRDDFromFile(self._jsc, tempFile.name, numSlices)
         return RDD(jrdd, self, serializer)
 
+    def pickleFile(self, name, minPartitions=None):
+        """
+        Load an RDD previously saved using L{RDD.saveAsPickleFile} method.
+
+        >>> tmpFile = NamedTemporaryFile(delete=True)
+        >>> tmpFile.close()
+        >>> sc.parallelize(range(10)).saveAsPickleFile(tmpFile.name, 5)
+        >>> sorted(sc.pickleFile(tmpFile.name, 3).collect())
+        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+        """
+        minPartitions = minPartitions or self.defaultMinPartitions
+        return RDD(self._jsc.objectFile(name, minPartitions), self,
+                   BatchedSerializer(PickleSerializer()))
+
     def textFile(self, name, minPartitions=None):
         """
         Read a text file from HDFS, a local file system (available on all