diff options
Diffstat (limited to 'python/pyspark/serializers.py')
-rw-r--r-- | python/pyspark/serializers.py | 17 |
1 files changed, 17 insertions, 0 deletions
diff --git a/python/pyspark/serializers.py b/python/pyspark/serializers.py index df90cafb24..74870c0edc 100644 --- a/python/pyspark/serializers.py +++ b/python/pyspark/serializers.py @@ -67,6 +67,7 @@ import struct import sys import types import collections +import zlib from pyspark import cloudpickle @@ -403,6 +404,22 @@ class AutoSerializer(FramedSerializer): raise ValueError("invalid sevialization type: %s" % _type) +class CompressedSerializer(FramedSerializer): + """ + compress the serialized data + """ + + def __init__(self, serializer): + FramedSerializer.__init__(self) + self.serializer = serializer + + def dumps(self, obj): + return zlib.compress(self.serializer.dumps(obj), 1) + + def loads(self, obj): + return self.serializer.loads(zlib.decompress(obj)) + + class UTF8Deserializer(Serializer): """ |