aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark/serializers.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/pyspark/serializers.py')
-rw-r--r--python/pyspark/serializers.py21
1 files changed, 16 insertions, 5 deletions
diff --git a/python/pyspark/serializers.py b/python/pyspark/serializers.py
index a10f85b55a..b35558db3e 100644
--- a/python/pyspark/serializers.py
+++ b/python/pyspark/serializers.py
@@ -111,6 +111,7 @@ class Serializer(object):
class FramedSerializer(Serializer):
+
"""
Serializer that writes objects as a stream of (length, data) pairs,
where C{length} is a 32-bit integer and data is C{length} bytes.
@@ -162,6 +163,7 @@ class FramedSerializer(Serializer):
class BatchedSerializer(Serializer):
+
"""
Serializes a stream of objects in batches by calling its wrapped
Serializer with streams of objects.
@@ -207,6 +209,7 @@ class BatchedSerializer(Serializer):
class CartesianDeserializer(FramedSerializer):
+
"""
Deserializes the JavaRDD cartesian() of two PythonRDDs.
"""
@@ -240,6 +243,7 @@ class CartesianDeserializer(FramedSerializer):
class PairDeserializer(CartesianDeserializer):
+
"""
Deserializes the JavaRDD zip() of two PythonRDDs.
"""
@@ -289,6 +293,7 @@ def _hack_namedtuple(cls):
""" Make class generated by namedtuple picklable """
name = cls.__name__
fields = cls._fields
+
def __reduce__(self):
return (_restore, (name, fields, tuple(self)))
cls.__reduce__ = __reduce__
@@ -301,10 +306,11 @@ def _hijack_namedtuple():
if hasattr(collections.namedtuple, "__hijack"):
return
- global _old_namedtuple # or it will put in closure
+ global _old_namedtuple # or it will put in closure
+
def _copy_func(f):
return types.FunctionType(f.func_code, f.func_globals, f.func_name,
- f.func_defaults, f.func_closure)
+ f.func_defaults, f.func_closure)
_old_namedtuple = _copy_func(collections.namedtuple)
@@ -323,15 +329,16 @@ def _hijack_namedtuple():
# so only hack those in __main__ module
for n, o in sys.modules["__main__"].__dict__.iteritems():
if (type(o) is type and o.__base__ is tuple
- and hasattr(o, "_fields")
- and "__reduce__" not in o.__dict__):
- _hack_namedtuple(o) # hack inplace
+ and hasattr(o, "_fields")
+ and "__reduce__" not in o.__dict__):
+ _hack_namedtuple(o) # hack inplace
_hijack_namedtuple()
class PickleSerializer(FramedSerializer):
+
"""
Serializes objects using Python's cPickle serializer:
@@ -354,6 +361,7 @@ class CloudPickleSerializer(PickleSerializer):
class MarshalSerializer(FramedSerializer):
+
"""
Serializes objects using Python's Marshal serializer:
@@ -367,9 +375,11 @@ class MarshalSerializer(FramedSerializer):
class AutoSerializer(FramedSerializer):
+
"""
Choose marshal or cPickle as serialization protocol autumatically
"""
+
def __init__(self):
FramedSerializer.__init__(self)
self._type = None
@@ -394,6 +404,7 @@ class AutoSerializer(FramedSerializer):
class UTF8Deserializer(Serializer):
+
"""
Deserializes streams written by String.getBytes.
"""