author    Josh Rosen <joshrosen@apache.org>  2013-11-03 11:03:02 -0800
committer Josh Rosen <joshrosen@apache.org>  2013-11-03 11:03:02 -0800
commit    7d68a81a8ed5f49fefb3bd0fa0b9d3835cc7d86e (patch)
tree      f189e5af2716bfb2473ce5ce063ddddebe30f646 /python/pyspark/serializers.py
parent    a48d88d206fae348720ab077a624b3c57293374f (diff)
Remove Pickle-wrapping of Java objects in PySpark.
If we support custom serializers, the Python worker will know what type of input to expect, so we won't need to wrap Tuple2 and Strings into pickled tuples and strings.
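
For context, a minimal sketch (not part of the patch) of the wire framing involved: Java's DataOutputStream.writeUTF() emits a two-byte big-endian length followed by the encoded bytes, which is exactly what the read_mutf8 helper added below consumes, with no pickle framing around it. The buffer contents here are hypothetical.

import struct
from io import BytesIO

# Hypothetical buffer (assumption, for illustration): the bytes that
# DataOutputStream.writeUTF("hello") would produce on the wire.
buf = BytesIO(struct.pack('>H', 5) + b'hello')

# Decode it the same way the new read_mutf8 helper does: a 2-byte
# big-endian (unsigned short) length prefix, then that many bytes.
length = struct.unpack('>H', buf.read(2))[0]
print(buf.read(length).decode('utf8'))  # -> hello
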
Diffstat (limited to 'python/pyspark/serializers.py')
-rw-r--r--  python/pyspark/serializers.py  |  18 ++++++++++++++++++
1 file changed, 18 insertions(+), 0 deletions(-)
diff --git a/python/pyspark/serializers.py b/python/pyspark/serializers.py
index fbc280fd37..fd02e1ee8f 100644
--- a/python/pyspark/serializers.py
+++ b/python/pyspark/serializers.py
@@ -93,6 +93,14 @@ def write_with_length(obj, stream):
     stream.write(obj)
 
 
+def read_mutf8(stream):
+    """
+    Read a string written with Java's DataOutputStream.writeUTF() method.
+    """
+    length = struct.unpack('>H', stream.read(2))[0]
+    return stream.read(length).decode('utf8')
+
+
 def read_with_length(stream):
     length = read_int(stream)
     obj = stream.read(length)
@@ -112,3 +120,13 @@ def read_from_pickle_file(stream):
                 yield obj
     except EOFError:
         return
+
+
+def read_pairs_from_pickle_file(stream):
+    try:
+        while True:
+            a = load_pickle(read_with_length(stream))
+            b = load_pickle(read_with_length(stream))
+            yield (a, b)
+    except EOFError:
+        return
\ No newline at end of file
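
Usage sketch for the new pair reader (not part of the patch): the read_int, read_with_length, and write_with_length stand-ins below assume the module's length-prefixed framing, a 4-byte big-endian length followed by a pickled payload; only read_pairs_from_pickle_file itself comes from the diff.

import struct
from io import BytesIO
from pickle import dumps, loads

load_pickle = loads

def read_int(stream):
    # Stand-in for the module's read_int: a 4-byte big-endian length,
    # raising EOFError once the stream is exhausted.
    raw = stream.read(4)
    if not raw:
        raise EOFError
    return struct.unpack('>i', raw)[0]

def read_with_length(stream):
    return stream.read(read_int(stream))

def write_with_length(obj, stream):
    stream.write(struct.pack('>i', len(obj)))
    stream.write(obj)

def read_pairs_from_pickle_file(stream):
    # The function added by this commit: each Tuple2 arrives as two
    # consecutive length-prefixed pickles, yielded as a Python tuple.
    try:
        while True:
            a = load_pickle(read_with_length(stream))
            b = load_pickle(read_with_length(stream))
            yield (a, b)
    except EOFError:
        return

buf = BytesIO()
for key, value in [('a', 1), ('b', 2)]:
    write_with_length(dumps(key), buf)
    write_with_length(dumps(value), buf)
buf.seek(0)
print(list(read_pairs_from_pickle_file(buf)))  # -> [('a', 1), ('b', 2)]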