diff options
author | Josh Rosen <joshrosen@apache.org> | 2013-11-03 11:03:02 -0800 |
---|---|---|
committer | Josh Rosen <joshrosen@apache.org> | 2013-11-03 11:03:02 -0800 |
commit | 7d68a81a8ed5f49fefb3bd0fa0b9d3835cc7d86e (patch) | |
tree | f189e5af2716bfb2473ce5ce063ddddebe30f646 /python/pyspark/serializers.py | |
parent | a48d88d206fae348720ab077a624b3c57293374f (diff) | |
download | spark-7d68a81a8ed5f49fefb3bd0fa0b9d3835cc7d86e.tar.gz spark-7d68a81a8ed5f49fefb3bd0fa0b9d3835cc7d86e.tar.bz2 spark-7d68a81a8ed5f49fefb3bd0fa0b9d3835cc7d86e.zip |
Remove Pickle-wrapping of Java objects in PySpark.
If we support custom serializers, the Python
worker will know what type of input to expect,
so we won't need to wrap Tuple2 and Strings into
pickled tuples and strings.
Diffstat (limited to 'python/pyspark/serializers.py')
-rw-r--r-- | python/pyspark/serializers.py | 18 |
1 files changed, 18 insertions, 0 deletions
diff --git a/python/pyspark/serializers.py b/python/pyspark/serializers.py index fbc280fd37..fd02e1ee8f 100644 --- a/python/pyspark/serializers.py +++ b/python/pyspark/serializers.py @@ -93,6 +93,14 @@ def write_with_length(obj, stream): stream.write(obj) +def read_mutf8(stream): + """ + Read a string written with Java's DataOutputStream.writeUTF() method. + """ + length = struct.unpack('>H', stream.read(2))[0] + return stream.read(length).decode('utf8') + + def read_with_length(stream): length = read_int(stream) obj = stream.read(length) @@ -112,3 +120,13 @@ def read_from_pickle_file(stream): yield obj except EOFError: return + + +def read_pairs_from_pickle_file(stream): + try: + while True: + a = load_pickle(read_with_length(stream)) + b = load_pickle(read_with_length(stream)) + yield (a, b) + except EOFError: + return
\ No newline at end of file |