diff options
Diffstat (limited to 'python/pyspark/worker.py')
-rw-r--r-- | python/pyspark/worker.py | 8 |
1 file changed, 5 insertions, 3 deletions
diff --git a/python/pyspark/worker.py b/python/pyspark/worker.py
index 2bdccb5e93..e1552a0b0b 100644
--- a/python/pyspark/worker.py
+++ b/python/pyspark/worker.py
@@ -31,7 +31,7 @@ from pyspark.broadcast import Broadcast, _broadcastRegistry
 from pyspark.files import SparkFiles
 from pyspark.serializers import write_with_length, write_int, read_long, \
     write_long, read_int, SpecialLengths, UTF8Deserializer, PickleSerializer, \
-    CompressedSerializer
+    SizeLimitedStream, LargeObjectSerializer
 from pyspark import shuffle
 
 pickleSer = PickleSerializer()
@@ -78,11 +78,13 @@ def main(infile, outfile):
 
         # fetch names and values of broadcast variables
         num_broadcast_variables = read_int(infile)
-        ser = CompressedSerializer(pickleSer)
+        bser = LargeObjectSerializer()
         for _ in range(num_broadcast_variables):
             bid = read_long(infile)
             if bid >= 0:
-                value = ser._read_with_length(infile)
+                size = read_long(infile)
+                s = SizeLimitedStream(infile, size)
+                value = list((bser.load_stream(s)))[0]  # read out all the bytes
                 _broadcastRegistry[bid] = Broadcast(bid, value)
             else:
                 bid = - bid - 1