about | summary | refs | log | tree | commit | diff
path: root/python/pyspark/worker.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/pyspark/worker.py')
-rw-r--r-- python/pyspark/worker.py 8
1 files changed, 5 insertions, 3 deletions
diff --git a/python/pyspark/worker.py b/python/pyspark/worker.py
index 2bdccb5e93..e1552a0b0b 100644
--- a/python/pyspark/worker.py
+++ b/python/pyspark/worker.py
@@ -31,7 +31,7 @@ from pyspark.broadcast import Broadcast, _broadcastRegistry
from pyspark.files import SparkFiles
from pyspark.serializers import write_with_length, write_int, read_long, \
write_long, read_int, SpecialLengths, UTF8Deserializer, PickleSerializer, \
- CompressedSerializer
+ SizeLimitedStream, LargeObjectSerializer
from pyspark import shuffle
pickleSer = PickleSerializer()
@@ -78,11 +78,13 @@ def main(infile, outfile):
# fetch names and values of broadcast variables
num_broadcast_variables = read_int(infile)
- ser = CompressedSerializer(pickleSer)
+ bser = LargeObjectSerializer()
for _ in range(num_broadcast_variables):
bid = read_long(infile)
if bid >= 0:
- value = ser._read_with_length(infile)
+ size = read_long(infile)
+ s = SizeLimitedStream(infile, size)
+ value = list((bser.load_stream(s)))[0] # read out all the bytes
_broadcastRegistry[bid] = Broadcast(bid, value)
else:
bid = - bid - 1