aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark/serializers.py
diff options
context:
space:
mode:
author Davies Liu <davies.liu@gmail.com> 2014-10-10 14:14:05 -0700
committer Josh Rosen <joshrosen@apache.org> 2014-10-10 14:14:05 -0700
commit 72f36ee571ad27c7c7c70bb9aecc7e6ef51dfd44 (patch)
tree 091ca732b2b48875c478e416807e28a23f0916d7 /python/pyspark/serializers.py
parent 90f73fcc47c7bf881f808653d46a9936f37c3c31 (diff)
downloadspark-72f36ee571ad27c7c7c70bb9aecc7e6ef51dfd44.tar.gz
spark-72f36ee571ad27c7c7c70bb9aecc7e6ef51dfd44.tar.bz2
spark-72f36ee571ad27c7c7c70bb9aecc7e6ef51dfd44.zip
[SPARK-3886] [PySpark] use AutoBatchedSerializer by default
Use AutoBatchedSerializer by default, which chooses a proper batch size based on the size of the serialized objects, so that the size of each serialized batch falls within [64k - 640k]. In the JVM, the serializer also tracks the objects in a batch to detect duplicated objects, so a larger batch may cause OOM in the JVM. Author: Davies Liu <davies.liu@gmail.com> Closes #2740 from davies/batchsize and squashes the following commits: 52cdb88 [Davies Liu] update docs 185f2b9 [Davies Liu] use AutoBatchedSerializer by default
Diffstat (limited to 'python/pyspark/serializers.py')
-rw-r--r--python/pyspark/serializers.py4
1 files changed, 2 insertions, 2 deletions
diff --git a/python/pyspark/serializers.py b/python/pyspark/serializers.py
index 099fa54cf2..3d1a34b281 100644
--- a/python/pyspark/serializers.py
+++ b/python/pyspark/serializers.py
@@ -220,7 +220,7 @@ class AutoBatchedSerializer(BatchedSerializer):
Choose the size of batch automatically based on the size of object
"""
- def __init__(self, serializer, bestSize=1 << 20):
+ def __init__(self, serializer, bestSize=1 << 16):
BatchedSerializer.__init__(self, serializer, -1)
self.bestSize = bestSize
@@ -247,7 +247,7 @@ class AutoBatchedSerializer(BatchedSerializer):
other.serializer == self.serializer)
def __str__(self):
- return "BatchedSerializer<%s>" % str(self.serializer)
+ return "AutoBatchedSerializer<%s>" % str(self.serializer)
class CartesianDeserializer(FramedSerializer):