diff options
author | Koert Kuipers <koert@tresata.com> | 2014-07-30 00:18:59 -0700 |
---|---|---|
committer | Patrick Wendell <pwendell@gmail.com> | 2014-07-30 00:26:14 -0700 |
commit | 7c5fc28af42daaa6725af083d78c2372f3d0a338 (patch) | |
tree | b11558abc44907a58902da9a6b32c4ac40e16a39 /core/src | |
parent | 7003c163dbb46bb7313aab130a33486a356435a8 (diff) | |
download | spark-7c5fc28af42daaa6725af083d78c2372f3d0a338.tar.gz spark-7c5fc28af42daaa6725af083d78c2372f3d0a338.tar.bz2 spark-7c5fc28af42daaa6725af083d78c2372f3d0a338.zip |
SPARK-2543: Allow user to set maximum Kryo buffer size
Author: Koert Kuipers <koert@tresata.com>
Closes #735 from koertkuipers/feat-kryo-max-buffersize and squashes the following commits:
15f6d81 [Koert Kuipers] change default for spark.kryoserializer.buffer.max.mb to 64mb and add some documentation
1bcc22c [Koert Kuipers] Merge branch 'master' into feat-kryo-max-buffersize
0c9f8eb [Koert Kuipers] make default for kryo max buffer size 16MB
143ec4d [Koert Kuipers] test resizable buffer in kryo Output
0732445 [Koert Kuipers] support setting maxCapacity to something different than capacity in kryo Output
Diffstat (limited to 'core/src')
-rw-r--r-- | core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala | 3 | ||||
-rw-r--r-- | core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala | 30 |
2 files changed, 32 insertions, 1 deletions
diff --git a/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala b/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala
index fa79b25759..e60b802a86 100644
--- a/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala
+++ b/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala
@@ -48,11 +48,12 @@ class KryoSerializer(conf: SparkConf)
   with Serializable {
 
   private val bufferSize = conf.getInt("spark.kryoserializer.buffer.mb", 2) * 1024 * 1024
+  private val maxBufferSize = conf.getInt("spark.kryoserializer.buffer.max.mb", 64) * 1024 * 1024
   private val referenceTracking = conf.getBoolean("spark.kryo.referenceTracking", true)
   private val registrationRequired = conf.getBoolean("spark.kryo.registrationRequired", false)
   private val registrator = conf.getOption("spark.kryo.registrator")
 
-  def newKryoOutput() = new KryoOutput(bufferSize)
+  def newKryoOutput() = new KryoOutput(bufferSize, math.max(bufferSize, maxBufferSize))
 
   def newKryo(): Kryo = {
     val instantiator = new EmptyScalaKryoInstantiator
diff --git a/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala b/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala
index 79280d1a06..789b773bae 100644
--- a/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala
@@ -209,6 +209,36 @@ class KryoSerializerSuite extends FunSuite with SharedSparkContext {
   }
 }
 
+class KryoSerializerResizableOutputSuite extends FunSuite {
+  import org.apache.spark.SparkConf
+  import org.apache.spark.SparkContext
+  import org.apache.spark.LocalSparkContext
+  import org.apache.spark.SparkException
+
+  // trial and error showed this will not serialize with 1mb buffer
+  val x = (1 to 400000).toArray
+
+  test("kryo without resizable output buffer should fail on large array") {
+    val conf = new SparkConf(false)
+    conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
+    conf.set("spark.kryoserializer.buffer.mb", "1")
+    conf.set("spark.kryoserializer.buffer.max.mb", "1")
+    val sc = new SparkContext("local", "test", conf)
+    intercept[SparkException](sc.parallelize(x).collect)
+    LocalSparkContext.stop(sc)
+  }
+
+  test("kryo with resizable output buffer should succeed on large array") {
+    val conf = new SparkConf(false)
+    conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
+    conf.set("spark.kryoserializer.buffer.mb", "1")
+    conf.set("spark.kryoserializer.buffer.max.mb", "2")
+    val sc = new SparkContext("local", "test", conf)
+    assert(sc.parallelize(x).collect === x)
+    LocalSparkContext.stop(sc)
+  }
+}
+
 object KryoTest {
   case class CaseClass(i: Int, s: String) {}
 