aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--core/src/main/scala/spark/KryoSerializer.scala4
-rw-r--r--docs/configuration.md12
2 files changed, 15 insertions, 1 deletions
diff --git a/core/src/main/scala/spark/KryoSerializer.scala b/core/src/main/scala/spark/KryoSerializer.scala
index d723ab7b1e..c7dbcc6fbc 100644
--- a/core/src/main/scala/spark/KryoSerializer.scala
+++ b/core/src/main/scala/spark/KryoSerializer.scala
@@ -210,6 +210,10 @@ class KryoSerializer extends spark.serializer.Serializer with Logging {
val reg = Class.forName(regCls, true, classLoader).newInstance().asInstanceOf[KryoRegistrator]
reg.registerClasses(kryo)
}
+
+ // Allow disabling Kryo reference tracking if user knows their object graphs don't have loops
+ kryo.setReferences(System.getProperty("spark.kryo.referenceTracking", "true").toBoolean)
+
kryo
}
diff --git a/docs/configuration.md b/docs/configuration.md
index 5a80510959..5c06897cae 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -198,8 +198,18 @@ Apart from these, the following properties are also available, and may be useful
</td>
</tr>
<tr>
+ <td>spark.kryo.referenceTracking</td>
+ <td>true</td>
+ <td>
+ Whether to track references to the same object when serializing data with Kryo, which is
+ necessary if your object graphs have loops and useful for efficiency if they contain multiple
+ references to the same object. Can be disabled to improve performance if you know that neither
+ is the case.
+ </td>
+</tr>
+<tr>
<td>spark.kryoserializer.buffer.mb</td>
- <td>32</td>
+ <td>2</td>
<td>
Maximum object size to allow within Kryo (the library needs to create a buffer at least as
large as the largest single object you'll serialize). Increase this if you get a "buffer limit