From 98be8169f07eb0f1b8f01776c71d0e1ed3d5e4d5 Mon Sep 17 00:00:00 2001 From: Davies Liu Date: Tue, 17 Nov 2015 19:50:02 -0800 Subject: [SPARK-11737] [SQL] Fix serialization of UTF8String with Kyro The default implementation of serialization UTF8String with Kyro may be not correct (BYTE_ARRAY_OFFSET could be different across JVM) Author: Davies Liu Closes #9704 from davies/kyro_string. --- unsafe/pom.xml | 4 ++++ .../org/apache/spark/unsafe/types/UTF8String.java | 24 ++++++++++++++++++++-- 2 files changed, 26 insertions(+), 2 deletions(-) (limited to 'unsafe') diff --git a/unsafe/pom.xml b/unsafe/pom.xml index caf1f77890..a1c1111364 100644 --- a/unsafe/pom.xml +++ b/unsafe/pom.xml @@ -36,6 +36,10 @@ + + com.twitter + chill_${scala.binary.version} + diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java index b7aecb5102..4bd3fd7772 100644 --- a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java +++ b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java @@ -24,6 +24,11 @@ import java.nio.ByteOrder; import java.util.Arrays; import java.util.Map; +import com.esotericsoftware.kryo.Kryo; +import com.esotericsoftware.kryo.KryoSerializable; +import com.esotericsoftware.kryo.io.Input; +import com.esotericsoftware.kryo.io.Output; + import org.apache.spark.unsafe.Platform; import org.apache.spark.unsafe.array.ByteArrayMethods; @@ -38,9 +43,9 @@ import static org.apache.spark.unsafe.Platform.*; *

* Note: This is not designed for general use cases, should not be used outside SQL. */ -public final class UTF8String implements Comparable, Externalizable { +public final class UTF8String implements Comparable, Externalizable, KryoSerializable { - // These are only updated by readExternal() + // These are only updated by readExternal() or read() @Nonnull private Object base; private long offset; @@ -1003,4 +1008,19 @@ public final class UTF8String implements Comparable, Externalizable in.readFully((byte[]) base); } + @Override + public void write(Kryo kryo, Output out) { + byte[] bytes = getBytes(); + out.writeInt(bytes.length); + out.write(bytes); + } + + @Override + public void read(Kryo kryo, Input in) { + this.offset = BYTE_ARRAY_OFFSET; + this.numBytes = in.readInt(); + this.base = new byte[numBytes]; + in.read((byte[]) base); + } + } -- cgit v1.2.3