diff options
author | Davies Liu <davies@databricks.com> | 2015-08-14 22:30:35 -0700 |
---|---|---|
committer | Davies Liu <davies.liu@gmail.com> | 2015-08-14 22:30:35 -0700 |
commit | 7c1e56825b716a7d703dff38254b4739755ac0c4 (patch) | |
tree | 0de34f9c8b81c8787f47ca8973b1bc94e62962ef /unsafe/src | |
parent | 71a3af8a94f900a26ac7094f22ec1216cab62e15 (diff) | |
download | spark-7c1e56825b716a7d703dff38254b4739755ac0c4.tar.gz spark-7c1e56825b716a7d703dff38254b4739755ac0c4.tar.bz2 spark-7c1e56825b716a7d703dff38254b4739755ac0c4.zip |
[SPARK-9725] [SQL] fix serialization of UTF8String across different JVM
The BYTE_ARRAY_OFFSET could be different in JVM with different configurations (for example, different heap size, 24 if heap > 32G, otherwise 16), so offset of UTF8String is not portable, we should handler that during serialization.
Author: Davies Liu <davies@databricks.com>
Closes #8210 from davies/serialize_utf8string.
Diffstat (limited to 'unsafe/src')
-rw-r--r-- | unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java | 31 |
1 files changed, 25 insertions, 6 deletions
diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java index 667c00900f..cbcab958c0 100644 --- a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java +++ b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java @@ -18,8 +18,7 @@ package org.apache.spark.unsafe.types; import javax.annotation.Nonnull; -import java.io.Serializable; -import java.io.UnsupportedEncodingException; +import java.io.*; import java.nio.ByteOrder; import java.util.Arrays; import java.util.Map; @@ -38,12 +37,13 @@ import static org.apache.spark.unsafe.Platform.*; * <p> * Note: This is not designed for general use cases, should not be used outside SQL. */ -public final class UTF8String implements Comparable<UTF8String>, Serializable { +public final class UTF8String implements Comparable<UTF8String>, Externalizable { + // These are only updated by readExternal() @Nonnull - private final Object base; - private final long offset; - private final int numBytes; + private Object base; + private long offset; + private int numBytes; public Object getBaseObject() { return base; } public long getBaseOffset() { return offset; } @@ -127,6 +127,11 @@ public final class UTF8String implements Comparable<UTF8String>, Serializable { this.numBytes = numBytes; } + // for serialization + public UTF8String() { + this(null, 0, 0); + } + /** * Writes the content of this string into a memory address, identified by an object and an offset. * The target memory address must already been allocated, and have enough space to hold all the @@ -978,4 +983,18 @@ public final class UTF8String implements Comparable<UTF8String>, Serializable { } return UTF8String.fromBytes(sx); } + + public void writeExternal(ObjectOutput out) throws IOException { + byte[] bytes = getBytes(); + out.writeInt(bytes.length); + out.write(bytes); + } + + public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { + offset = BYTE_ARRAY_OFFSET; + numBytes = in.readInt(); + base = new byte[numBytes]; + in.readFully((byte[]) base); + } + } |