diff options
author | Davies Liu <davies@databricks.com> | 2015-08-14 22:30:35 -0700 |
---|---|---|
committer | Davies Liu <davies.liu@gmail.com> | 2015-08-14 22:31:34 -0700 |
commit | d97af68af3e910eb7247e9832615758385d642b9 (patch) | |
tree | c8e02a86fb5a93c50bebafd1285dc4b079445235 /unsafe | |
parent | 33015009f5514fe510cdf5b486d2b84136a4522e (diff) | |
download | spark-d97af68af3e910eb7247e9832615758385d642b9.tar.gz spark-d97af68af3e910eb7247e9832615758385d642b9.tar.bz2 spark-d97af68af3e910eb7247e9832615758385d642b9.zip |
[SPARK-9725] [SQL] fix serialization of UTF8String across different JVM
The BYTE_ARRAY_OFFSET could be different in JVM with different configurations (for example, different heap size, 24 if heap > 32G, otherwise 16), so offset of UTF8String is not portable, we should handler that during serialization.
Author: Davies Liu <davies@databricks.com>
Closes #8210 from davies/serialize_utf8string.
(cherry picked from commit 7c1e56825b716a7d703dff38254b4739755ac0c4)
Signed-off-by: Davies Liu <davies.liu@gmail.com>
Diffstat (limited to 'unsafe')
-rw-r--r-- | unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java | 31 |
1 files changed, 25 insertions, 6 deletions
diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java index 667c00900f..cbcab958c0 100644 --- a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java +++ b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java @@ -18,8 +18,7 @@ package org.apache.spark.unsafe.types; import javax.annotation.Nonnull; -import java.io.Serializable; -import java.io.UnsupportedEncodingException; +import java.io.*; import java.nio.ByteOrder; import java.util.Arrays; import java.util.Map; @@ -38,12 +37,13 @@ import static org.apache.spark.unsafe.Platform.*; * <p> * Note: This is not designed for general use cases, should not be used outside SQL. */ -public final class UTF8String implements Comparable<UTF8String>, Serializable { +public final class UTF8String implements Comparable<UTF8String>, Externalizable { + // These are only updated by readExternal() @Nonnull - private final Object base; - private final long offset; - private final int numBytes; + private Object base; + private long offset; + private int numBytes; public Object getBaseObject() { return base; } public long getBaseOffset() { return offset; } @@ -127,6 +127,11 @@ public final class UTF8String implements Comparable<UTF8String>, Serializable { this.numBytes = numBytes; } + // for serialization + public UTF8String() { + this(null, 0, 0); + } + /** * Writes the content of this string into a memory address, identified by an object and an offset. * The target memory address must already been allocated, and have enough space to hold all the @@ -978,4 +983,18 @@ public final class UTF8String implements Comparable<UTF8String>, Serializable { } return UTF8String.fromBytes(sx); } + + public void writeExternal(ObjectOutput out) throws IOException { + byte[] bytes = getBytes(); + out.writeInt(bytes.length); + out.write(bytes); + } + + public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { + offset = BYTE_ARRAY_OFFSET; + numBytes = in.readInt(); + base = new byte[numBytes]; + in.readFully((byte[]) base); + } + } |