aboutsummaryrefslogtreecommitdiff
path: root/unsafe
diff options
context:
space:
mode:
authorDavies Liu <davies@databricks.com>2015-08-14 22:30:35 -0700
committerDavies Liu <davies.liu@gmail.com>2015-08-14 22:30:35 -0700
commit7c1e56825b716a7d703dff38254b4739755ac0c4 (patch)
tree0de34f9c8b81c8787f47ca8973b1bc94e62962ef /unsafe
parent71a3af8a94f900a26ac7094f22ec1216cab62e15 (diff)
downloadspark-7c1e56825b716a7d703dff38254b4739755ac0c4.tar.gz
spark-7c1e56825b716a7d703dff38254b4739755ac0c4.tar.bz2
spark-7c1e56825b716a7d703dff38254b4739755ac0c4.zip
[SPARK-9725] [SQL] fix serialization of UTF8String across different JVM
The BYTE_ARRAY_OFFSET could be different in JVM with different configurations (for example, different heap size, 24 if heap > 32G, otherwise 16), so offset of UTF8String is not portable, we should handler that during serialization. Author: Davies Liu <davies@databricks.com> Closes #8210 from davies/serialize_utf8string.
Diffstat (limited to 'unsafe')
-rw-r--r--unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java31
1 files changed, 25 insertions, 6 deletions
diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
index 667c00900f..cbcab958c0 100644
--- a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
+++ b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
@@ -18,8 +18,7 @@
package org.apache.spark.unsafe.types;
import javax.annotation.Nonnull;
-import java.io.Serializable;
-import java.io.UnsupportedEncodingException;
+import java.io.*;
import java.nio.ByteOrder;
import java.util.Arrays;
import java.util.Map;
@@ -38,12 +37,13 @@ import static org.apache.spark.unsafe.Platform.*;
* <p>
* Note: This is not designed for general use cases, should not be used outside SQL.
*/
-public final class UTF8String implements Comparable<UTF8String>, Serializable {
+public final class UTF8String implements Comparable<UTF8String>, Externalizable {
+ // These are only updated by readExternal()
@Nonnull
- private final Object base;
- private final long offset;
- private final int numBytes;
+ private Object base;
+ private long offset;
+ private int numBytes;
public Object getBaseObject() { return base; }
public long getBaseOffset() { return offset; }
@@ -127,6 +127,11 @@ public final class UTF8String implements Comparable<UTF8String>, Serializable {
this.numBytes = numBytes;
}
+ // for serialization
+ public UTF8String() {
+ this(null, 0, 0);
+ }
+
/**
* Writes the content of this string into a memory address, identified by an object and an offset.
* The target memory address must already been allocated, and have enough space to hold all the
@@ -978,4 +983,18 @@ public final class UTF8String implements Comparable<UTF8String>, Serializable {
}
return UTF8String.fromBytes(sx);
}
+
+ public void writeExternal(ObjectOutput out) throws IOException {
+ byte[] bytes = getBytes();
+ out.writeInt(bytes.length);
+ out.write(bytes);
+ }
+
+ public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
+ offset = BYTE_ARRAY_OFFSET;
+ numBytes = in.readInt();
+ base = new byte[numBytes];
+ in.readFully((byte[]) base);
+ }
+
}