about | summary | refs | log | tree | commit | diff
path: root/unsafe/src
diff options
context:
space:
mode:
author Matthew Brandyberry <mbrandy@us.ibm.com> 2015-08-03 17:36:56 -0700
committer Davies Liu <davies.liu@gmail.com> 2015-08-03 17:36:56 -0700
commit b79b4f5f2251ed7efeec1f4b26e45a8ea6b85a6a (patch)
tree 898535794e3e64f551028c33a054daebbfe6ad9a /unsafe/src
parent 7abaaad5b169520fbf7299808b2bafde089a16a2 (diff)
download spark-b79b4f5f2251ed7efeec1f4b26e45a8ea6b85a6a.tar.gz
spark-b79b4f5f2251ed7efeec1f4b26e45a8ea6b85a6a.tar.bz2
spark-b79b4f5f2251ed7efeec1f4b26e45a8ea6b85a6a.zip
[SPARK-9483] Fix UTF8String.getPrefix for big-endian.
Previous code assumed little-endian.

Author: Matthew Brandyberry <mbrandy@us.ibm.com>

Closes #7902 from mtbrandy/SPARK-9483 and squashes the following commits:

ec31df8 [Matthew Brandyberry] [SPARK-9483] Changes from review comments.
17d54c6 [Matthew Brandyberry] [SPARK-9483] Fix UTF8String.getPrefix for big-endian.
Diffstat (limited to 'unsafe/src')
-rw-r--r-- unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java | 40
1 files changed, 30 insertions, 10 deletions
diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
index f6c9b87778..d80bd57bd2 100644
--- a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
+++ b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
@@ -20,6 +20,7 @@ package org.apache.spark.unsafe.types;
import javax.annotation.Nonnull;
import java.io.Serializable;
import java.io.UnsupportedEncodingException;
+import java.nio.ByteOrder;
import java.util.Arrays;
import org.apache.spark.unsafe.PlatformDependent;
@@ -53,6 +54,8 @@ public final class UTF8String implements Comparable<UTF8String>, Serializable {
5, 5, 5, 5,
6, 6};
+ private static ByteOrder byteOrder = ByteOrder.nativeOrder();
+
public static final UTF8String EMPTY_UTF8 = UTF8String.fromString("");
/**
@@ -175,18 +178,35 @@ public final class UTF8String implements Comparable<UTF8String>, Serializable {
// If size is greater than 4, assume we have at least 8 bytes of data to fetch.
// After getting the data, we use a mask to mask out data that is not part of the string.
long p;
- if (numBytes >= 8) {
- p = PlatformDependent.UNSAFE.getLong(base, offset);
- } else if (numBytes > 4) {
- p = PlatformDependent.UNSAFE.getLong(base, offset);
- p = p & ((1L << numBytes * 8) - 1);
- } else if (numBytes > 0) {
- p = (long) PlatformDependent.UNSAFE.getInt(base, offset);
- p = p & ((1L << numBytes * 8) - 1);
+ long mask = 0;
+ if (byteOrder == ByteOrder.LITTLE_ENDIAN) {
+ if (numBytes >= 8) {
+ p = PlatformDependent.UNSAFE.getLong(base, offset);
+ } else if (numBytes > 4) {
+ p = PlatformDependent.UNSAFE.getLong(base, offset);
+ mask = (1L << (8 - numBytes) * 8) - 1;
+ } else if (numBytes > 0) {
+ p = (long) PlatformDependent.UNSAFE.getInt(base, offset);
+ mask = (1L << (8 - numBytes) * 8) - 1;
+ } else {
+ p = 0;
+ }
+ p = java.lang.Long.reverseBytes(p);
} else {
- p = 0;
+ // byteOrder == ByteOrder.BIG_ENDIAN
+ if (numBytes >= 8) {
+ p = PlatformDependent.UNSAFE.getLong(base, offset);
+ } else if (numBytes > 4) {
+ p = PlatformDependent.UNSAFE.getLong(base, offset);
+ mask = (1L << (8 - numBytes) * 8) - 1;
+ } else if (numBytes > 0) {
+ p = ((long) PlatformDependent.UNSAFE.getInt(base, offset)) << 32;
+ mask = (1L << (8 - numBytes) * 8) - 1;
+ } else {
+ p = 0;
+ }
}
- p = java.lang.Long.reverseBytes(p);
+ p &= ~mask;
return p;
}