aboutsummaryrefslogtreecommitdiff
path: root/unsafe/src
diff options
context:
space:
mode:
authorTakeshi YAMAMURO <linguin.m.s@gmail.com>2015-10-01 21:33:27 -0400
committerReynold Xin <rxin@databricks.com>2015-10-01 21:33:27 -0400
commit2272962eb087ffedaee12c761506e33e45bd0239 (patch)
tree879167d8dc582986f685ec2338d7696e3317319d /unsafe/src
parent01cd688f5245cbb752863100b399b525b31c3510 (diff)
downloadspark-2272962eb087ffedaee12c761506e33e45bd0239.tar.gz
spark-2272962eb087ffedaee12c761506e33e45bd0239.tar.bz2
spark-2272962eb087ffedaee12c761506e33e45bd0239.zip
[SPARK-9867] [SQL] Move utilities for binary data into ByteArray
Utilities for binary data, such as Substring#substringBinarySQL and BinaryPrefixComparator#computePrefix, are consolidated in ByteArray for readability. Author: Takeshi YAMAMURO <linguin.m.s@gmail.com>. Closes #8122 from maropu/CleanUpForBinaryType.
Diffstat (limited to 'unsafe/src')
-rw-r--r--unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java47
1 files changed, 46 insertions, 1 deletions
diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java b/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java
index c08c9c73d2..3ced2094f5 100644
--- a/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java
+++ b/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java
@@ -19,7 +19,11 @@ package org.apache.spark.unsafe.types;
import org.apache.spark.unsafe.Platform;
-public class ByteArray {
+import java.util.Arrays;
+
+public final class ByteArray {
+
+ public static final byte[] EMPTY_BYTE = new byte[0];
/**
* Writes the content of a byte array into a memory address, identified by an object and an
@@ -29,4 +33,45 @@ public class ByteArray {
public static void writeToMemory(byte[] src, Object target, long targetOffset) {
// Copies all src.length bytes, read from src starting at Platform.BYTE_ARRAY_OFFSET,
// into target beginning at targetOffset.
Platform.copyMemory(src, Platform.BYTE_ARRAY_OFFSET, target, targetOffset, src.length);
}
+
+  /**
+   * Computes the 64-bit sort prefix of a byte array from at most its first eight bytes.
+   *
+   * <p>Each byte read is offset by 128 and OR-ed into the result, with the first byte placed
+   * in the most significant position. Arrays shorter than eight bytes leave the remaining
+   * low-order bits zero.
+   *
+   * @param bytes the array to derive the prefix from; may be {@code null}
+   * @return the packed prefix, or {@code 0} when {@code bytes} is {@code null}
+   */
+  public static long getPrefix(byte[] bytes) {
+    if (bytes == null) {
+      return 0L;
+    }
+    final int limit = Math.min(bytes.length, 8);
+    long prefix = 0L;
+    // Bit position of the byte currently being placed; starts at the top byte of the long.
+    int shift = 56;
+    for (int idx = 0; idx < limit; idx++) {
+      final long biased = 128L + Platform.getByte(bytes, Platform.BYTE_ARRAY_OFFSET + idx);
+      prefix |= biased << shift;
+      shift -= 8;
+    }
+    return prefix;
+  }
+
+  /**
+   * Returns a slice of {@code bytes} selected with a 1-based, SQL-style position.
+   *
+   * <p>The position handling follows UTF8String#subStringSQL: a positive {@code pos} counts
+   * from the start of the array (1 is the first byte), a negative {@code pos} counts back
+   * from the end, and 0 is treated like 1. {@code len} is measured from the requested
+   * position, even when that position lies before the first byte; the resulting range is
+   * then clipped to the bounds of the array.
+   *
+   * @param bytes the source array; must not be {@code null}
+   * @param pos 1-based start position, or a negative offset from the end of the array
+   * @param len maximum number of bytes to take, counted from the requested position
+   * @return a copy of the selected range, or {@link #EMPTY_BYTE} when the range is empty
+   * @throws NullPointerException if {@code bytes} is {@code null}
+   */
+  public static byte[] subStringSQL(byte[] bytes, int pos, int len) {
+    // This pos calculation is according to UTF8String#subStringSQL
+    if (pos > bytes.length) {
+      return EMPTY_BYTE;
+    }
+    // The range is computed in 64-bit arithmetic. With extreme arguments (for example
+    // pos == Integer.MIN_VALUE, or a start before the array combined with
+    // len == Integer.MIN_VALUE) 32-bit math wraps around and produces a bogus range, which
+    // can make copyOfRange return the whole array or attempt a huge zero-padded allocation.
+    long start = 0L;
+    if (pos > 0) {
+      start = pos - 1L;
+    } else if (pos < 0) {
+      start = (long) bytes.length + pos;
+    }
+    final long end = Math.min((long) bytes.length, start + len);
+    start = Math.max(start, 0L); // a negative pos may point before the first byte
+    if (start >= end) {
+      return EMPTY_BYTE;
+    }
+    // Here 0 <= start < end <= bytes.length, so both values fit in an int.
+    return Arrays.copyOfRange(bytes, (int) start, (int) end);
+  }
}