From 2272962eb087ffedaee12c761506e33e45bd0239 Mon Sep 17 00:00:00 2001 From: Takeshi YAMAMURO Date: Thu, 1 Oct 2015 21:33:27 -0400 Subject: [SPARK-9867] [SQL] Move utilities for binary data into ByteArray Utilities for binary data, such as Substring#substringBinarySQL and BinaryPrefixComparator#computePrefix, are consolidated in ByteArray to improve readability. Author: Takeshi YAMAMURO Closes #8122 from maropu/CleanUpForBinaryType. --- .../org/apache/spark/unsafe/types/ByteArray.java | 47 +++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) (limited to 'unsafe') diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java b/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java index c08c9c73d2..3ced2094f5 100644 --- a/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java +++ b/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java @@ -19,7 +19,11 @@ package org.apache.spark.unsafe.types; import org.apache.spark.unsafe.Platform; -public class ByteArray { +import java.util.Arrays; + +public final class ByteArray { + + public static final byte[] EMPTY_BYTE = new byte[0]; /** * Writes the content of a byte array into a memory address, identified by an object and an @@ -29,4 +33,45 @@ public class ByteArray { public static void writeToMemory(byte[] src, Object target, long targetOffset) { Platform.copyMemory(src, Platform.BYTE_ARRAY_OFFSET, target, targetOffset, src.length); } + + /** + * Returns a 64-bit integer that can be used as the prefix used in sorting.
+   *
+   * A {@code null} array yields 0. Otherwise at most the first 8 bytes are used: each byte is
+   * biased by 128 (mapping the signed range -128..127 onto 0..255) and packed
+   * most-significant-first, so the first byte occupies bits 56-63 of the result.
+   *
+   * @param bytes the array to derive the prefix from; may be {@code null}
+   * @return the packed prefix, or 0 when {@code bytes} is {@code null} or empty
+   */
+  public static long getPrefix(byte[] bytes) {
+    if (bytes == null) {
+      return 0L;
+    } else {
+      final int minLen = Math.min(bytes.length, 8);
+      long p = 0;
+      for (int i = 0; i < minLen; ++i) {
+        p |= (128L + Platform.getByte(bytes, Platform.BYTE_ARRAY_OFFSET + i))
+            << (56 - 8 * i);
+      }
+      return p;
+    }
+  }
+
+  /**
+   * Returns a copy of a slice of {@code bytes} selected by a 1-based position and a length.
+   *
+   * A positive {@code pos} counts from the start of the array (1 is the first byte), a negative
+   * {@code pos} counts back from the end, and 0 is treated like 1. The slice is clipped to the
+   * array bounds; when nothing remains after clipping, {@link #EMPTY_BYTE} is returned.
+   *
+   * @param bytes the source array; must not be {@code null}
+   * @param pos 1-based start position, negative to count from the end
+   * @param len maximum number of bytes to copy, measured from the unclipped start position
+   * @return a new array holding the selected bytes, or {@link #EMPTY_BYTE} if the slice is empty
+   */
+  public static byte[] subStringSQL(byte[] bytes, int pos, int len) {
+    // This pos calculation is according to UTF8String#subStringSQL
+    if (pos > bytes.length) {
+      return EMPTY_BYTE;
+    }
+    // The position arithmetic is done in 64 bits. With int, pos == Integer.MIN_VALUE makes
+    // (bytes.length - start) wrap around to a negative value, which selected the whole array
+    // instead of an empty slice.
+    long start = 0;
+    if (pos > 0) {
+      start = pos - 1;
+    } else if (pos < 0) {
+      start = (long) bytes.length + pos;
+    }
+    final long end;
+    if ((bytes.length - start) < len) {
+      end = bytes.length;
+    } else {
+      end = start + len;
+    }
+    start = Math.max(start, 0L); // underflow
+    if (start >= end) {
+      return EMPTY_BYTE;
+    }
+    // Here 0 <= start < end <= bytes.length, so both narrowing casts are lossless.
+    return Arrays.copyOfRange(bytes, (int) start, (int) end);
+  }
 }
-- cgit v1.2.3