diff options
author | zhichao.li <zhichao.li@intel.com> | 2015-08-01 08:48:46 -0700 |
---|---|---|
committer | Davies Liu <davies.liu@gmail.com> | 2015-08-01 08:48:46 -0700 |
commit | c5166f7a69faeaa8a41a774c73c1ed4d4c2cf0ce (patch) | |
tree | c1b3ddbb2f8697743cd8e11aaeedcdf80d1adec7 /unsafe/src | |
parent | cf6c9ca32a89422e25007d333bc8714d9b0ae6d8 (diff) | |
download | spark-c5166f7a69faeaa8a41a774c73c1ed4d4c2cf0ce.tar.gz spark-c5166f7a69faeaa8a41a774c73c1ed4d4c2cf0ce.tar.bz2 spark-c5166f7a69faeaa8a41a774c73c1ed4d4c2cf0ce.zip |
[SPARK-8263] [SQL] substr/substring should also support binary type
This is based on #7641, thanks to zhichao-li
Closes #7641
Author: zhichao.li <zhichao.li@intel.com>
Author: Davies Liu <davies@databricks.com>
Closes #7848 from davies/substr and squashes the following commits:
461b709 [Davies Liu] remove bytearray from tests
b45377a [Davies Liu] Merge branch 'master' of github.com:apache/spark into substr
01d795e [zhichao.li] scala style
99aa130 [zhichao.li] add substring to dataframe
4f68bfe [zhichao.li] add binary type support for substring
Diffstat (limited to 'unsafe/src')
-rw-r--r-- | unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java | 17 |
1 files changed, 11 insertions, 6 deletions
diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java index f6dafe94c6..208503d2fd 100644 --- a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java +++ b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java @@ -198,7 +198,7 @@ public final class UTF8String implements Comparable<UTF8String>, Serializable { */ public UTF8String substring(final int start, final int until) { if (until <= start || start >= numBytes) { - return UTF8String.EMPTY_UTF8; + return EMPTY_UTF8; } int i = 0; @@ -214,9 +214,13 @@ public final class UTF8String implements Comparable<UTF8String>, Serializable { c += 1; } - byte[] bytes = new byte[i - j]; - copyMemory(base, offset + j, bytes, BYTE_ARRAY_OFFSET, i - j); - return fromBytes(bytes); + if (i > j) { + byte[] bytes = new byte[i - j]; + copyMemory(base, offset + j, bytes, BYTE_ARRAY_OFFSET, i - j); + return fromBytes(bytes); + } else { + return EMPTY_UTF8; + } } public UTF8String substringSQL(int pos, int length) { @@ -226,8 +230,9 @@ public final class UTF8String implements Comparable<UTF8String>, Serializable { // refers to element i-1 in the sequence. If a start index i is less than 0, it refers // to the -ith element before the end of the sequence. If a start index i is 0, it // refers to the first element. - int start = (pos > 0) ? pos -1 : ((pos < 0) ? numChars() + pos : 0); - int end = (length == Integer.MAX_VALUE) ? Integer.MAX_VALUE : start + length; + int len = numChars(); + int start = (pos > 0) ? pos -1 : ((pos < 0) ? len + pos : 0); + int end = (length == Integer.MAX_VALUE) ? len : start + length; return substring(start, end); } |