about | summary | refs | log | tree | commit | diff
path: root/unsafe
diff options
context:
space:
mode:
author: zhichao.li <zhichao.li@intel.com> 2015-08-01 08:48:46 -0700
committer: Davies Liu <davies.liu@gmail.com> 2015-08-01 08:48:46 -0700
commit: c5166f7a69faeaa8a41a774c73c1ed4d4c2cf0ce (patch)
tree: c1b3ddbb2f8697743cd8e11aaeedcdf80d1adec7 /unsafe
parent: cf6c9ca32a89422e25007d333bc8714d9b0ae6d8 (diff)
download: spark-c5166f7a69faeaa8a41a774c73c1ed4d4c2cf0ce.tar.gz
spark-c5166f7a69faeaa8a41a774c73c1ed4d4c2cf0ce.tar.bz2
spark-c5166f7a69faeaa8a41a774c73c1ed4d4c2cf0ce.zip
[SPARK-8263] [SQL] substr/substring should also support binary type
This is based on #7641, thanks to zhichao-li

Closes #7641

Author: zhichao.li <zhichao.li@intel.com>
Author: Davies Liu <davies@databricks.com>

Closes #7848 from davies/substr and squashes the following commits:

461b709 [Davies Liu] remove bytearry from tests
b45377a [Davies Liu] Merge branch 'master' of github.com:apache/spark into substr
01d795e [zhichao.li] scala style
99aa130 [zhichao.li] add substring to dataframe
4f68bfe [zhichao.li] add binary type support for substring
Diffstat (limited to 'unsafe')
-rw-r--r-- unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java | 17
1 files changed, 11 insertions, 6 deletions
diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
index f6dafe94c6..208503d2fd 100644
--- a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
+++ b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
@@ -198,7 +198,7 @@ public final class UTF8String implements Comparable<UTF8String>, Serializable {
*/
public UTF8String substring(final int start, final int until) {
if (until <= start || start >= numBytes) {
- return UTF8String.EMPTY_UTF8;
+ return EMPTY_UTF8;
}
int i = 0;
@@ -214,9 +214,13 @@ public final class UTF8String implements Comparable<UTF8String>, Serializable {
c += 1;
}
- byte[] bytes = new byte[i - j];
- copyMemory(base, offset + j, bytes, BYTE_ARRAY_OFFSET, i - j);
- return fromBytes(bytes);
+ if (i > j) {
+ byte[] bytes = new byte[i - j];
+ copyMemory(base, offset + j, bytes, BYTE_ARRAY_OFFSET, i - j);
+ return fromBytes(bytes);
+ } else {
+ return EMPTY_UTF8;
+ }
}
public UTF8String substringSQL(int pos, int length) {
@@ -226,8 +230,9 @@ public final class UTF8String implements Comparable<UTF8String>, Serializable {
// refers to element i-1 in the sequence. If a start index i is less than 0, it refers
// to the -ith element before the end of the sequence. If a start index i is 0, it
// refers to the first element.
- int start = (pos > 0) ? pos -1 : ((pos < 0) ? numChars() + pos : 0);
- int end = (length == Integer.MAX_VALUE) ? Integer.MAX_VALUE : start + length;
+ int len = numChars();
+ int start = (pos > 0) ? pos -1 : ((pos < 0) ? len + pos : 0);
+ int end = (length == Integer.MAX_VALUE) ? len : start + length;
return substring(start, end);
}