diff options
author | Tarek Auel <tarek.auel@googlemail.com> | 2015-07-20 18:21:05 -0700 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2015-07-20 18:21:05 -0700 |
commit | 6853ac7c8c76003160fc861ddcc8e8e39e4a5924 (patch) | |
tree | 851e8ec11a2a64778bbbfa2f017d93a0f108f139 /unsafe | |
parent | 047ccc8c9a88e74f7bc87709ee5d531f1d7a4228 (diff) | |
download | spark-6853ac7c8c76003160fc861ddcc8e8e39e4a5924.tar.gz spark-6853ac7c8c76003160fc861ddcc8e8e39e4a5924.tar.bz2 spark-6853ac7c8c76003160fc861ddcc8e8e39e4a5924.zip |
[SPARK-9156][SQL] codegen StringSplit
Jira: https://issues.apache.org/jira/browse/SPARK-9156
Author: Tarek Auel <tarek.auel@googlemail.com>
Closes #7547 from tarekauel/SPARK-9156 and squashes the following commits:
0be2700 [Tarek Auel] [SPARK-9156][SQL] indention fix
b860eaf [Tarek Auel] [SPARK-9156][SQL] codegen StringSplit
5ad6a1f [Tarek Auel] [SPARK-9156] codegen StringSplit
Diffstat (limited to 'unsafe')
-rw-r--r-- | unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java | 9 | ||||
-rw-r--r-- | unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java | 11 |
2 files changed, 20 insertions, 0 deletions
diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
index fc63fe537d..ed354f7f87 100644
--- a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
+++ b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
@@ -487,6 +487,15 @@ public final class UTF8String implements Comparable<UTF8String>, Serializable {
     return fromBytes(result);
   }
 
+  public UTF8String[] split(UTF8String pattern, int limit) {
+    String[] splits = toString().split(pattern.toString(), limit);
+    UTF8String[] res = new UTF8String[splits.length];
+    for (int i = 0; i < res.length; i++) {
+      res[i] = fromString(splits[i]);
+    }
+    return res;
+  }
+
   @Override
   public String toString() {
     try {
diff --git a/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java b/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
index d730b1d138..1f5572c509 100644
--- a/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
+++ b/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
@@ -18,6 +18,7 @@
 package org.apache.spark.unsafe.types;
 
 import java.io.UnsupportedEncodingException;
+import java.util.Arrays;
 
 import org.junit.Test;
 
@@ -270,6 +271,16 @@ public class UTF8StringSuite {
       fromString("数据砖头孙行者孙行者孙行"),
       fromString("数据砖头").rpad(12, fromString("孙行者")));
   }
+
+  @Test
+  public void split() {
+    assertTrue(Arrays.equals(fromString("ab,def,ghi").split(fromString(","), -1),
+      new UTF8String[]{fromString("ab"), fromString("def"), fromString("ghi")}));
+    assertTrue(Arrays.equals(fromString("ab,def,ghi").split(fromString(","), 2),
+      new UTF8String[]{fromString("ab"), fromString("def,ghi")}));
+    assertTrue(Arrays.equals(fromString("ab,def,ghi").split(fromString(","), 2),
+      new UTF8String[]{fromString("ab"), fromString("def,ghi")}));
+  }
 
   @Test
   public void levenshteinDistance() {