aboutsummaryrefslogtreecommitdiff
path: root/unsafe
diff options
context:
space:
mode:
author: Tarek Auel <tarek.auel@googlemail.com> 2015-07-20 18:21:05 -0700
committer: Reynold Xin <rxin@databricks.com> 2015-07-20 18:21:05 -0700
commit: 6853ac7c8c76003160fc861ddcc8e8e39e4a5924 (patch)
tree: 851e8ec11a2a64778bbbfa2f017d93a0f108f139 /unsafe
parent: 047ccc8c9a88e74f7bc87709ee5d531f1d7a4228 (diff)
download: spark-6853ac7c8c76003160fc861ddcc8e8e39e4a5924.tar.gz
spark-6853ac7c8c76003160fc861ddcc8e8e39e4a5924.tar.bz2
spark-6853ac7c8c76003160fc861ddcc8e8e39e4a5924.zip
[SPARK-9156][SQL] codegen StringSplit
Jira: https://issues.apache.org/jira/browse/SPARK-9156 Author: Tarek Auel <tarek.auel@googlemail.com> Closes #7547 from tarekauel/SPARK-9156 and squashes the following commits: 0be2700 [Tarek Auel] [SPARK-9156][SQL] indention fix b860eaf [Tarek Auel] [SPARK-9156][SQL] codegen StringSplit 5ad6a1f [Tarek Auel] [SPARK-9156] codegen StringSplit
Diffstat (limited to 'unsafe')
-rw-r--r-- unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java | 9
-rw-r--r-- unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java | 11
2 files changed, 20 insertions, 0 deletions
diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
index fc63fe537d..ed354f7f87 100644
--- a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
+++ b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
@@ -487,6 +487,15 @@ public final class UTF8String implements Comparable<UTF8String>, Serializable {
return fromBytes(result);
}
+ /** Splits around matches of regex {@code pattern}; {@code limit} works as in String#split. */
+ public UTF8String[] split(UTF8String pattern, int limit) {
+   final String[] parts = toString().split(pattern.toString(), limit);
+   final UTF8String[] converted = new UTF8String[parts.length];
+   int idx = 0;
+   for (String part : parts) { converted[idx++] = fromString(part); }
+   return converted;
+ }
+
@Override
public String toString() {
try {
diff --git a/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java b/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
index d730b1d138..1f5572c509 100644
--- a/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
+++ b/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
@@ -18,6 +18,7 @@
package org.apache.spark.unsafe.types;
import java.io.UnsupportedEncodingException;
+import java.util.Arrays;
import org.junit.Test;
@@ -270,6 +271,16 @@ public class UTF8StringSuite {
fromString("数据砖头孙行者孙行者孙行"),
fromString("数据砖头").rpad(12, fromString("孙行者")));
}
+
+ @Test
+ public void split() {
+   assertTrue(Arrays.equals(fromString("ab,def,ghi").split(fromString(","), -1), // no cap
+     new UTF8String[]{fromString("ab"), fromString("def"), fromString("ghi")}));
+   assertTrue(Arrays.equals(fromString("ab,def,ghi").split(fromString(","), 2), // <= 2 parts
+     new UTF8String[]{fromString("ab"), fromString("def,ghi")}));
+   assertTrue(Arrays.equals(fromString("ab,def,ghi").split(fromString(","), 1), // never split
+     new UTF8String[]{fromString("ab,def,ghi")}));
+ }
@Test
public void levenshteinDistance() {