diff options
author | zhichao.li <zhichao.li@intel.com> | 2015-08-06 09:02:30 -0700 |
---|---|---|
committer | Davies Liu <davies.liu@gmail.com> | 2015-08-06 09:02:30 -0700 |
commit | aead18ffca36830e854fba32a1cac11a0b2e31d5 (patch) | |
tree | 172fe1d83e6c691abad40c4114662db84fe8fbf2 /unsafe/src | |
parent | d5a9af3230925c347d0904fe7f2402e468e80bc8 (diff) | |
download | spark-aead18ffca36830e854fba32a1cac11a0b2e31d5.tar.gz spark-aead18ffca36830e854fba32a1cac11a0b2e31d5.tar.bz2 spark-aead18ffca36830e854fba32a1cac11a0b2e31d5.zip |
[SPARK-8266] [SQL] add function translate
![translate](http://www.w3resource.com/PostgreSQL/postgresql-translate-function.png)
Author: zhichao.li <zhichao.li@intel.com>
Closes #7709 from zhichao-li/translate and squashes the following commits:
9418088 [zhichao.li] refine checking condition
f2ab77a [zhichao.li] clone string
9d88f2d [zhichao.li] fix indent
6aa2962 [zhichao.li] style
e575ead [zhichao.li] add python api
9d4bab0 [zhichao.li] add special case for foldable and refactor unittest
eda7ad6 [zhichao.li] update to use TernaryExpression
cdfd4be [zhichao.li] add function translate
Diffstat (limited to 'unsafe/src')
-rw-r--r-- | unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java | 16 | ||||
-rw-r--r-- | unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java | 31 |
2 files changed, 47 insertions, 0 deletions
diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java index febbe3d4e5..d1014426c0 100644 --- a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java +++ b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java @@ -22,6 +22,7 @@ import java.io.Serializable; import java.io.UnsupportedEncodingException; import java.nio.ByteOrder; import java.util.Arrays; +import java.util.Map; import org.apache.spark.unsafe.PlatformDependent; import org.apache.spark.unsafe.array.ByteArrayMethods; @@ -795,6 +796,21 @@ public final class UTF8String implements Comparable<UTF8String>, Serializable { return res; } + // Maps each char of this string (via toString()) through dict: a char with no entry is kept, a char mapped to '\0' is dropped, and any other mapping replaces the char; the result is rebuilt with fromString. TODO: Need to use `Code Point` here instead of Char in case the character is longer than 2 bytes + public UTF8String translate(Map<Character, Character> dict) { + String srcStr = this.toString(); + + StringBuilder sb = new StringBuilder(); + for(int k = 0; k< srcStr.length(); k++) { + if (null == dict.get(srcStr.charAt(k))) { + sb.append(srcStr.charAt(k)); + } else if ('\0' != dict.get(srcStr.charAt(k))){ + sb.append(dict.get(srcStr.charAt(k))); + } + } + return fromString(sb.toString()); + } + @Override public String toString() { try { diff --git a/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java b/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java index b30c94c1c1..98aa8a2469 100644 --- a/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java +++ b/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java @@ -19,7 +19,9 @@ package org.apache.spark.unsafe.types; import java.io.UnsupportedEncodingException; import java.util.Arrays; +import java.util.HashMap; +import com.google.common.collect.ImmutableMap; import org.junit.Test; import static junit.framework.Assert.*; @@ -392,6 +394,35 @@ public class UTF8StringSuite { } @Test + public void translate() { + assertEquals( + 
fromString("1a2s3ae"), + fromString("translate").translate(ImmutableMap.of( + 'r', '1', + 'n', '2', + 'l', '3', + 't', '\0' + ))); + assertEquals( + fromString("translate"), + fromString("translate").translate(new HashMap<Character, Character>())); + assertEquals( + fromString("asae"), + fromString("translate").translate(ImmutableMap.of( + 'r', '\0', + 'n', '\0', + 'l', '\0', + 't', '\0' + ))); + assertEquals( + fromString("aa世b"), + fromString("花花世界").translate(ImmutableMap.of( + '花', 'a', + '界', 'b' + ))); + } + + @Test public void createBlankString() { assertEquals(fromString(" "), blankString(1)); assertEquals(fromString(" "), blankString(2)); |