aboutsummaryrefslogtreecommitdiff
path: root/unsafe
diff options
context:
space:
mode:
authorzhichao.li <zhichao.li@intel.com>2015-08-06 09:02:30 -0700
committerDavies Liu <davies.liu@gmail.com>2015-08-06 09:02:30 -0700
commitaead18ffca36830e854fba32a1cac11a0b2e31d5 (patch)
tree172fe1d83e6c691abad40c4114662db84fe8fbf2 /unsafe
parentd5a9af3230925c347d0904fe7f2402e468e80bc8 (diff)
downloadspark-aead18ffca36830e854fba32a1cac11a0b2e31d5.tar.gz
spark-aead18ffca36830e854fba32a1cac11a0b2e31d5.tar.bz2
spark-aead18ffca36830e854fba32a1cac11a0b2e31d5.zip
[SPARK-8266] [SQL] add function translate
![translate](http://www.w3resource.com/PostgreSQL/postgresql-translate-function.png) Author: zhichao.li <zhichao.li@intel.com> Closes #7709 from zhichao-li/translate and squashes the following commits: 9418088 [zhichao.li] refine checking condition f2ab77a [zhichao.li] clone string 9d88f2d [zhichao.li] fix indent 6aa2962 [zhichao.li] style e575ead [zhichao.li] add python api 9d4bab0 [zhichao.li] add special case for fodable and refactor unittest eda7ad6 [zhichao.li] update to use TernaryExpression cdfd4be [zhichao.li] add function translate
Diffstat (limited to 'unsafe')
-rw-r--r--unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java16
-rw-r--r--unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java31
2 files changed, 47 insertions, 0 deletions
diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
index febbe3d4e5..d1014426c0 100644
--- a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
+++ b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
@@ -22,6 +22,7 @@ import java.io.Serializable;
import java.io.UnsupportedEncodingException;
import java.nio.ByteOrder;
import java.util.Arrays;
+import java.util.Map;
import org.apache.spark.unsafe.PlatformDependent;
import org.apache.spark.unsafe.array.ByteArrayMethods;
@@ -795,6 +796,21 @@ public final class UTF8String implements Comparable<UTF8String>, Serializable {
return res;
}
+ // TODO: Need to use `Code Point` here instead of Char in case the character longer than 2 bytes
+ public UTF8String translate(Map<Character, Character> dict) {
+ String srcStr = this.toString();
+
+ StringBuilder sb = new StringBuilder();
+ for(int k = 0; k< srcStr.length(); k++) {
+ if (null == dict.get(srcStr.charAt(k))) {
+ sb.append(srcStr.charAt(k));
+ } else if ('\0' != dict.get(srcStr.charAt(k))){
+ sb.append(dict.get(srcStr.charAt(k)));
+ }
+ }
+ return fromString(sb.toString());
+ }
+
@Override
public String toString() {
try {
diff --git a/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java b/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
index b30c94c1c1..98aa8a2469 100644
--- a/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
+++ b/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
@@ -19,7 +19,9 @@ package org.apache.spark.unsafe.types;
import java.io.UnsupportedEncodingException;
import java.util.Arrays;
+import java.util.HashMap;
+import com.google.common.collect.ImmutableMap;
import org.junit.Test;
import static junit.framework.Assert.*;
@@ -392,6 +394,35 @@ public class UTF8StringSuite {
}
@Test
+ public void translate() {
+ assertEquals(
+ fromString("1a2s3ae"),
+ fromString("translate").translate(ImmutableMap.of(
+ 'r', '1',
+ 'n', '2',
+ 'l', '3',
+ 't', '\0'
+ )));
+ assertEquals(
+ fromString("translate"),
+ fromString("translate").translate(new HashMap<Character, Character>()));
+ assertEquals(
+ fromString("asae"),
+ fromString("translate").translate(ImmutableMap.of(
+ 'r', '\0',
+ 'n', '\0',
+ 'l', '\0',
+ 't', '\0'
+ )));
+ assertEquals(
+ fromString("aa世b"),
+ fromString("花花世界").translate(ImmutableMap.of(
+ '花', 'a',
+ '界', 'b'
+ )));
+ }
+
+ @Test
public void createBlankString() {
assertEquals(fromString(" "), blankString(1));
assertEquals(fromString(" "), blankString(2));