aboutsummaryrefslogtreecommitdiff
path: root/unsafe
diff options
context:
space:
mode:
author: Tarek Auel <tarek.auel@googlemail.com> 2015-07-20 18:16:49 -0700
committer: Reynold Xin <rxin@databricks.com> 2015-07-20 18:16:49 -0700
commit: 047ccc8c9a88e74f7bc87709ee5d531f1d7a4228 (patch)
tree: 6fc36642a057b4d53a648a3efc3f2e2739a869a1 /unsafe
parent: 66bb8003b949860b8652542e1232bc48665448c2 (diff)
download: spark-047ccc8c9a88e74f7bc87709ee5d531f1d7a4228.tar.gz
spark-047ccc8c9a88e74f7bc87709ee5d531f1d7a4228.tar.bz2
spark-047ccc8c9a88e74f7bc87709ee5d531f1d7a4228.zip
[SPARK-9178][SQL] Add an empty string constant to UTF8String
Jira: https://issues.apache.org/jira/browse/SPARK-9178 To avoid repeated calls to `UTF8String.fromString("")`, this PR adds an empty-string constant to `UTF8String` (originally proposed as `EMPTY_STRING`, renamed to `EMPTY_UTF8` during review). A `UTF8String` is immutable, so a single shared constant should be safe to use, shouldn't it? I searched for current usages of `UTF8String.fromString("")` with `grep -R "UTF8String.fromString(\"\")" .` Author: Tarek Auel <tarek.auel@googlemail.com> Closes #7509 from tarekauel/SPARK-9178 and squashes the following commits: 8d6c405 [Tarek Auel] [SPARK-9178] revert intellij indents 3627b80 [Tarek Auel] [SPARK-9178] revert concat tests changes 3f5fbf5 [Tarek Auel] [SPARK-9178] rebase and add final to UTF8String.EMPTY_UTF8 47cda68 [Tarek Auel] Merge branch 'master' into SPARK-9178 4a37344 [Tarek Auel] [SPARK-9178] changed name to EMPTY_UTF8, added tests 748b87a [Tarek Auel] [SPARK-9178] Add empty string constant to UTF8String
Diffstat (limited to 'unsafe')
-rw-r--r-- unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java 2
-rw-r--r-- unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java 76
2 files changed, 39 insertions, 39 deletions
diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
index 819639f300..fc63fe537d 100644
--- a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
+++ b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
@@ -50,6 +50,8 @@ public final class UTF8String implements Comparable<UTF8String>, Serializable {
5, 5, 5, 5,
6, 6, 6, 6};
+ public static final UTF8String EMPTY_UTF8 = UTF8String.fromString("");
+
/**
* Creates an UTF8String from byte array, which should be encoded in UTF-8.
*
diff --git a/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java b/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
index 6a21c27461..d730b1d138 100644
--- a/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
+++ b/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
@@ -55,6 +55,14 @@ public class UTF8StringSuite {
}
@Test
+ public void emptyStringTest() {
+ assertEquals(fromString(""), EMPTY_UTF8);
+ assertEquals(fromBytes(new byte[0]), EMPTY_UTF8);
+ assertEquals(0, EMPTY_UTF8.numChars());
+ assertEquals(0, EMPTY_UTF8.numBytes());
+ }
+
+ @Test
public void compareTo() {
assertTrue(fromString("abc").compareTo(fromString("ABC")) > 0);
assertTrue(fromString("abc0").compareTo(fromString("abc")) > 0);
@@ -88,9 +96,9 @@ public class UTF8StringSuite {
@Test
public void concatTest() {
- assertEquals(fromString(""), concat());
+ assertEquals(EMPTY_UTF8, concat());
assertEquals(null, concat((UTF8String) null));
- assertEquals(fromString(""), concat(fromString("")));
+ assertEquals(EMPTY_UTF8, concat(EMPTY_UTF8));
assertEquals(fromString("ab"), concat(fromString("ab")));
assertEquals(fromString("ab"), concat(fromString("a"), fromString("b")));
assertEquals(fromString("abc"), concat(fromString("a"), fromString("b"), fromString("c")));
@@ -109,8 +117,8 @@ public class UTF8StringSuite {
// If separator is null, concatWs should skip all null inputs and never return null.
UTF8String sep = fromString("哈哈");
assertEquals(
- fromString(""),
- concatWs(sep, fromString("")));
+ EMPTY_UTF8,
+ concatWs(sep, EMPTY_UTF8));
assertEquals(
fromString("ab"),
concatWs(sep, fromString("ab")));
@@ -127,7 +135,7 @@ public class UTF8StringSuite {
fromString("a"),
concatWs(sep, fromString("a"), null, null));
assertEquals(
- fromString(""),
+ EMPTY_UTF8,
concatWs(sep, null, null, null));
assertEquals(
fromString("数据哈哈砖头"),
@@ -136,7 +144,7 @@ public class UTF8StringSuite {
@Test
public void contains() {
- assertTrue(fromString("").contains(fromString("")));
+ assertTrue(EMPTY_UTF8.contains(EMPTY_UTF8));
assertTrue(fromString("hello").contains(fromString("ello")));
assertFalse(fromString("hello").contains(fromString("vello")));
assertFalse(fromString("hello").contains(fromString("hellooo")));
@@ -147,7 +155,7 @@ public class UTF8StringSuite {
@Test
public void startsWith() {
- assertTrue(fromString("").startsWith(fromString("")));
+ assertTrue(EMPTY_UTF8.startsWith(EMPTY_UTF8));
assertTrue(fromString("hello").startsWith(fromString("hell")));
assertFalse(fromString("hello").startsWith(fromString("ell")));
assertFalse(fromString("hello").startsWith(fromString("hellooo")));
@@ -158,7 +166,7 @@ public class UTF8StringSuite {
@Test
public void endsWith() {
- assertTrue(fromString("").endsWith(fromString("")));
+ assertTrue(EMPTY_UTF8.endsWith(EMPTY_UTF8));
assertTrue(fromString("hello").endsWith(fromString("ello")));
assertFalse(fromString("hello").endsWith(fromString("ellov")));
assertFalse(fromString("hello").endsWith(fromString("hhhello")));
@@ -169,7 +177,7 @@ public class UTF8StringSuite {
@Test
public void substring() {
- assertEquals(fromString(""), fromString("hello").substring(0, 0));
+ assertEquals(EMPTY_UTF8, fromString("hello").substring(0, 0));
assertEquals(fromString("el"), fromString("hello").substring(1, 3));
assertEquals(fromString("数"), fromString("数据砖头").substring(0, 1));
assertEquals(fromString("据砖"), fromString("数据砖头").substring(1, 3));
@@ -183,9 +191,9 @@ public class UTF8StringSuite {
assertEquals(fromString("hello "), fromString(" hello ").trimLeft());
assertEquals(fromString(" hello"), fromString(" hello ").trimRight());
- assertEquals(fromString(""), fromString(" ").trim());
- assertEquals(fromString(""), fromString(" ").trimLeft());
- assertEquals(fromString(""), fromString(" ").trimRight());
+ assertEquals(EMPTY_UTF8, fromString(" ").trim());
+ assertEquals(EMPTY_UTF8, fromString(" ").trimLeft());
+ assertEquals(EMPTY_UTF8, fromString(" ").trimRight());
assertEquals(fromString("数据砖头"), fromString(" 数据砖头 ").trim());
assertEquals(fromString("数据砖头 "), fromString(" 数据砖头 ").trimLeft());
@@ -198,9 +206,9 @@ public class UTF8StringSuite {
@Test
public void indexOf() {
- assertEquals(0, fromString("").indexOf(fromString(""), 0));
- assertEquals(-1, fromString("").indexOf(fromString("l"), 0));
- assertEquals(0, fromString("hello").indexOf(fromString(""), 0));
+ assertEquals(0, EMPTY_UTF8.indexOf(EMPTY_UTF8, 0));
+ assertEquals(-1, EMPTY_UTF8.indexOf(fromString("l"), 0));
+ assertEquals(0, fromString("hello").indexOf(EMPTY_UTF8, 0));
assertEquals(2, fromString("hello").indexOf(fromString("l"), 0));
assertEquals(3, fromString("hello").indexOf(fromString("l"), 3));
assertEquals(-1, fromString("hello").indexOf(fromString("a"), 0));
@@ -215,7 +223,7 @@ public class UTF8StringSuite {
@Test
public void reverse() {
assertEquals(fromString("olleh"), fromString("hello").reverse());
- assertEquals(fromString(""), fromString("").reverse());
+ assertEquals(EMPTY_UTF8, EMPTY_UTF8.reverse());
assertEquals(fromString("者行孙"), fromString("孙行者").reverse());
assertEquals(fromString("者行孙 olleh"), fromString("hello 孙行者").reverse());
}
@@ -224,7 +232,7 @@ public class UTF8StringSuite {
public void repeat() {
assertEquals(fromString("数d数d数d数d数d"), fromString("数d").repeat(5));
assertEquals(fromString("数d"), fromString("数d").repeat(1));
- assertEquals(fromString(""), fromString("数d").repeat(-1));
+ assertEquals(EMPTY_UTF8, fromString("数d").repeat(-1));
}
@Test
@@ -234,14 +242,14 @@ public class UTF8StringSuite {
assertEquals(fromString("?hello"), fromString("hello").lpad(6, fromString("????")));
assertEquals(fromString("???????hello"), fromString("hello").lpad(12, fromString("????")));
assertEquals(fromString("?????hello"), fromString("hello").lpad(10, fromString("?????")));
- assertEquals(fromString("???????"), fromString("").lpad(7, fromString("?????")));
+ assertEquals(fromString("???????"), EMPTY_UTF8.lpad(7, fromString("?????")));
assertEquals(fromString("hel"), fromString("hello").rpad(3, fromString("????")));
assertEquals(fromString("hello"), fromString("hello").rpad(5, fromString("????")));
assertEquals(fromString("hello?"), fromString("hello").rpad(6, fromString("????")));
assertEquals(fromString("hello???????"), fromString("hello").rpad(12, fromString("????")));
assertEquals(fromString("hello?????"), fromString("hello").rpad(10, fromString("?????")));
- assertEquals(fromString("???????"), fromString("").rpad(7, fromString("?????")));
+ assertEquals(fromString("???????"), EMPTY_UTF8.rpad(7, fromString("?????")));
assertEquals(fromString("数据砖"), fromString("数据砖头").lpad(3, fromString("????")));
@@ -265,26 +273,16 @@ public class UTF8StringSuite {
@Test
public void levenshteinDistance() {
- assertEquals(
- UTF8String.fromString("").levenshteinDistance(UTF8String.fromString("")), 0);
- assertEquals(
- UTF8String.fromString("").levenshteinDistance(UTF8String.fromString("a")), 1);
- assertEquals(
- UTF8String.fromString("aaapppp").levenshteinDistance(UTF8String.fromString("")), 7);
- assertEquals(
- UTF8String.fromString("frog").levenshteinDistance(UTF8String.fromString("fog")), 1);
- assertEquals(
- UTF8String.fromString("fly").levenshteinDistance(UTF8String.fromString("ant")),3);
- assertEquals(
- UTF8String.fromString("elephant").levenshteinDistance(UTF8String.fromString("hippo")), 7);
- assertEquals(
- UTF8String.fromString("hippo").levenshteinDistance(UTF8String.fromString("elephant")), 7);
- assertEquals(
- UTF8String.fromString("hippo").levenshteinDistance(UTF8String.fromString("zzzzzzzz")), 8);
- assertEquals(
- UTF8String.fromString("hello").levenshteinDistance(UTF8String.fromString("hallo")),1);
- assertEquals(
- UTF8String.fromString("世界千世").levenshteinDistance(UTF8String.fromString("千a世b")),4);
+ assertEquals(EMPTY_UTF8.levenshteinDistance(EMPTY_UTF8), 0);
+ assertEquals(EMPTY_UTF8.levenshteinDistance(fromString("a")), 1);
+ assertEquals(fromString("aaapppp").levenshteinDistance(EMPTY_UTF8), 7);
+ assertEquals(fromString("frog").levenshteinDistance(fromString("fog")), 1);
+ assertEquals(fromString("fly").levenshteinDistance(fromString("ant")),3);
+ assertEquals(fromString("elephant").levenshteinDistance(fromString("hippo")), 7);
+ assertEquals(fromString("hippo").levenshteinDistance(fromString("elephant")), 7);
+ assertEquals(fromString("hippo").levenshteinDistance(fromString("zzzzzzzz")), 8);
+ assertEquals(fromString("hello").levenshteinDistance(fromString("hallo")),1);
+ assertEquals(fromString("世界千世").levenshteinDistance(fromString("千a世b")),4);
}
@Test