diff options
author | Tarek Auel <tarek.auel@googlemail.com> | 2015-07-20 18:16:49 -0700 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2015-07-20 18:16:49 -0700 |
commit | 047ccc8c9a88e74f7bc87709ee5d531f1d7a4228 (patch) | |
tree | 6fc36642a057b4d53a648a3efc3f2e2739a869a1 /unsafe/src/test/java | |
parent | 66bb8003b949860b8652542e1232bc48665448c2 (diff) | |
download | spark-047ccc8c9a88e74f7bc87709ee5d531f1d7a4228.tar.gz spark-047ccc8c9a88e74f7bc87709ee5d531f1d7a4228.tar.bz2 spark-047ccc8c9a88e74f7bc87709ee5d531f1d7a4228.zip |
[SPARK-9178][SQL] Add an empty string constant to UTF8String
Jira: https://issues.apache.org/jira/browse/SPARK-9178
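To avoid repeated calls to `UTF8String.fromString("")`, this PR adds an empty-string constant to `UTF8String`; it is named `EMPTY_UTF8` in the final patch (renamed from the originally proposed `EMPTY_STRING`). A `UTF8String` is immutable, so it should be safe to share a single constant.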
I searched for current usage of `UTF8String.fromString("")` with
`grep -R "UTF8String.fromString(\"\")" .`
Author: Tarek Auel <tarek.auel@googlemail.com>
Closes #7509 from tarekauel/SPARK-9178 and squashes the following commits:
8d6c405 [Tarek Auel] [SPARK-9178] revert intellij indents
3627b80 [Tarek Auel] [SPARK-9178] revert concat tests changes
3f5fbf5 [Tarek Auel] [SPARK-9178] rebase and add final to UTF8String.EMPTY_UTF8
47cda68 [Tarek Auel] Merge branch 'master' into SPARK-9178
4a37344 [Tarek Auel] [SPARK-9178] changed name to EMPTY_UTF8, added tests
748b87a [Tarek Auel] [SPARK-9178] Add empty string constant to UTF8String
Diffstat (limited to 'unsafe/src/test/java')
-rw-r--r-- | unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java | 76 |
1 files changed, 37 insertions, 39 deletions
diff --git a/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java b/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java index 6a21c27461..d730b1d138 100644 --- a/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java +++ b/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java @@ -55,6 +55,14 @@ public class UTF8StringSuite { } @Test + public void emptyStringTest() { + assertEquals(fromString(""), EMPTY_UTF8); + assertEquals(fromBytes(new byte[0]), EMPTY_UTF8); + assertEquals(0, EMPTY_UTF8.numChars()); + assertEquals(0, EMPTY_UTF8.numBytes()); + } + + @Test public void compareTo() { assertTrue(fromString("abc").compareTo(fromString("ABC")) > 0); assertTrue(fromString("abc0").compareTo(fromString("abc")) > 0); @@ -88,9 +96,9 @@ public class UTF8StringSuite { @Test public void concatTest() { - assertEquals(fromString(""), concat()); + assertEquals(EMPTY_UTF8, concat()); assertEquals(null, concat((UTF8String) null)); - assertEquals(fromString(""), concat(fromString(""))); + assertEquals(EMPTY_UTF8, concat(EMPTY_UTF8)); assertEquals(fromString("ab"), concat(fromString("ab"))); assertEquals(fromString("ab"), concat(fromString("a"), fromString("b"))); assertEquals(fromString("abc"), concat(fromString("a"), fromString("b"), fromString("c"))); @@ -109,8 +117,8 @@ public class UTF8StringSuite { // If separator is null, concatWs should skip all null inputs and never return null. 
UTF8String sep = fromString("哈哈"); assertEquals( - fromString(""), - concatWs(sep, fromString(""))); + EMPTY_UTF8, + concatWs(sep, EMPTY_UTF8)); assertEquals( fromString("ab"), concatWs(sep, fromString("ab"))); @@ -127,7 +135,7 @@ public class UTF8StringSuite { fromString("a"), concatWs(sep, fromString("a"), null, null)); assertEquals( - fromString(""), + EMPTY_UTF8, concatWs(sep, null, null, null)); assertEquals( fromString("数据哈哈砖头"), @@ -136,7 +144,7 @@ public class UTF8StringSuite { @Test public void contains() { - assertTrue(fromString("").contains(fromString(""))); + assertTrue(EMPTY_UTF8.contains(EMPTY_UTF8)); assertTrue(fromString("hello").contains(fromString("ello"))); assertFalse(fromString("hello").contains(fromString("vello"))); assertFalse(fromString("hello").contains(fromString("hellooo"))); @@ -147,7 +155,7 @@ public class UTF8StringSuite { @Test public void startsWith() { - assertTrue(fromString("").startsWith(fromString(""))); + assertTrue(EMPTY_UTF8.startsWith(EMPTY_UTF8)); assertTrue(fromString("hello").startsWith(fromString("hell"))); assertFalse(fromString("hello").startsWith(fromString("ell"))); assertFalse(fromString("hello").startsWith(fromString("hellooo"))); @@ -158,7 +166,7 @@ public class UTF8StringSuite { @Test public void endsWith() { - assertTrue(fromString("").endsWith(fromString(""))); + assertTrue(EMPTY_UTF8.endsWith(EMPTY_UTF8)); assertTrue(fromString("hello").endsWith(fromString("ello"))); assertFalse(fromString("hello").endsWith(fromString("ellov"))); assertFalse(fromString("hello").endsWith(fromString("hhhello"))); @@ -169,7 +177,7 @@ public class UTF8StringSuite { @Test public void substring() { - assertEquals(fromString(""), fromString("hello").substring(0, 0)); + assertEquals(EMPTY_UTF8, fromString("hello").substring(0, 0)); assertEquals(fromString("el"), fromString("hello").substring(1, 3)); assertEquals(fromString("数"), fromString("数据砖头").substring(0, 1)); assertEquals(fromString("据砖"), fromString("数据砖头").substring(1, 3)); 
@@ -183,9 +191,9 @@ public class UTF8StringSuite { assertEquals(fromString("hello "), fromString(" hello ").trimLeft()); assertEquals(fromString(" hello"), fromString(" hello ").trimRight()); - assertEquals(fromString(""), fromString(" ").trim()); - assertEquals(fromString(""), fromString(" ").trimLeft()); - assertEquals(fromString(""), fromString(" ").trimRight()); + assertEquals(EMPTY_UTF8, fromString(" ").trim()); + assertEquals(EMPTY_UTF8, fromString(" ").trimLeft()); + assertEquals(EMPTY_UTF8, fromString(" ").trimRight()); assertEquals(fromString("数据砖头"), fromString(" 数据砖头 ").trim()); assertEquals(fromString("数据砖头 "), fromString(" 数据砖头 ").trimLeft()); @@ -198,9 +206,9 @@ public class UTF8StringSuite { @Test public void indexOf() { - assertEquals(0, fromString("").indexOf(fromString(""), 0)); - assertEquals(-1, fromString("").indexOf(fromString("l"), 0)); - assertEquals(0, fromString("hello").indexOf(fromString(""), 0)); + assertEquals(0, EMPTY_UTF8.indexOf(EMPTY_UTF8, 0)); + assertEquals(-1, EMPTY_UTF8.indexOf(fromString("l"), 0)); + assertEquals(0, fromString("hello").indexOf(EMPTY_UTF8, 0)); assertEquals(2, fromString("hello").indexOf(fromString("l"), 0)); assertEquals(3, fromString("hello").indexOf(fromString("l"), 3)); assertEquals(-1, fromString("hello").indexOf(fromString("a"), 0)); @@ -215,7 +223,7 @@ public class UTF8StringSuite { @Test public void reverse() { assertEquals(fromString("olleh"), fromString("hello").reverse()); - assertEquals(fromString(""), fromString("").reverse()); + assertEquals(EMPTY_UTF8, EMPTY_UTF8.reverse()); assertEquals(fromString("者行孙"), fromString("孙行者").reverse()); assertEquals(fromString("者行孙 olleh"), fromString("hello 孙行者").reverse()); } @@ -224,7 +232,7 @@ public class UTF8StringSuite { public void repeat() { assertEquals(fromString("数d数d数d数d数d"), fromString("数d").repeat(5)); assertEquals(fromString("数d"), fromString("数d").repeat(1)); - assertEquals(fromString(""), fromString("数d").repeat(-1)); + assertEquals(EMPTY_UTF8, 
fromString("数d").repeat(-1)); } @Test @@ -234,14 +242,14 @@ public class UTF8StringSuite { assertEquals(fromString("?hello"), fromString("hello").lpad(6, fromString("????"))); assertEquals(fromString("???????hello"), fromString("hello").lpad(12, fromString("????"))); assertEquals(fromString("?????hello"), fromString("hello").lpad(10, fromString("?????"))); - assertEquals(fromString("???????"), fromString("").lpad(7, fromString("?????"))); + assertEquals(fromString("???????"), EMPTY_UTF8.lpad(7, fromString("?????"))); assertEquals(fromString("hel"), fromString("hello").rpad(3, fromString("????"))); assertEquals(fromString("hello"), fromString("hello").rpad(5, fromString("????"))); assertEquals(fromString("hello?"), fromString("hello").rpad(6, fromString("????"))); assertEquals(fromString("hello???????"), fromString("hello").rpad(12, fromString("????"))); assertEquals(fromString("hello?????"), fromString("hello").rpad(10, fromString("?????"))); - assertEquals(fromString("???????"), fromString("").rpad(7, fromString("?????"))); + assertEquals(fromString("???????"), EMPTY_UTF8.rpad(7, fromString("?????"))); assertEquals(fromString("数据砖"), fromString("数据砖头").lpad(3, fromString("????"))); @@ -265,26 +273,16 @@ public class UTF8StringSuite { @Test public void levenshteinDistance() { - assertEquals( - UTF8String.fromString("").levenshteinDistance(UTF8String.fromString("")), 0); - assertEquals( - UTF8String.fromString("").levenshteinDistance(UTF8String.fromString("a")), 1); - assertEquals( - UTF8String.fromString("aaapppp").levenshteinDistance(UTF8String.fromString("")), 7); - assertEquals( - UTF8String.fromString("frog").levenshteinDistance(UTF8String.fromString("fog")), 1); - assertEquals( - UTF8String.fromString("fly").levenshteinDistance(UTF8String.fromString("ant")),3); - assertEquals( - UTF8String.fromString("elephant").levenshteinDistance(UTF8String.fromString("hippo")), 7); - assertEquals( - 
UTF8String.fromString("hippo").levenshteinDistance(UTF8String.fromString("elephant")), 7); - assertEquals( - UTF8String.fromString("hippo").levenshteinDistance(UTF8String.fromString("zzzzzzzz")), 8); - assertEquals( - UTF8String.fromString("hello").levenshteinDistance(UTF8String.fromString("hallo")),1); - assertEquals( - UTF8String.fromString("世界千世").levenshteinDistance(UTF8String.fromString("千a世b")),4); + assertEquals(EMPTY_UTF8.levenshteinDistance(EMPTY_UTF8), 0); + assertEquals(EMPTY_UTF8.levenshteinDistance(fromString("a")), 1); + assertEquals(fromString("aaapppp").levenshteinDistance(EMPTY_UTF8), 7); + assertEquals(fromString("frog").levenshteinDistance(fromString("fog")), 1); + assertEquals(fromString("fly").levenshteinDistance(fromString("ant")),3); + assertEquals(fromString("elephant").levenshteinDistance(fromString("hippo")), 7); + assertEquals(fromString("hippo").levenshteinDistance(fromString("elephant")), 7); + assertEquals(fromString("hippo").levenshteinDistance(fromString("zzzzzzzz")), 8); + assertEquals(fromString("hello").levenshteinDistance(fromString("hallo")),1); + assertEquals(fromString("世界千世").levenshteinDistance(fromString("千a世b")),4); } @Test |