diff options
author | Reynold Xin <rxin@databricks.com> | 2015-07-18 14:07:56 -0700 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2015-07-18 14:07:56 -0700 |
commit | 6e1e2eba696e89ba57bf5450b9c72c4386e43dc8 (patch) | |
tree | ff09024d2e45656f412ba39f331508f1cf436eab /unsafe | |
parent | 3d2134fc0d90379b89da08de7614aef1ac674b1b (diff) | |
download | spark-6e1e2eba696e89ba57bf5450b9c72c4386e43dc8.tar.gz spark-6e1e2eba696e89ba57bf5450b9c72c4386e43dc8.tar.bz2 spark-6e1e2eba696e89ba57bf5450b9c72c4386e43dc8.zip |
[SPARK-8240][SQL] string function: concat
Author: Reynold Xin <rxin@databricks.com>
Closes #7486 from rxin/concat and squashes the following commits:
5217d6e [Reynold Xin] Removed Hive's concat test.
f5cb7a3 [Reynold Xin] Concat is never nullable.
ae4e61f [Reynold Xin] Removed extra import.
fddcbbd [Reynold Xin] Fixed NPE.
22e831c [Reynold Xin] Added missing file.
57a2352 [Reynold Xin] [SPARK-8240][SQL] string function: concat
Diffstat (limited to 'unsafe')
-rw-r--r-- | unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java | 40 | ||||
-rw-r--r-- | unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java | 14 |
2 files changed, 51 insertions, 3 deletions
diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java index e7f9fbb2bc..9723b6e083 100644 --- a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java +++ b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java @@ -21,6 +21,7 @@ import javax.annotation.Nonnull; import java.io.Serializable; import java.io.UnsupportedEncodingException; +import org.apache.spark.unsafe.PlatformDependent; import org.apache.spark.unsafe.array.ByteArrayMethods; import static org.apache.spark.unsafe.PlatformDependent.*; @@ -322,7 +323,7 @@ public final class UTF8String implements Comparable<UTF8String>, Serializable { } i += numBytesForFirstByte(getByte(i)); c += 1; - } while(i < numBytes); + } while (i < numBytes); return -1; } @@ -395,6 +396,39 @@ public final class UTF8String implements Comparable<UTF8String>, Serializable { } } + /** + * Concatenates input strings together into a single string. A null input is skipped. + * For example, concat("a", null, "c") would yield "ac". + */ + public static UTF8String concat(UTF8String... inputs) { + if (inputs == null) { + return fromBytes(new byte[0]); + } + + // Compute the total length of the result. + int totalLength = 0; + for (int i = 0; i < inputs.length; i++) { + if (inputs[i] != null) { + totalLength += inputs[i].numBytes; + } + } + + // Allocate a new byte array, and copy the inputs one by one into it. + final byte[] result = new byte[totalLength]; + int offset = 0; + for (int i = 0; i < inputs.length; i++) { + if (inputs[i] != null) { + int len = inputs[i].numBytes; + PlatformDependent.copyMemory( + inputs[i].base, inputs[i].offset, + result, PlatformDependent.BYTE_ARRAY_OFFSET + offset, + len); + offset += len; + } + } + return fromBytes(result); + } + @Override public String toString() { try { @@ -413,7 +447,7 @@ public final class UTF8String implements Comparable<UTF8String>, Serializable { } @Override - public int compareTo(final UTF8String other) { + public int compareTo(@Nonnull final UTF8String other) { int len = Math.min(numBytes, other.numBytes); // TODO: compare 8 bytes as unsigned long for (int i = 0; i < len; i ++) { @@ -434,7 +468,7 @@ public final class UTF8String implements Comparable<UTF8String>, Serializable { public boolean equals(final Object other) { if (other instanceof UTF8String) { UTF8String o = (UTF8String) other; - if (numBytes != o.numBytes){ + if (numBytes != o.numBytes) { return false; } return ByteArrayMethods.arrayEquals(base, offset, o.base, o.offset, numBytes); diff --git a/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java b/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java index 694bdc29f3..0db7522b50 100644 --- a/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java +++ b/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java @@ -87,6 +87,20 @@ public class UTF8StringSuite { } @Test + public void concatTest() { + assertEquals(concat(), fromString("")); + assertEquals(concat(null), fromString("")); + assertEquals(concat(fromString("")), fromString("")); + assertEquals(concat(fromString("ab")), fromString("ab")); + assertEquals(concat(fromString("a"), fromString("b")), fromString("ab")); + assertEquals(concat(fromString("a"), fromString("b"), fromString("c")), fromString("abc")); + assertEquals(concat(fromString("a"), null, fromString("c")), fromString("ac")); + assertEquals(concat(fromString("a"), null, null), fromString("a")); + assertEquals(concat(null, null, null), fromString("")); + assertEquals(concat(fromString("数据"), fromString("砖头")), fromString("数据砖头")); + } + + @Test public void contains() { assertTrue(fromString("").contains(fromString(""))); assertTrue(fromString("hello").contains(fromString("ello"))); |