aboutsummaryrefslogtreecommitdiff
path: root/unsafe
diff options
context:
space:
mode:
author Reynold Xin <rxin@databricks.com> 2015-07-18 14:07:56 -0700
committer Reynold Xin <rxin@databricks.com> 2015-07-18 14:07:56 -0700
commit 6e1e2eba696e89ba57bf5450b9c72c4386e43dc8 (patch)
tree ff09024d2e45656f412ba39f331508f1cf436eab /unsafe
parent 3d2134fc0d90379b89da08de7614aef1ac674b1b (diff)
download spark-6e1e2eba696e89ba57bf5450b9c72c4386e43dc8.tar.gz
spark-6e1e2eba696e89ba57bf5450b9c72c4386e43dc8.tar.bz2
spark-6e1e2eba696e89ba57bf5450b9c72c4386e43dc8.zip
[SPARK-8240][SQL] string function: concat
Author: Reynold Xin <rxin@databricks.com> Closes #7486 from rxin/concat and squashes the following commits: 5217d6e [Reynold Xin] Removed Hive's concat test. f5cb7a3 [Reynold Xin] Concat is never nullable. ae4e61f [Reynold Xin] Removed extra import. fddcbbd [Reynold Xin] Fixed NPE. 22e831c [Reynold Xin] Added missing file. 57a2352 [Reynold Xin] [SPARK-8240][SQL] string function: concat
Diffstat (limited to 'unsafe')
-rw-r--r-- unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java 40
-rw-r--r-- unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java 14
2 files changed, 51 insertions, 3 deletions
diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
index e7f9fbb2bc..9723b6e083 100644
--- a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
+++ b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
@@ -21,6 +21,7 @@ import javax.annotation.Nonnull;
import java.io.Serializable;
import java.io.UnsupportedEncodingException;
+import org.apache.spark.unsafe.PlatformDependent;
import org.apache.spark.unsafe.array.ByteArrayMethods;
import static org.apache.spark.unsafe.PlatformDependent.*;
@@ -322,7 +323,7 @@ public final class UTF8String implements Comparable<UTF8String>, Serializable {
}
i += numBytesForFirstByte(getByte(i));
c += 1;
- } while(i < numBytes);
+ } while (i < numBytes);
return -1;
}
@@ -395,6 +396,39 @@ public final class UTF8String implements Comparable<UTF8String>, Serializable {
}
}
+ /**
+ * Concatenates input strings together into a single string. A null input is skipped.
+ * For example, concat("a", null, "c") would yield "ac".
+ *
+ * The result is built in two passes over {@code inputs}: the first pass sums the
+ * {@code numBytes} of every non-null element, the second pass copies each non-null
+ * element's bytes, in argument order, into a newly allocated byte array that is then
+ * wrapped with {@code fromBytes}.
+ *
+ * NOTE(review): {@code totalLength} is an int and the running sum is not checked for
+ * overflow -- confirm that callers cannot pass inputs whose combined size exceeds
+ * Integer.MAX_VALUE bytes.
+ *
+ * @param inputs the strings to concatenate, in order; the array itself may be null and
+ *               individual elements may be null
+ * @return a string made of the bytes of all non-null inputs back to back; a string backed
+ *         by a zero-length byte array when {@code inputs} is null or has no non-null
+ *         element
+ */
+ public static UTF8String concat(UTF8String... inputs) {
+ if (inputs == null) {
+ return fromBytes(new byte[0]); // a null argument array is treated like "no inputs"
+ }
+
+ // First pass: compute the total length of the result in bytes, ignoring null inputs.
+ int totalLength = 0;
+ for (int i = 0; i < inputs.length; i++) {
+ if (inputs[i] != null) {
+ totalLength += inputs[i].numBytes;
+ }
+ }
+
+ // Second pass: allocate a new byte array, and copy the non-null inputs one by one into it.
+ final byte[] result = new byte[totalLength];
+ int offset = 0; // number of bytes already written into result
+ for (int i = 0; i < inputs.length; i++) {
+ if (inputs[i] != null) {
+ int len = inputs[i].numBytes;
+ PlatformDependent.copyMemory(
+ inputs[i].base, inputs[i].offset, // source: the input's backing object and its offset
+ result, PlatformDependent.BYTE_ARRAY_OFFSET + offset, // destination: current write position in result
+ len);
+ offset += len; // advance the write position past the bytes just copied
+ }
+ }
+ return fromBytes(result);
+ }
+
@Override
public String toString() {
try {
@@ -413,7 +447,7 @@ public final class UTF8String implements Comparable<UTF8String>, Serializable {
}
@Override
- public int compareTo(final UTF8String other) {
+ public int compareTo(@Nonnull final UTF8String other) {
int len = Math.min(numBytes, other.numBytes);
// TODO: compare 8 bytes as unsigned long
for (int i = 0; i < len; i ++) {
@@ -434,7 +468,7 @@ public final class UTF8String implements Comparable<UTF8String>, Serializable {
public boolean equals(final Object other) {
if (other instanceof UTF8String) {
UTF8String o = (UTF8String) other;
- if (numBytes != o.numBytes){
+ if (numBytes != o.numBytes) {
return false;
}
return ByteArrayMethods.arrayEquals(base, offset, o.base, o.offset, numBytes);
diff --git a/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java b/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
index 694bdc29f3..0db7522b50 100644
--- a/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
+++ b/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
@@ -87,6 +87,20 @@ public class UTF8StringSuite {
}
@Test
+ public void concatTest() { // NOTE(review): calls below read as (actual, expected); JUnit's assertEquals takes (expected, actual) -- confirm intended order
+ assertEquals(concat(), fromString("")); // no arguments at all
+ assertEquals(concat(null), fromString("")); // a lone null literal is passed as the varargs array itself
+ assertEquals(concat(fromString("")), fromString("")); // single empty input
+ assertEquals(concat(fromString("ab")), fromString("ab")); // single non-empty input
+ assertEquals(concat(fromString("a"), fromString("b")), fromString("ab"));
+ assertEquals(concat(fromString("a"), fromString("b"), fromString("c")), fromString("abc"));
+ assertEquals(concat(fromString("a"), null, fromString("c")), fromString("ac")); // null element in the middle is skipped
+ assertEquals(concat(fromString("a"), null, null), fromString("a")); // trailing null elements are skipped
+ assertEquals(concat(null, null, null), fromString("")); // only null elements
+ assertEquals(concat(fromString("数据"), fromString("砖头")), fromString("数据砖头")); // non-ASCII inputs
+ }
+
+ @Test
public void contains() {
assertTrue(fromString("").contains(fromString("")));
assertTrue(fromString("hello").contains(fromString("ello")));