about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java | 4
-rw-r--r-- sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala | 6
-rw-r--r-- unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java | 30
3 files changed, 22 insertions, 18 deletions
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java
index c4b7f8490a..ed04d2e50e 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java
@@ -312,7 +312,6 @@ public final class UnsafeRow extends BaseMutableRow {
public UTF8String getUTF8String(int i) {
assertIndexIsValid(i);
- final UTF8String str = new UTF8String();
final long offsetToStringSize = getLong(i);
final int stringSizeInBytes =
(int) PlatformDependent.UNSAFE.getLong(baseObject, baseOffset + offsetToStringSize);
@@ -324,8 +323,7 @@ public final class UnsafeRow extends BaseMutableRow {
PlatformDependent.BYTE_ARRAY_OFFSET,
stringSizeInBytes
);
- str.set(strBytes);
- return str;
+ return UTF8String.fromBytes(strBytes);
}
@Override
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
index b20086bcc4..ad920f2878 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
@@ -438,17 +438,17 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression w
case (BinaryType, StringType) =>
defineCodeGen (ctx, ev, c =>
- s"new ${ctx.stringType}().set($c)")
+ s"${ctx.stringType}.fromBytes($c)")
case (DateType, StringType) =>
defineCodeGen(ctx, ev, c =>
- s"""new ${ctx.stringType}().set(
+ s"""${ctx.stringType}.fromString(
org.apache.spark.sql.catalyst.util.DateUtils.toString($c))""")
// Special handling required for timestamps in hive test cases since the toString function
// does not match the expected output.
case (TimestampType, StringType) =>
super.genCode(ctx, ev)
case (_, StringType) =>
- defineCodeGen(ctx, ev, c => s"new ${ctx.stringType}().set(String.valueOf($c))")
+ defineCodeGen(ctx, ev, c => s"${ctx.stringType}.fromString(String.valueOf($c))")
// fallback for DecimalType, this must be before other numeric types
case (_, dt: DecimalType) =>
diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
index a351680195..9871a70a40 100644
--- a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
+++ b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
@@ -20,7 +20,7 @@ package org.apache.spark.unsafe.types;
import java.io.Serializable;
import java.io.UnsupportedEncodingException;
import java.util.Arrays;
-import javax.annotation.Nullable;
+import javax.annotation.Nonnull;
import org.apache.spark.unsafe.PlatformDependent;
@@ -34,7 +34,7 @@ import org.apache.spark.unsafe.PlatformDependent;
*/
public final class UTF8String implements Comparable<UTF8String>, Serializable {
- @Nullable
+ @Nonnull
private byte[] bytes;
private static int[] bytesOfCodePointInUTF8 = {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
@@ -55,7 +55,7 @@ public final class UTF8String implements Comparable<UTF8String>, Serializable {
/**
* Updates the UTF8String with String.
*/
- public UTF8String set(final String str) {
+ protected UTF8String set(final String str) {
try {
bytes = str.getBytes("utf-8");
} catch (UnsupportedEncodingException e) {
@@ -69,7 +69,7 @@ public final class UTF8String implements Comparable<UTF8String>, Serializable {
/**
* Updates the UTF8String with byte[], which should be encoded in UTF-8.
+ * The given array is stored by reference (it is not copied), and this instance is returned.
*/
- public UTF8String set(final byte[] bytes) {
+ protected UTF8String set(final byte[] bytes) {
this.bytes = bytes;
return this;
}
@@ -131,24 +131,30 @@ public final class UTF8String implements Comparable<UTF8String>, Serializable {
}
for (int i = 0; i <= bytes.length - b.length; i++) {
- // TODO: Avoid copying.
- if (bytes[i] == b[0] && Arrays.equals(Arrays.copyOfRange(bytes, i, i + b.length), b)) {
+ if (bytes[i] == b[0] && startsWith(b, i)) {
return true;
}
}
return false;
}
+  /**
+   * Checks whether {@code prefix} matches this string's bytes beginning at
+   * {@code offsetInBytes}. Returns false when the offset is negative or when the
+   * prefix would extend past the end of the backing array.
+   */
+  private boolean startsWith(final byte[] prefix, int offsetInBytes) {
+    final int end = prefix.length + offsetInBytes;
+    if (end > bytes.length || offsetInBytes < 0) {
+      return false;
+    }
+    for (int k = 0; k < prefix.length; k++) {
+      if (prefix[k] != bytes[k + offsetInBytes]) {
+        return false;
+      }
+    }
+    return true;
+  }
+
public boolean startsWith(final UTF8String prefix) {
- final byte[] b = prefix.getBytes();
- // TODO: Avoid copying.
- return b.length <= bytes.length && Arrays.equals(Arrays.copyOfRange(bytes, 0, b.length), b);
+ return startsWith(prefix.getBytes(), 0); // a prefix is a match anchored at byte offset 0
}
+  /**
+   * Returns true if the bytes of this string end with the bytes of {@code suffix}.
+   */
public boolean endsWith(final UTF8String suffix) {
- final byte[] b = suffix.getBytes();
- return b.length <= bytes.length &&
- Arrays.equals(Arrays.copyOfRange(bytes, bytes.length - b.length, bytes.length), b);
+    // Fetch the suffix bytes once instead of calling getBytes() twice. When the suffix is
+    // longer than this string the offset is negative, which the offset-based startsWith
+    // helper rejects by returning false.
+    final byte[] suffixBytes = suffix.getBytes();
+    return startsWith(suffixBytes, bytes.length - suffixBytes.length);
}
public UTF8String toUpperCase() {