diff options
author | Takeshi YAMAMURO <linguin.m.s@gmail.com> | 2015-10-01 21:33:27 -0400 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2015-10-01 21:33:27 -0400 |
commit | 2272962eb087ffedaee12c761506e33e45bd0239 (patch) | |
tree | 879167d8dc582986f685ec2338d7696e3317319d /sql | |
parent | 01cd688f5245cbb752863100b399b525b31c3510 (diff) | |
download | spark-2272962eb087ffedaee12c761506e33e45bd0239.tar.gz spark-2272962eb087ffedaee12c761506e33e45bd0239.tar.bz2 spark-2272962eb087ffedaee12c761506e33e45bd0239.zip |
[SPARK-9867] [SQL] Move utilities for binary data into ByteArray
Utilities for binary data, such as Substring#subStringBinarySQL and BinaryPrefixComparator#computePrefix, are consolidated into ByteArray to make the code easier to read.
Author: Takeshi YAMAMURO <linguin.m.s@gmail.com>
Closes #8122 from maropu/CleanUpForBinaryType.
Diffstat (limited to 'sql')
-rw-r--r-- | sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala | 39 |
1 file changed, 4 insertions, 35 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index a09d5b6e3a..4ab27c044f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -18,14 +18,12 @@ package org.apache.spark.sql.catalyst.expressions import java.text.DecimalFormat -import java.util.Arrays -import java.util.{Map => JMap, HashMap} -import java.util.Locale +import java.util.{HashMap, Locale, Map => JMap} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.types._ -import org.apache.spark.unsafe.types.UTF8String +import org.apache.spark.unsafe.types.{ByteArray, UTF8String} //////////////////////////////////////////////////////////////////////////////////////////////////// // This file defines expressions for string operations. @@ -690,34 +688,6 @@ case class StringSpace(child: Expression) override def prettyName: String = "space" } -object Substring { - def subStringBinarySQL(bytes: Array[Byte], pos: Int, len: Int): Array[Byte] = { - if (pos > bytes.length) { - return Array[Byte]() - } - - var start = if (pos > 0) { - pos - 1 - } else if (pos < 0) { - bytes.length + pos - } else { - 0 - } - - val end = if ((bytes.length - start) < len) { - bytes.length - } else { - start + len - } - - start = Math.max(start, 0) // underflow - if (start < end) { - Arrays.copyOfRange(bytes, start, end) - } else { - Array[Byte]() - } - } -} /** * A function that takes a substring of its first argument starting at a given position. * Defined for String and Binary types. 
@@ -740,18 +710,17 @@ case class Substring(str: Expression, pos: Expression, len: Expression) str.dataType match { case StringType => string.asInstanceOf[UTF8String] .substringSQL(pos.asInstanceOf[Int], len.asInstanceOf[Int]) - case BinaryType => Substring.subStringBinarySQL(string.asInstanceOf[Array[Byte]], + case BinaryType => ByteArray.subStringSQL(string.asInstanceOf[Array[Byte]], pos.asInstanceOf[Int], len.asInstanceOf[Int]) } } override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = { - val cls = classOf[Substring].getName defineCodeGen(ctx, ev, (string, pos, len) => { str.dataType match { case StringType => s"$string.substringSQL($pos, $len)" - case BinaryType => s"$cls.subStringBinarySQL($string, $pos, $len)" + case BinaryType => s"${classOf[ByteArray].getName}.subStringSQL($string, $pos, $len)" } }) } |