aboutsummaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorzhichao.li <zhichao.li@intel.com>2015-07-01 22:19:51 -0700
committerDavies Liu <davies@databricks.com>2015-07-01 22:19:51 -0700
commitb285ac5ba85fe0b32b00726ad7d3a2efb602e885 (patch)
treedd4903378a6ff8c7543c2740782ddccb9c7801af /sql
parent4e4f74b5e1267d1ada4a8f57b86aee0d9c17d90a (diff)
downloadspark-b285ac5ba85fe0b32b00726ad7d3a2efb602e885.tar.gz
spark-b285ac5ba85fe0b32b00726ad7d3a2efb602e885.tar.bz2
spark-b285ac5ba85fe0b32b00726ad7d3a2efb602e885.zip
[SPARK-8227] [SQL] Add function unhex
cc chenghao-intel adrian-wang Author: zhichao.li <zhichao.li@intel.com> Closes #7113 from zhichao-li/unhex and squashes the following commits: 379356e [zhichao.li] remove exception checking a4ae6dc [zhichao.li] add udf_unhex to whitelist fe5c14a [zhichao.li] add todigit 607d7a3 [zhichao.li] use checkInputTypes bffd37f [zhichao.li] change to use Hex in apache common package cde73f5 [zhichao.li] update to use AutoCastInputTypes 11945c7 [zhichao.li] style c852d46 [zhichao.li] Add function unhex
Diffstat (limited to 'sql')
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala1
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala52
-rw-r--r--sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathFunctionsSuite.scala6
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/functions.scala18
-rw-r--r--sql/core/src/test/scala/org/apache/spark/sql/MathExpressionsSuite.scala10
-rw-r--r--sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala1
6 files changed, 88 insertions, 0 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index d53eaedda5..6f04298d47 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -157,6 +157,7 @@ object FunctionRegistry {
expression[Substring]("substr"),
expression[Substring]("substring"),
expression[Upper]("ucase"),
+ expression[UnHex]("unhex"),
expression[Upper]("upper")
)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala
index b51318dd50..8633eb06ff 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala
@@ -351,6 +351,58 @@ case class Pow(left: Expression, right: Expression)
}
}
+/**
+ * Performs the inverse operation of HEX.
+ * Resulting characters are returned as a byte array.
+ *
+ * Every pair of hexadecimal digits in the input string produces one output byte. An input
+ * with an odd number of characters is read as if it were prefixed with '0'. If the input
+ * contains any byte that is not an ASCII hexadecimal digit (0-9, a-f, A-F), including any
+ * byte of a non-ASCII character, the result is null.
+ */
+case class UnHex(child: Expression) extends UnaryExpression with Serializable {
+
+  // Invalid hex input evaluates to null even when the child itself is never null, so this
+  // expression must always be reported as nullable.
+  override def nullable: Boolean = true
+
+  override def dataType: DataType = BinaryType
+
+  override def checkInputDataTypes(): TypeCheckResult = {
+    if (child.dataType.isInstanceOf[StringType] || child.dataType == NullType) {
+      TypeCheckResult.TypeCheckSuccess
+    } else {
+      TypeCheckResult.TypeCheckFailure(s"unHex accepts String type, not ${child.dataType}")
+    }
+  }
+
+  override def eval(input: InternalRow): Any = {
+    val num = child.eval(input)
+    if (num == null) {
+      null
+    } else {
+      unhex(num.asInstanceOf[UTF8String].getBytes)
+    }
+  }
+
+  // Lookup table indexed by ASCII code (0..127): the digit's value for 0-9, A-F and a-f,
+  // and -1 for every other code.
+  private val unhexDigits = {
+    val array = Array.fill[Byte](128)(-1)
+    (0 to 9).foreach(i => array('0' + i) = i.toByte)
+    (0 to 5).foreach(i => array('A' + i) = (i + 10).toByte)
+    (0 to 5).foreach(i => array('a' + i) = (i + 10).toByte)
+    array
+  }
+
+  /**
+   * Returns the value (0..15) of a single hex digit, or -1 if `b` is not a hex digit.
+   * Bytes are signed, so bytes >= 0x80 (parts of multi-byte UTF-8 characters) are negative
+   * and must be rejected before they are used as an index into the lookup table.
+   */
+  private def digitValue(b: Byte): Byte = if (b < 0) -1 else unhexDigits(b)
+
+  /**
+   * Decodes the hex digits in `inputBytes` into raw bytes.
+   *
+   * @return the decoded bytes, or null if any input byte is not a hex digit
+   */
+  private def unhex(inputBytes: Array[Byte]): Array[Byte] = {
+    // An odd number of digits is interpreted as having an implicit leading '0'.
+    val bytes =
+      if ((inputBytes.length & 0x01) != 0) '0'.toByte +: inputBytes else inputBytes
+    val out = new Array[Byte](bytes.length >> 1)
+    // two characters form the hex value.
+    var i = 0
+    while (i < bytes.length) {
+      val first = digitValue(bytes(i))
+      val second = digitValue(bytes(i + 1))
+      if (first == -1 || second == -1) {
+        return null
+      }
+      out(i / 2) = (((first << 4) | second) & 0xFF).toByte
+      i += 2
+    }
+    out
+  }
+}
+
case class Hypot(left: Expression, right: Expression)
extends BinaryMathExpression(math.hypot, "HYPOT")
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathFunctionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathFunctionsSuite.scala
index b932d4ab85..b3345d7069 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathFunctionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathFunctionsSuite.scala
@@ -238,6 +238,12 @@ class MathFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper {
// scalastyle:on
}
+ test("unhex") {
+   checkEvaluation(UnHex(Literal("737472696E67")), "string".getBytes)
+   // Lowercase hex digits decode to the same bytes as uppercase ones.
+   checkEvaluation(UnHex(Literal("737472696e67")), "string".getBytes)
+   checkEvaluation(UnHex(Literal("")), new Array[Byte](0))
+   // An odd-length input is decoded as if it had a leading '0'.
+   checkEvaluation(UnHex(Literal("0")), Array[Byte](0))
+   checkEvaluation(UnHex(Literal("F")), Array[Byte](15))
+ }
+
test("hypot") {
testBinary(Hypot, math.hypot)
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index 4e8f3f96bf..e6f623bdf3 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -1054,6 +1054,24 @@ object functions {
def hex(colName: String): Column = hex(Column(colName))
/**
+ * Inverse of hex. Interprets each pair of characters as a hexadecimal number
+ * and converts to the byte representation of number. The result is a binary column.
+ *
+ * An input with an odd number of characters is decoded as if it were prefixed with '0'.
+ * Inputs such as "##" or "G123", which contain characters that are not hexadecimal
+ * digits, yield null.
+ *
+ * @group math_funcs
+ * @since 1.5.0
+ */
+ def unhex(column: Column): Column = UnHex(column.expr)
+
+ /**
+ * Inverse of hex. Interprets each pair of characters as a hexadecimal number
+ * and converts to the byte representation of number. The column is looked up by name
+ * and the result is a binary column.
+ *
+ * An input with an odd number of characters is decoded as if it were prefixed with '0'.
+ * Inputs such as "##" or "G123", which contain characters that are not hexadecimal
+ * digits, yield null.
+ *
+ * @group math_funcs
+ * @since 1.5.0
+ */
+ def unhex(colName: String): Column = unhex(Column(colName))
+
+ /**
* Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow.
*
* @group math_funcs
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/MathExpressionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/MathExpressionsSuite.scala
index d6331aa4ff..c03cde38d7 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/MathExpressionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/MathExpressionsSuite.scala
@@ -225,6 +225,16 @@ class MathExpressionsSuite extends QueryTest {
checkAnswer(data.selectExpr("hex(cast(d as binary))"), Seq(Row("68656C6C6F")))
}
+ test("unhex") {
+   val df = Seq(("1C", "737472696E67")).toDF("a", "b")
+   // Expected decodings of columns `a` and `b`.
+   val singleByte = Row(Array[Byte](28.toByte))
+   val stringBytes = Row("string".getBytes)
+
+   // Column-based API.
+   checkAnswer(df.select(unhex('a)), singleByte)
+   checkAnswer(df.select(unhex('b)), stringBytes)
+
+   // SQL expression API.
+   checkAnswer(df.selectExpr("unhex(a)"), singleByte)
+   checkAnswer(df.selectExpr("unhex(b)"), stringBytes)
+
+   // Input containing non-hexadecimal characters decodes to null.
+   val nullRow = Row(null)
+   checkAnswer(df.selectExpr("""unhex("##")"""), nullRow)
+   checkAnswer(df.selectExpr("""unhex("G123")"""), nullRow)
+ }
+
test("hypot") {
testTwoToOneMathFunction(hypot, hypot, math.hypot)
}
diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
index f88e62763c..415a81644c 100644
--- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
+++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
@@ -949,6 +949,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"udf_trim",
"udf_ucase",
"udf_unix_timestamp",
+ "udf_unhex",
"udf_upper",
"udf_var_pop",
"udf_var_samp",