author     zhichao.li <zhichao.li@intel.com>    2015-06-29 12:25:16 -0700
committer  Davies Liu <davies@databricks.com>   2015-06-29 12:25:16 -0700
commit     637b4eedad84dcff1769454137a64ac70c7f2397 (patch)
tree       a021502f4b7e65bf252fb22e721c2cb529c7f54c /sql
parent     94e040d05996111b2b448bcdee1cda184c6d039b (diff)
[SPARK-8214] [SQL] Add function hex
cc chenghao-intel adrian-wang

Author: zhichao.li <zhichao.li@intel.com>

Closes #6976 from zhichao-li/hex and squashes the following commits:

e218d1b [zhichao.li] turn off scalastyle for non-ascii
de3f5ea [zhichao.li] non-ascii char
cf9c936 [zhichao.li] give separated buffer for each hex method
967ec90 [zhichao.li] make 'value' a field of Hex
3b2fa13 [zhichao.li] tiny fix
a647641 [zhichao.li] remove duplicate null check
7cab020 [zhichao.li] tiny refactoring
35ecfe5 [zhichao.li] add function hex
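Once registered in FunctionRegistry under the name "hex", the function is callable from SQL and through the new functions.hex helpers added in this patch. A minimal usage sketch, assuming a 1.5-era SQLContext with `import sqlContext.implicits._` in scope (data and column names are illustrative, mirroring the tests below):

    import org.apache.spark.sql.functions.hex

    // values mirror the added test cases
    val df = Seq((28, "hello")).toDF("a", "d")
    df.select(hex('a)).show()        // 28      -> "1C"
    df.selectExpr("hex(d)").show()   // "hello" -> "68656C6C6F"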
Diffstat (limited to 'sql')
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala |  1
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala | 86
-rw-r--r--  sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathFunctionsSuite.scala | 14
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/functions.scala | 16
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/MathExpressionsSuite.scala | 13
5 files changed, 125 insertions(+), 5 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index b24064d061..b17457d309 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -113,6 +113,7 @@ object FunctionRegistry {
expression[Expm1]("expm1"),
expression[Floor]("floor"),
expression[Hypot]("hypot"),
+ expression[Hex]("hex"),
expression[Logarithm]("log"),
expression[Log]("ln"),
expression[Log10]("log10"),
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala
index 5694afc61b..4b57ddd9c5 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala
@@ -18,9 +18,11 @@
package org.apache.spark.sql.catalyst.expressions
import java.lang.{Long => JLong}
+import java.util.Arrays
+import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
import org.apache.spark.sql.catalyst.expressions.codegen._
-import org.apache.spark.sql.types.{DataType, DoubleType, LongType, StringType}
+import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.UTF8String
/**
@@ -273,9 +275,6 @@ case class Atan2(left: Expression, right: Expression)
}
}
-case class Hypot(left: Expression, right: Expression)
- extends BinaryMathExpression(math.hypot, "HYPOT")
-
case class Pow(left: Expression, right: Expression)
extends BinaryMathExpression(math.pow, "POWER") {
override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
@@ -287,6 +286,85 @@ case class Pow(left: Expression, right: Expression)
}
}
+/**
+ * If the argument is an INT or binary, hex returns the number as a STRING in hexadecimal format.
+ * Otherwise, if the argument is a STRING, it converts each character into its hexadecimal
+ * representation and returns the resulting STRING.
+ * Negative numbers are treated as two's complement.
+ */
+case class Hex(child: Expression)
+ extends UnaryExpression with Serializable {
+
+ override def dataType: DataType = StringType
+
+ override def checkInputDataTypes(): TypeCheckResult = {
+ if (child.dataType.isInstanceOf[StringType]
+ || child.dataType.isInstanceOf[IntegerType]
+ || child.dataType.isInstanceOf[LongType]
+ || child.dataType.isInstanceOf[BinaryType]
+ || child.dataType == NullType) {
+ TypeCheckResult.TypeCheckSuccess
+ } else {
+ TypeCheckResult.TypeCheckFailure(s"hex doesn't accept ${child.dataType} type")
+ }
+ }
+
+ override def eval(input: InternalRow): Any = {
+ val num = child.eval(input)
+ if (num == null) {
+ null
+ } else {
+ child.dataType match {
+ case LongType => hex(num.asInstanceOf[Long])
+ case IntegerType => hex(num.asInstanceOf[Integer].toLong)
+ case BinaryType => hex(num.asInstanceOf[Array[Byte]])
+ case StringType => hex(num.asInstanceOf[UTF8String])
+ }
+ }
+ }
+
+ /**
+ * Converts every character in s to two hex digits.
+ */
+ private def hex(str: UTF8String): UTF8String = {
+ hex(str.getBytes)
+ }
+
+ private def hex(bytes: Array[Byte]): UTF8String = {
+ doHex(bytes, bytes.length)
+ }
+
+ private def doHex(bytes: Array[Byte], length: Int): UTF8String = {
+ val value = new Array[Byte](length * 2)
+ var i = 0
+ while (i < length) {
+ value(i * 2) = Character.toUpperCase(Character.forDigit(
+ (bytes(i) & 0xF0) >>> 4, 16)).toByte
+ value(i * 2 + 1) = Character.toUpperCase(Character.forDigit(
+ bytes(i) & 0x0F, 16)).toByte
+ i += 1
+ }
+ UTF8String.fromBytes(value)
+ }
+
+ private def hex(num: Long): UTF8String = {
+ // Extract the hex digits of num into value[] from right to left
+ val value = new Array[Byte](16)
+ var numBuf = num
+ var len = 0
+ do {
+ len += 1
+ value(value.length - len) = Character.toUpperCase(Character
+ .forDigit((numBuf & 0xF).toInt, 16)).toByte
+ numBuf >>>= 4
+ } while (numBuf != 0)
+ UTF8String.fromBytes(Arrays.copyOfRange(value, value.length - len, value.length))
+ }
+}
+
+case class Hypot(left: Expression, right: Expression)
+ extends BinaryMathExpression(math.hypot, "HYPOT")
+
case class Logarithm(left: Expression, right: Expression)
extends BinaryMathExpression((c1, c2) => math.log(c2) / math.log(c1), "LOG") {
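
For LongType input, the hex(num: Long) method above extracts hex digits four bits at a time from right to left using an unsigned shift, which is why a negative input comes out as its full 64-bit two's-complement pattern. A standalone sketch of the same loop, independent of the Expression machinery (REPL-pasteable; names are illustrative):

    import java.util.Arrays
    import java.nio.charset.StandardCharsets

    // Standalone version of the nibble-extraction loop used by Hex for LongType input.
    def hexOfLong(num: Long): String = {
      val value = new Array[Byte](16)      // a Long has at most 16 hex digits
      var numBuf = num
      var len = 0
      do {
        len += 1
        // take the lowest 4 bits and map them to an upper-case hex digit
        value(value.length - len) =
          Character.toUpperCase(Character.forDigit((numBuf & 0xF).toInt, 16)).toByte
        numBuf >>>= 4                      // unsigned shift keeps the loop finite for negatives
      } while (numBuf != 0)
      new String(Arrays.copyOfRange(value, value.length - len, value.length),
        StandardCharsets.US_ASCII)
    }

    // hexOfLong(28L)  == "1C"
    // hexOfLong(-28L) == "FFFFFFFFFFFFFFE4"   (64-bit two's complement)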
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathFunctionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathFunctionsSuite.scala
index 0d1d5ebdff..b932d4ab85 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathFunctionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathFunctionsSuite.scala
@@ -17,7 +17,6 @@
package org.apache.spark.sql.catalyst.expressions
-import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.types.{DataType, DoubleType, LongType}
@@ -226,6 +225,19 @@ class MathFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper {
testBinary(Pow, math.pow, Seq((-1.0, 0.9), (-2.2, 1.7), (-2.2, -1.7)), expectNull = true)
}
+ test("hex") {
+ checkEvaluation(Hex(Literal(28)), "1C")
+ checkEvaluation(Hex(Literal(-28)), "FFFFFFFFFFFFFFE4")
+ checkEvaluation(Hex(Literal(100800200404L)), "177828FED4")
+ checkEvaluation(Hex(Literal(-100800200404L)), "FFFFFFE887D7012C")
+ checkEvaluation(Hex(Literal("helloHex")), "68656C6C6F486578")
+ checkEvaluation(Hex(Literal("helloHex".getBytes())), "68656C6C6F486578")
+ // scalastyle:off
+ // Turn off scala style for non-ascii chars
+ checkEvaluation(Hex(Literal("δΈ‰ι‡ηš„")), "E4B889E9878DE79A84")
+ // scalastyle:on
+ }
+
test("hypot") {
testBinary(Hypot, math.hypot)
}
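
The non-ASCII case in this test passes because Hex works on the UTF-8 bytes of the string, converting each byte to two upper-case hex digits (each CJK character contributes three bytes). A small plain-JDK sketch of that byte-level conversion (no Spark types; the name is illustrative):

    import java.nio.charset.StandardCharsets

    // Per-byte hex conversion over a string's UTF-8 encoding, mirroring doHex above.
    def hexOfString(s: String): String = {
      val bytes = s.getBytes(StandardCharsets.UTF_8)
      val out = new StringBuilder(bytes.length * 2)
      bytes.foreach { b =>
        out.append(Character.toUpperCase(Character.forDigit((b & 0xF0) >>> 4, 16)))
        out.append(Character.toUpperCase(Character.forDigit(b & 0x0F, 16)))
      }
      out.toString
    }

    // hexOfString("helloHex") == "68656C6C6F486578"
    // hexOfString("δΈ‰ι‡ηš„")     == "E4B889E9878DE79A84"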
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index ef92801548..5422e066af 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -1047,6 +1047,22 @@ object functions {
def floor(columnName: String): Column = floor(Column(columnName))
/**
+ * Computes the hex value of the given column.
+ *
+ * @group math_funcs
+ * @since 1.5.0
+ */
+ def hex(column: Column): Column = Hex(column.expr)
+
+ /**
+ * Computes the hex value of the column with the given name.
+ *
+ * @group math_funcs
+ * @since 1.5.0
+ */
+ def hex(colName: String): Column = hex(Column(colName))
+
+ /**
* Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow.
*
* @group math_funcs
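
The two overloads added here differ only in how the target column is referenced; a short usage sketch (the DataFrame and column names are illustrative, matching the test below):

    import org.apache.spark.sql.functions.hex

    // assuming `df` has an integer column "a" and a string column "d"
    val byColumn = df.select(hex(df("a")))   // Column-based overload
    val byName   = df.select(hex("d"))       // name-based overload, equivalent to hex(Column("d"))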
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/MathExpressionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/MathExpressionsSuite.scala
index 2768d7dfc8..d6331aa4ff 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/MathExpressionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/MathExpressionsSuite.scala
@@ -212,6 +212,19 @@ class MathExpressionsSuite extends QueryTest {
)
}
+ test("hex") {
+ val data = Seq((28, -28, 100800200404L, "hello")).toDF("a", "b", "c", "d")
+ checkAnswer(data.select(hex('a)), Seq(Row("1C")))
+ checkAnswer(data.select(hex('b)), Seq(Row("FFFFFFFFFFFFFFE4")))
+ checkAnswer(data.select(hex('c)), Seq(Row("177828FED4")))
+ checkAnswer(data.select(hex('d)), Seq(Row("68656C6C6F")))
+ checkAnswer(data.selectExpr("hex(a)"), Seq(Row("1C")))
+ checkAnswer(data.selectExpr("hex(b)"), Seq(Row("FFFFFFFFFFFFFFE4")))
+ checkAnswer(data.selectExpr("hex(c)"), Seq(Row("177828FED4")))
+ checkAnswer(data.selectExpr("hex(d)"), Seq(Row("68656C6C6F")))
+ checkAnswer(data.selectExpr("hex(cast(d as binary))"), Seq(Row("68656C6C6F")))
+ }
+
test("hypot") {
testTwoToOneMathFunction(hypot, hypot, math.hypot)
}