author     zhichao.li <zhichao.li@intel.com>    2015-06-29 12:25:16 -0700
committer  Davies Liu <davies@databricks.com>   2015-06-29 12:25:16 -0700
commit     637b4eedad84dcff1769454137a64ac70c7f2397 (patch)
tree       a021502f4b7e65bf252fb22e721c2cb529c7f54c /sql
parent     94e040d05996111b2b448bcdee1cda184c6d039b (diff)
[SPARK-8214] [SQL] Add function hex
cc chenghao-intel adrian-wang

Author: zhichao.li <zhichao.li@intel.com>

Closes #6976 from zhichao-li/hex and squashes the following commits:

e218d1b [zhichao.li] turn off scalastyle for non-ascii
de3f5ea [zhichao.li] non-ascii char
cf9c936 [zhichao.li] give separated buffer for each hex method
967ec90 [zhichao.li] make 'value' a field of Hex
3b2fa13 [zhichao.li] tiny fix
a647641 [zhichao.li] remove duplicate null check
7cab020 [zhichao.li] tiny refactoring
35ecfe5 [zhichao.li] add function hex
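Once registered in FunctionRegistry under the name "hex", the function is callable from SQL and through the new functions.hex helpers added in this patch. A minimal usage sketch, assuming a 1.5-era SQLContext with `import sqlContext.implicits._` in scope (data and column names are illustrative, mirroring the tests below):

    import org.apache.spark.sql.functions.hex

    // values mirror the added test cases
    val df = Seq((28, "hello")).toDF("a", "d")
    df.select(hex('a)).show()        // 28      -> "1C"
    df.selectExpr("hex(d)").show()   // "hello" -> "68656C6C6F"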
Diffstat (limited to 'sql')
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala |  1
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala | 86
-rw-r--r--  sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathFunctionsSuite.scala | 14
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/functions.scala | 16
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/MathExpressionsSuite.scala | 13
5 files changed, 125 insertions(+), 5 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index b24064d061..b17457d309 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -113,6 +113,7 @@ object FunctionRegistry {
expression[Expm1]("expm1"),
expression[Floor]("floor"),
expression[Hypot]("hypot"),
+ expression[Hex]("hex"),
expression[Logarithm]("log"),
expression[Log]("ln"),
expression[Log10]("log10"),
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala
index 5694afc61b..4b57ddd9c5 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala
@@ -18,9 +18,11 @@
package org.apache.spark.sql.catalyst.expressions
import java.lang.{Long => JLong}
+import java.util.Arrays
+import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
import org.apache.spark.sql.catalyst.expressions.codegen._
-import org.apache.spark.sql.types.{DataType, DoubleType, LongType, StringType}
+import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.UTF8String
/**
@@ -273,9 +275,6 @@ case class Atan2(left: Expression, right: Expression)
}
}
-case class Hypot(left: Expression, right: Expression)
- extends BinaryMathExpression(math.hypot, "HYPOT")
-
case class Pow(left: Expression, right: Expression)
extends BinaryMathExpression(math.pow, "POWER") {
override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
@@ -287,6 +286,85 @@ case class Pow(left: Expression, right: Expression)
}
}
+/**
+ * If the argument is an INT or binary, hex returns the number as a STRING in hexadecimal format.
+ * Otherwise, if the argument is a STRING, it converts each character into its hexadecimal
+ * representation and returns the resulting STRING.
+ * Negative numbers are treated as two's complement.
+ */
+case class Hex(child: Expression)
+ extends UnaryExpression with Serializable {
+
+ override def dataType: DataType = StringType
+
+ override def checkInputDataTypes(): TypeCheckResult = {
+ if (child.dataType.isInstanceOf[StringType]
+ || child.dataType.isInstanceOf[IntegerType]
+ || child.dataType.isInstanceOf[LongType]
+ || child.dataType.isInstanceOf[BinaryType]
+ || child.dataType == NullType) {
+ TypeCheckResult.TypeCheckSuccess
+ } else {
+ TypeCheckResult.TypeCheckFailure(s"hex doesn't accept ${child.dataType} type")
+ }
+ }
+
+ override def eval(input: InternalRow): Any = {
+ val num = child.eval(input)
+ if (num == null) {
+ null
+ } else {
+ child.dataType match {
+ case LongType => hex(num.asInstanceOf[Long])
+ case IntegerType => hex(num.asInstanceOf[Integer].toLong)
+ case BinaryType => hex(num.asInstanceOf[Array[Byte]])
+ case StringType => hex(num.asInstanceOf[UTF8String])
+ }
+ }
+ }
+
+ /**
+ * Converts every character in s to two hex digits.
+ */
+ private def hex(str: UTF8String): UTF8String = {
+ hex(str.getBytes)
+ }
+
+ private def hex(bytes: Array[Byte]): UTF8String = {
+ doHex(bytes, bytes.length)
+ }
+
+ private def doHex(bytes: Array[Byte], length: Int): UTF8String = {
+ val value = new Array[Byte](length * 2)
+ var i = 0
+ while (i < length) {
+ value(i * 2) = Character.toUpperCase(Character.forDigit(
+ (bytes(i) & 0xF0) >>> 4, 16)).toByte
+ value(i * 2 + 1) = Character.toUpperCase(Character.forDigit(
+ bytes(i) & 0x0F, 16)).toByte
+ i += 1
+ }
+ UTF8String.fromBytes(value)
+ }
+
+ private def hex(num: Long): UTF8String = {
+ // Extract the hex digits of num into value[] from right to left
+ val value = new Array[Byte](16)
+ var numBuf = num
+ var len = 0
+ do {
+ len += 1
+ value(value.length - len) = Character.toUpperCase(Character
+ .forDigit((numBuf & 0xF).toInt, 16)).toByte
+ numBuf >>>= 4
+ } while (numBuf != 0)
+ UTF8String.fromBytes(Arrays.copyOfRange(value, value.length - len, value.length))
+ }
+}
+
+case class Hypot(left: Expression, right: Expression)
+ extends BinaryMathExpression(math.hypot, "HYPOT")
+
case class Logarithm(left: Expression, right: Expression)
extends BinaryMathExpression((c1, c2) => math.log(c2) / math.log(c1), "LOG") {
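
For LongType input, the hex(num: Long) method above extracts hex digits four bits at a time from right to left using an unsigned shift, which is why a negative input comes out as its full 64-bit two's-complement pattern. A standalone sketch of the same loop, independent of the Expression machinery (REPL-pasteable; names are illustrative):

    import java.util.Arrays
    import java.nio.charset.StandardCharsets

    // Standalone version of the nibble-extraction loop used by Hex for LongType input.
    def hexOfLong(num: Long): String = {
      val value = new Array[Byte](16)      // a Long has at most 16 hex digits
      var numBuf = num
      var len = 0
      do {
        len += 1
        // take the lowest 4 bits and map them to an upper-case hex digit
        value(value.length - len) =
          Character.toUpperCase(Character.forDigit((numBuf & 0xF).toInt, 16)).toByte
        numBuf >>>= 4                      // unsigned shift keeps the loop finite for negatives
      } while (numBuf != 0)
      new String(Arrays.copyOfRange(value, value.length - len, value.length),
        StandardCharsets.US_ASCII)
    }

    // hexOfLong(28L)  == "1C"
    // hexOfLong(-28L) == "FFFFFFFFFFFFFFE4"   (64-bit two's complement)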
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathFunctionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathFunctionsSuite.scala
index 0d1d5ebdff..b932d4ab85 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathFunctionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathFunctionsSuite.scala
@@ -17,7 +17,6 @@
package org.apache.spark.sql.catalyst.expressions
-import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.types.{DataType, DoubleType, LongType}
@@ -226,6 +225,19 @@ class MathFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper {
testBinary(Pow, math.pow, Seq((-1.0, 0.9), (-2.2, 1.7), (-2.2, -1.7)), expectNull = true)
}
+ test("hex") {
+ checkEvaluation(Hex(Literal(28)), "1C")
+ checkEvaluation(Hex(Literal(-28)), "FFFFFFFFFFFFFFE4")
+ checkEvaluation(Hex(Literal(100800200404L)), "177828FED4")
+ checkEvaluation(Hex(Literal(-100800200404L)), "FFFFFFE887D7012C")
+ checkEvaluation(Hex(Literal("helloHex")), "68656C6C6F486578")
+ checkEvaluation(Hex(Literal("helloHex".getBytes())), "68656C6C6F486578")
+ // scalastyle:off
+ // Turn off scala style for non-ascii chars
+ checkEvaluation(Hex(Literal("δΈ‰ι‡ηš„")), "E4B889E9878DE79A84")
+ // scalastyle:on
+ }
+
test("hypot") {
testBinary(Hypot, math.hypot)
}
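
The non-ASCII case in this test passes because Hex works on the UTF-8 bytes of the string, converting each byte to two upper-case hex digits (each CJK character contributes three bytes). A small plain-JDK sketch of that byte-level conversion (no Spark types; the name is illustrative):

    import java.nio.charset.StandardCharsets

    // Per-byte hex conversion over a string's UTF-8 encoding, mirroring doHex above.
    def hexOfString(s: String): String = {
      val bytes = s.getBytes(StandardCharsets.UTF_8)
      val out = new StringBuilder(bytes.length * 2)
      bytes.foreach { b =>
        out.append(Character.toUpperCase(Character.forDigit((b & 0xF0) >>> 4, 16)))
        out.append(Character.toUpperCase(Character.forDigit(b & 0x0F, 16)))
      }
      out.toString
    }

    // hexOfString("helloHex") == "68656C6C6F486578"
    // hexOfString("δΈ‰ι‡ηš„")     == "E4B889E9878DE79A84"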
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index ef92801548..5422e066af 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -1047,6 +1047,22 @@ object functions {
def floor(columnName: String): Column = floor(Column(columnName))
/**
+ * Computes the hex value of the given column.
+ *
+ * @group math_funcs
+ * @since 1.5.0
+ */
+ def hex(column: Column): Column = Hex(column.expr)
+
+ /**
+ * Computes the hex value of the column with the given name.
+ *
+ * @group math_funcs
+ * @since 1.5.0
+ */
+ def hex(colName: String): Column = hex(Column(colName))
+
+ /**
* Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow.
*
* @group math_funcs
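
The two overloads added here differ only in how the target column is referenced; a short usage sketch (the DataFrame and column names are illustrative, matching the test below):

    import org.apache.spark.sql.functions.hex

    // assuming `df` has an integer column "a" and a string column "d"
    val byColumn = df.select(hex(df("a")))   // Column-based overload
    val byName   = df.select(hex("d"))       // name-based overload, equivalent to hex(Column("d"))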
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/MathExpressionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/MathExpressionsSuite.scala
index 2768d7dfc8..d6331aa4ff 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/MathExpressionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/MathExpressionsSuite.scala
@@ -212,6 +212,19 @@ class MathExpressionsSuite extends QueryTest {
)
}
+ test("hex") {
+ val data = Seq((28, -28, 100800200404L, "hello")).toDF("a", "b", "c", "d")
+ checkAnswer(data.select(hex('a)), Seq(Row("1C")))
+ checkAnswer(data.select(hex('b)), Seq(Row("FFFFFFFFFFFFFFE4")))
+ checkAnswer(data.select(hex('c)), Seq(Row("177828FED4")))
+ checkAnswer(data.select(hex('d)), Seq(Row("68656C6C6F")))
+ checkAnswer(data.selectExpr("hex(a)"), Seq(Row("1C")))
+ checkAnswer(data.selectExpr("hex(b)"), Seq(Row("FFFFFFFFFFFFFFE4")))
+ checkAnswer(data.selectExpr("hex(c)"), Seq(Row("177828FED4")))
+ checkAnswer(data.selectExpr("hex(d)"), Seq(Row("68656C6C6F")))
+ checkAnswer(data.selectExpr("hex(cast(d as binary))"), Seq(Row("68656C6C6F")))
+ }
+
test("hypot") {
testTwoToOneMathFunction(hypot, hypot, math.hypot)
}