diff options
author | Tarek Auel <tarek.auel@googlemail.com> | 2015-07-20 19:17:59 -0700 |
---|---|---|
committer | Michael Armbrust <michael@databricks.com> | 2015-07-20 19:17:59 -0700 |
commit | 936a96cb31a6dd7d8685bce05103e779ca02e763 (patch) | |
tree | 022bacb7c8a0047270dcd31b8f7c19c0b0e94dc9 | |
parent | e90543e5366808332bbde18d78cccd4d064a3338 (diff) | |
download | spark-936a96cb31a6dd7d8685bce05103e779ca02e763.tar.gz spark-936a96cb31a6dd7d8685bce05103e779ca02e763.tar.bz2 spark-936a96cb31a6dd7d8685bce05103e779ca02e763.zip |
[SPARK-9164] [SQL] codegen hex/unhex
Jira: https://issues.apache.org/jira/browse/SPARK-9164
The diff looks heavy, but I just moved the `hex` and `unhex` methods to `object Hex`. This allows me to call them from both `eval` and `codeGen`.
Author: Tarek Auel <tarek.auel@googlemail.com>
Closes #7548 from tarekauel/SPARK-9164 and squashes the following commits:
dd91c57 [Tarek Auel] [SPARK-9164][SQL] codegen hex/unhex
-rw-r--r-- | sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala | 96 |
1 file changed, 57 insertions, 39 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala index 7ce64d29ba..7a9be02ba4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala @@ -489,28 +489,8 @@ object Hex { (0 to 5).foreach(i => array('a' + i) = (i + 10).toByte) array } -} -/** - * If the argument is an INT or binary, hex returns the number as a STRING in hexadecimal format. - * Otherwise if the number is a STRING, it converts each character into its hex representation - * and returns the resulting STRING. Negative numbers would be treated as two's complement. - */ -case class Hex(child: Expression) - extends UnaryExpression with ImplicitCastInputTypes with CodegenFallback { - - override def inputTypes: Seq[AbstractDataType] = - Seq(TypeCollection(LongType, BinaryType, StringType)) - - override def dataType: DataType = StringType - - protected override def nullSafeEval(num: Any): Any = child.dataType match { - case LongType => hex(num.asInstanceOf[Long]) - case BinaryType => hex(num.asInstanceOf[Array[Byte]]) - case StringType => hex(num.asInstanceOf[UTF8String].getBytes) - } - - private[this] def hex(bytes: Array[Byte]): UTF8String = { + def hex(bytes: Array[Byte]): UTF8String = { val length = bytes.length val value = new Array[Byte](length * 2) var i = 0 @@ -522,7 +502,7 @@ case class Hex(child: Expression) UTF8String.fromBytes(value) } - private def hex(num: Long): UTF8String = { + def hex(num: Long): UTF8String = { // Extract the hex digits of num into value[] from right to left val value = new Array[Byte](16) var numBuf = num @@ -534,24 +514,8 @@ case class Hex(child: Expression) } while (numBuf != 0) UTF8String.fromBytes(java.util.Arrays.copyOfRange(value, value.length - len, value.length)) } -} -/** - * Performs the inverse operation of HEX. 
- * Resulting characters are returned as a byte array. - */ -case class Unhex(child: Expression) - extends UnaryExpression with ImplicitCastInputTypes with CodegenFallback { - - override def inputTypes: Seq[AbstractDataType] = Seq(StringType) - - override def nullable: Boolean = true - override def dataType: DataType = BinaryType - - protected override def nullSafeEval(num: Any): Any = - unhex(num.asInstanceOf[UTF8String].getBytes) - - private[this] def unhex(bytes: Array[Byte]): Array[Byte] = { + def unhex(bytes: Array[Byte]): Array[Byte] = { val out = new Array[Byte]((bytes.length + 1) >> 1) var i = 0 if ((bytes.length & 0x01) != 0) { @@ -583,6 +547,60 @@ case class Unhex(child: Expression) } } +/** + * If the argument is an INT or binary, hex returns the number as a STRING in hexadecimal format. + * Otherwise if the number is a STRING, it converts each character into its hex representation + * and returns the resulting STRING. Negative numbers would be treated as two's complement. + */ +case class Hex(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { + + override def inputTypes: Seq[AbstractDataType] = + Seq(TypeCollection(LongType, BinaryType, StringType)) + + override def dataType: DataType = StringType + + protected override def nullSafeEval(num: Any): Any = child.dataType match { + case LongType => Hex.hex(num.asInstanceOf[Long]) + case BinaryType => Hex.hex(num.asInstanceOf[Array[Byte]]) + case StringType => Hex.hex(num.asInstanceOf[UTF8String].getBytes) + } + + override protected def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = { + nullSafeCodeGen(ctx, ev, (c) => { + val hex = Hex.getClass.getName.stripSuffix("$") + s"${ev.primitive} = " + (child.dataType match { + case StringType => s"""$hex.hex($c.getBytes());""" + case _ => s"""$hex.hex($c);""" + }) + }) + } +} + +/** + * Performs the inverse operation of HEX. + * Resulting characters are returned as a byte array. 
+ */ +case class Unhex(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { + + override def inputTypes: Seq[AbstractDataType] = Seq(StringType) + + override def nullable: Boolean = true + override def dataType: DataType = BinaryType + + protected override def nullSafeEval(num: Any): Any = + Hex.unhex(num.asInstanceOf[UTF8String].getBytes) + + override protected def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = { + nullSafeCodeGen(ctx, ev, (c) => { + val hex = Hex.getClass.getName.stripSuffix("$") + s""" + ${ev.primitive} = $hex.unhex($c.getBytes()); + ${ev.isNull} = ${ev.primitive} == null; + """ + }) + } +} + //////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////// |