diff options
author | Herman van Hovell <hvanhovell@databricks.com> | 2016-09-01 12:01:22 -0700 |
---|---|---|
committer | Josh Rosen <joshrosen@databricks.com> | 2016-09-01 12:01:22 -0700 |
commit | 2be5f8d7e0819de03971d0af6fa310793d2d0e65 (patch) | |
tree | 602c660b7215536f901af5cc729664a5f0b4cf49 /sql/catalyst/src/main/scala/org/apache | |
parent | a0aac4b775bc8c275f96ad0fbf85c9d8a3690588 (diff) | |
download | spark-2be5f8d7e0819de03971d0af6fa310793d2d0e65.tar.gz spark-2be5f8d7e0819de03971d0af6fa310793d2d0e65.tar.bz2 spark-2be5f8d7e0819de03971d0af6fa310793d2d0e65.zip |
[SPARK-17263][SQL] Add hexadecimal literal parsing
## What changes were proposed in this pull request?
This PR adds the ability to parse SQL (hexadecimal) binary literals (AKA bit strings). It follows the following syntax `X'[Hexadecimal Characters]+'`, for example: `X'01AB'` would create a binary the following binary array `0x01AB`.
If an uneven number of hexadecimal characters is passed, then the upper 4 bits of the initial byte are kept empty, and the lower 4 bits are filled using the first character. For example `X'1C7'` would create the following binary array `0x01C7`.
Binary data (`Array[Byte]`) does not have proper `hashCode` and `equals` functions. This meant that comparing `Literal`s containing binary data was a pain. I have updated `Literal.hashCode` and `Literal.equals` to deal properly with binary data.
## How was this patch tested?
Added tests to the `ExpressionParserSuite`, `SQLQueryTestSuite` and `ExpressionSQLBuilderSuite`.
Author: Herman van Hovell <hvanhovell@databricks.com>
Closes #14832 from hvanhovell/SPARK-17263.
Diffstat (limited to 'sql/catalyst/src/main/scala/org/apache')
-rw-r--r-- | sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala | 26 | ||||
-rw-r--r-- | sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala | 29 |
2 files changed, 41 insertions, 14 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala index 41e3952f0e..a597a17aad 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala @@ -19,7 +19,9 @@ package org.apache.spark.sql.catalyst.expressions import java.nio.charset.StandardCharsets import java.sql.{Date, Timestamp} +import java.util import java.util.Objects +import javax.xml.bind.DatatypeConverter import org.json4s.JsonAST._ @@ -168,14 +170,29 @@ case class Literal (value: Any, dataType: DataType) extends LeafExpression with override def foldable: Boolean = true override def nullable: Boolean = value == null - override def toString: String = if (value != null) value.toString else "null" + override def toString: String = value match { + case null => "null" + case binary: Array[Byte] => s"0x" + DatatypeConverter.printHexBinary(binary) + case other => other.toString + } - override def hashCode(): Int = 31 * (31 * Objects.hashCode(dataType)) + Objects.hashCode(value) + override def hashCode(): Int = { + val valueHashCode = value match { + case null => 0 + case binary: Array[Byte] => util.Arrays.hashCode(binary) + case other => other.hashCode() + } + 31 * Objects.hashCode(dataType) + valueHashCode + } override def equals(other: Any): Boolean = other match { + case o: Literal if !dataType.equals(o.dataType) => false case o: Literal => - dataType.equals(o.dataType) && - (value == null && null == o.value || value != null && value.equals(o.value)) + (value, o.value) match { + case (null, null) => true + case (a: Array[Byte], b: Array[Byte]) => util.Arrays.equals(a, b) + case (a, b) => a != null && a.equals(b) + } case _ => false } @@ -269,6 +286,7 @@ case class Literal (value: Any, dataType: DataType) extends LeafExpression with case (v: Decimal, t: 
DecimalType) => v + "BD" case (v: Int, DateType) => s"DATE '${DateTimeUtils.toJavaDate(v)}'" case (v: Long, TimestampType) => s"TIMESTAMP('${DateTimeUtils.toJavaTimestamp(v)}')" + case (v: Array[Byte], BinaryType) => s"X'${DatatypeConverter.printHexBinary(v)}'" case _ => value.toString } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 893db93368..42fbc16d03 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.catalyst.parser import java.sql.{Date, Timestamp} +import javax.xml.bind.DatatypeConverter import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer @@ -1215,19 +1216,27 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging { * {{{ * [TYPE] '[VALUE]' * }}} - * Currently Date and Timestamp typed literals are supported. - * - * TODO what the added value of this over casting? + * Currently Date, Timestamp and Binary typed literals are supported. 
*/ override def visitTypeConstructor(ctx: TypeConstructorContext): Literal = withOrigin(ctx) { val value = string(ctx.STRING) - ctx.identifier.getText.toUpperCase match { - case "DATE" => - Literal(Date.valueOf(value)) - case "TIMESTAMP" => - Literal(Timestamp.valueOf(value)) - case other => - throw new ParseException(s"Literals of type '$other' are currently not supported.", ctx) + val valueType = ctx.identifier.getText.toUpperCase + try { + valueType match { + case "DATE" => + Literal(Date.valueOf(value)) + case "TIMESTAMP" => + Literal(Timestamp.valueOf(value)) + case "X" => + val padding = if (value.length % 2 == 1) "0" else "" + Literal(DatatypeConverter.parseHexBinary(padding + value)) + case other => + throw new ParseException(s"Literals of type '$other' are currently not supported.", ctx) + } + } catch { + case e: IllegalArgumentException => + val message = Option(e.getMessage).getOrElse(s"Exception parsing $valueType") + throw new ParseException(message, ctx) } } |