aboutsummaryrefslogtreecommitdiff
path: root/sql/catalyst/src/main/scala/org/apache
diff options
context:
space:
mode:
authorHerman van Hovell <hvanhovell@databricks.com>2016-09-01 12:01:22 -0700
committerJosh Rosen <joshrosen@databricks.com>2016-09-01 12:01:22 -0700
commit2be5f8d7e0819de03971d0af6fa310793d2d0e65 (patch)
tree602c660b7215536f901af5cc729664a5f0b4cf49 /sql/catalyst/src/main/scala/org/apache
parenta0aac4b775bc8c275f96ad0fbf85c9d8a3690588 (diff)
downloadspark-2be5f8d7e0819de03971d0af6fa310793d2d0e65.tar.gz
spark-2be5f8d7e0819de03971d0af6fa310793d2d0e65.tar.bz2
spark-2be5f8d7e0819de03971d0af6fa310793d2d0e65.zip
[SPARK-17263][SQL] Add hexadecimal literal parsing
## What changes were proposed in this pull request? This PR adds the ability to parse SQL (hexadecimal) binary literals (AKA bit strings). It follows the following syntax `X'[Hexadecimal Characters]+'`, for example: `X'01AB'` would create the following binary array `0x01AB`. If an uneven number of hexadecimal characters is passed, then the upper 4 bits of the initial byte are kept empty, and the lower 4 bits are filled using the first character. For example `X'1C7'` would create the following binary array `0x01C7`. Binary data (Array[Byte]) does not have proper `hashCode` and `equals` functions. This meant that comparing `Literal`s containing binary data was a pain. I have updated Literal.hashCode and Literal.equals to deal properly with binary data. ## How was this patch tested? Added tests to the `ExpressionParserSuite`, `SQLQueryTestSuite` and `ExpressionSQLBuilderSuite`. Author: Herman van Hovell <hvanhovell@databricks.com> Closes #14832 from hvanhovell/SPARK-17263.
Diffstat (limited to 'sql/catalyst/src/main/scala/org/apache')
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala26
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala29
2 files changed, 41 insertions, 14 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
index 41e3952f0e..a597a17aad 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
@@ -19,7 +19,9 @@ package org.apache.spark.sql.catalyst.expressions
import java.nio.charset.StandardCharsets
import java.sql.{Date, Timestamp}
+import java.util
import java.util.Objects
+import javax.xml.bind.DatatypeConverter
import org.json4s.JsonAST._
@@ -168,14 +170,29 @@ case class Literal (value: Any, dataType: DataType) extends LeafExpression with
override def foldable: Boolean = true
override def nullable: Boolean = value == null
- override def toString: String = if (value != null) value.toString else "null"
+ override def toString: String = value match {
+ case null => "null"
+ case binary: Array[Byte] => s"0x" + DatatypeConverter.printHexBinary(binary)
+ case other => other.toString
+ }
- override def hashCode(): Int = 31 * (31 * Objects.hashCode(dataType)) + Objects.hashCode(value)
+ override def hashCode(): Int = {
+ val valueHashCode = value match {
+ case null => 0
+ case binary: Array[Byte] => util.Arrays.hashCode(binary)
+ case other => other.hashCode()
+ }
+ 31 * Objects.hashCode(dataType) + valueHashCode
+ }
override def equals(other: Any): Boolean = other match {
+ case o: Literal if !dataType.equals(o.dataType) => false
case o: Literal =>
- dataType.equals(o.dataType) &&
- (value == null && null == o.value || value != null && value.equals(o.value))
+ (value, o.value) match {
+ case (null, null) => true
+ case (a: Array[Byte], b: Array[Byte]) => util.Arrays.equals(a, b)
+ case (a, b) => a != null && a.equals(b)
+ }
case _ => false
}
@@ -269,6 +286,7 @@ case class Literal (value: Any, dataType: DataType) extends LeafExpression with
case (v: Decimal, t: DecimalType) => v + "BD"
case (v: Int, DateType) => s"DATE '${DateTimeUtils.toJavaDate(v)}'"
case (v: Long, TimestampType) => s"TIMESTAMP('${DateTimeUtils.toJavaTimestamp(v)}')"
+ case (v: Array[Byte], BinaryType) => s"X'${DatatypeConverter.printHexBinary(v)}'"
case _ => value.toString
}
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 893db93368..42fbc16d03 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -18,6 +18,7 @@
package org.apache.spark.sql.catalyst.parser
import java.sql.{Date, Timestamp}
+import javax.xml.bind.DatatypeConverter
import scala.collection.JavaConverters._
import scala.collection.mutable.ArrayBuffer
@@ -1215,19 +1216,27 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging {
* {{{
* [TYPE] '[VALUE]'
* }}}
- * Currently Date and Timestamp typed literals are supported.
- *
- * TODO what the added value of this over casting?
+ * Currently Date, Timestamp and Binary typed literals are supported.
*/
override def visitTypeConstructor(ctx: TypeConstructorContext): Literal = withOrigin(ctx) {
val value = string(ctx.STRING)
- ctx.identifier.getText.toUpperCase match {
- case "DATE" =>
- Literal(Date.valueOf(value))
- case "TIMESTAMP" =>
- Literal(Timestamp.valueOf(value))
- case other =>
- throw new ParseException(s"Literals of type '$other' are currently not supported.", ctx)
+ val valueType = ctx.identifier.getText.toUpperCase
+ try {
+ valueType match {
+ case "DATE" =>
+ Literal(Date.valueOf(value))
+ case "TIMESTAMP" =>
+ Literal(Timestamp.valueOf(value))
+ case "X" =>
+ val padding = if (value.length % 2 == 1) "0" else ""
+ Literal(DatatypeConverter.parseHexBinary(padding + value))
+ case other =>
+ throw new ParseException(s"Literals of type '$other' are currently not supported.", ctx)
+ }
+ } catch {
+ case e: IllegalArgumentException =>
+ val message = Option(e.getMessage).getOrElse(s"Exception parsing $valueType")
+ throw new ParseException(message, ctx)
}
}