diff options
6 files changed, 93 insertions, 25 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala index 41e3952f0e..a597a17aad 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala @@ -19,7 +19,9 @@ package org.apache.spark.sql.catalyst.expressions import java.nio.charset.StandardCharsets import java.sql.{Date, Timestamp} +import java.util import java.util.Objects +import javax.xml.bind.DatatypeConverter import org.json4s.JsonAST._ @@ -168,14 +170,29 @@ case class Literal (value: Any, dataType: DataType) extends LeafExpression with override def foldable: Boolean = true override def nullable: Boolean = value == null - override def toString: String = if (value != null) value.toString else "null" + override def toString: String = value match { + case null => "null" + case binary: Array[Byte] => s"0x" + DatatypeConverter.printHexBinary(binary) + case other => other.toString + } - override def hashCode(): Int = 31 * (31 * Objects.hashCode(dataType)) + Objects.hashCode(value) + override def hashCode(): Int = { + val valueHashCode = value match { + case null => 0 + case binary: Array[Byte] => util.Arrays.hashCode(binary) + case other => other.hashCode() + } + 31 * Objects.hashCode(dataType) + valueHashCode + } override def equals(other: Any): Boolean = other match { + case o: Literal if !dataType.equals(o.dataType) => false case o: Literal => - dataType.equals(o.dataType) && - (value == null && null == o.value || value != null && value.equals(o.value)) + (value, o.value) match { + case (null, null) => true + case (a: Array[Byte], b: Array[Byte]) => util.Arrays.equals(a, b) + case (a, b) => a != null && a.equals(b) + } case _ => false } @@ -269,6 +286,7 @@ case class Literal (value: Any, dataType: DataType) extends LeafExpression with case (v: Decimal, t: DecimalType) => v + "BD" case (v: Int, DateType) => s"DATE '${DateTimeUtils.toJavaDate(v)}'" case (v: Long, TimestampType) => s"TIMESTAMP('${DateTimeUtils.toJavaTimestamp(v)}')" + case (v: Array[Byte], BinaryType) => s"X'${DatatypeConverter.printHexBinary(v)}'" case _ => value.toString } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 893db93368..42fbc16d03 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.catalyst.parser import java.sql.{Date, Timestamp} +import javax.xml.bind.DatatypeConverter import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer @@ -1215,19 +1216,27 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging { * {{{ * [TYPE] '[VALUE]' * }}} - * Currently Date and Timestamp typed literals are supported. - * - * TODO what the added value of this over casting? + * Currently Date, Timestamp and Binary typed literals are supported. */ override def visitTypeConstructor(ctx: TypeConstructorContext): Literal = withOrigin(ctx) { val value = string(ctx.STRING) - ctx.identifier.getText.toUpperCase match { - case "DATE" => - Literal(Date.valueOf(value)) - case "TIMESTAMP" => - Literal(Timestamp.valueOf(value)) - case other => - throw new ParseException(s"Literals of type '$other' are currently not supported.", ctx) + val valueType = ctx.identifier.getText.toUpperCase + try { + valueType match { + case "DATE" => + Literal(Date.valueOf(value)) + case "TIMESTAMP" => + Literal(Timestamp.valueOf(value)) + case "X" => + val padding = if (value.length % 2 == 1) "0" else "" + Literal(DatatypeConverter.parseHexBinary(padding + value)) + case other => + throw new ParseException(s"Literals of type '$other' are currently not supported.", ctx) + } + } catch { + case e: IllegalArgumentException => + val message = Option(e.getMessage).getOrElse(s"Exception parsing $valueType") + throw new ParseException(message, ctx) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala index dbc5db39ae..4e399eef1f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala @@ -331,16 +331,17 @@ class ExpressionParserSuite extends PlanTest { test("type constructors") { // Dates. assertEqual("dAte '2016-03-11'", Literal(Date.valueOf("2016-03-11"))) - intercept[IllegalArgumentException] { - parseExpression("DAtE 'mar 11 2016'") - } + intercept("DAtE 'mar 11 2016'") // Timestamps. assertEqual("tImEstAmp '2016-03-11 20:54:00.000'", Literal(Timestamp.valueOf("2016-03-11 20:54:00.000"))) - intercept[IllegalArgumentException] { - parseExpression("timestamP '2016-33-11 20:54:00.000'") - } + intercept("timestamP '2016-33-11 20:54:00.000'") + + // Binary. + assertEqual("X'A'", Literal(Array(0x0a).map(_.toByte))) + assertEqual("x'A10C'", Literal(Array(0xa1, 0x0c).map(_.toByte))) + intercept("x'A1OC'") // Unsupported datatype. intercept("GEO '(10,-6)'", "Literals of type 'GEO' are currently not supported.") diff --git a/sql/core/src/test/resources/sql-tests/inputs/literals.sql b/sql/core/src/test/resources/sql-tests/inputs/literals.sql index a532a598c6..40dceb19cf 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/literals.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/literals.sql @@ -96,3 +96,10 @@ select 90912830918230182310293801923652346786BD, 123.0E-28BD, 123.08BD; -- out of range big decimal select 1.20E-38BD; + +-- hexadecimal binary literal +select x'2379ACFe'; + +-- invalid hexadecimal binary literal +select X'XuZ'; + diff --git a/sql/core/src/test/resources/sql-tests/results/literals.sql.out b/sql/core/src/test/resources/sql-tests/results/literals.sql.out index 85629f7ba8..e2d8daef98 100644 --- a/sql/core/src/test/resources/sql-tests/results/literals.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/literals.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 40 +-- Number of queries: 42 -- !query 0 @@ -289,8 +289,13 @@ select date 'mar 11 2016' -- !query 31 schema struct<> -- !query 31 output -java.lang.IllegalArgumentException -null +org.apache.spark.sql.catalyst.parser.ParseException + +Exception parsing DATE(line 1, pos 7) + +== SQL == +select date 'mar 11 2016' +-------^^^ -- !query 32 @@ -306,8 +311,13 @@ select timestamp '2016-33-11 20:54:00.000' -- !query 33 schema struct<> -- !query 33 output -java.lang.IllegalArgumentException -Timestamp format must be yyyy-mm-dd hh:mm:ss[.fffffffff] +org.apache.spark.sql.catalyst.parser.ParseException + +Timestamp format must be yyyy-mm-dd hh:mm:ss[.fffffffff](line 1, pos 7) + +== SQL == +select timestamp '2016-33-11 20:54:00.000' +-------^^^ -- !query 34 @@ -376,3 +386,25 @@ DecimalType can only support precision up to 38(line 1, pos 7) == SQL == select 1.20E-38BD -------^^^ + + +-- !query 40 +select x'2379ACFe' +-- !query 40 schema +struct<X'2379ACFE':binary> +-- !query 40 output +#y�� + + +-- !query 41 +select X'XuZ' +-- !query 41 schema +struct<> +-- !query 41 output +org.apache.spark.sql.catalyst.parser.ParseException + +contains illegal character for hexBinary: 0XuZ(line 1, pos 7) + +== SQL == +select X'XuZ' +-------^^^ diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/catalyst/ExpressionSQLBuilderSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/catalyst/ExpressionSQLBuilderSuite.scala index 43a218b4d1..d2b2f38fa1 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/catalyst/ExpressionSQLBuilderSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/catalyst/ExpressionSQLBuilderSuite.scala @@ -40,6 +40,7 @@ class ExpressionSQLBuilderSuite extends SQLBuilderTest { checkSQL(Literal(Double.NegativeInfinity), "CAST('-Infinity' AS DOUBLE)") checkSQL(Literal(Double.NaN), "CAST('NaN' AS DOUBLE)") checkSQL(Literal(BigDecimal("10.0000000").underlying), "10.0000000BD") + checkSQL(Literal(Array(0x01, 0xA3).map(_.toByte)), "X'01A3'") checkSQL( Literal(Timestamp.valueOf("2016-01-01 00:00:00")), "TIMESTAMP('2016-01-01 00:00:00.0')") // TODO tests for decimals |