From 2be5f8d7e0819de03971d0af6fa310793d2d0e65 Mon Sep 17 00:00:00 2001 From: Herman van Hovell Date: Thu, 1 Sep 2016 12:01:22 -0700 Subject: [SPARK-17263][SQL] Add hexadecimal literal parsing ## What changes were proposed in this pull request? This PR adds the ability to parse SQL (hexadecimal) binary literals (AKA bit strings). It uses the syntax `X'[Hexadecimal Characters]+'`; for example, `X'01AB'` would create the binary array `0x01AB`. If an odd number of hexadecimal characters is passed, then the upper 4 bits of the initial byte are kept empty, and the lower 4 bits are filled using the first character. For example, `X'1C7'` would create the binary array `0x01C7`. Binary data (Array[Byte]) does not have proper `hashCode` and `equals` functions. This meant that comparing `Literal`s containing binary data was a pain. I have updated Literal.hashCode and Literal.equals to deal properly with binary data. ## How was this patch tested? Added tests to the `ExpressionParserSuite`, `SQLQueryTestSuite` and `ExpressionSQLBuilderSuite`. Author: Herman van Hovell Closes #14832 from hvanhovell/SPARK-17263. 
--- .../test/resources/sql-tests/inputs/literals.sql | 7 ++++ .../resources/sql-tests/results/literals.sql.out | 42 +++++++++++++++++++--- 2 files changed, 44 insertions(+), 5 deletions(-) (limited to 'sql/core') diff --git a/sql/core/src/test/resources/sql-tests/inputs/literals.sql b/sql/core/src/test/resources/sql-tests/inputs/literals.sql index a532a598c6..40dceb19cf 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/literals.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/literals.sql @@ -96,3 +96,10 @@ select 90912830918230182310293801923652346786BD, 123.0E-28BD, 123.08BD; -- out of range big decimal select 1.20E-38BD; + +-- hexadecimal binary literal +select x'2379ACFe'; + +-- invalid hexadecimal binary literal +select X'XuZ'; + diff --git a/sql/core/src/test/resources/sql-tests/results/literals.sql.out b/sql/core/src/test/resources/sql-tests/results/literals.sql.out index 85629f7ba8..e2d8daef98 100644 --- a/sql/core/src/test/resources/sql-tests/results/literals.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/literals.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 40 +-- Number of queries: 42 -- !query 0 @@ -289,8 +289,13 @@ select date 'mar 11 2016' -- !query 31 schema struct<> -- !query 31 output -java.lang.IllegalArgumentException -null +org.apache.spark.sql.catalyst.parser.ParseException + +Exception parsing DATE(line 1, pos 7) + +== SQL == +select date 'mar 11 2016' +-------^^^ -- !query 32 @@ -306,8 +311,13 @@ select timestamp '2016-33-11 20:54:00.000' -- !query 33 schema struct<> -- !query 33 output -java.lang.IllegalArgumentException -Timestamp format must be yyyy-mm-dd hh:mm:ss[.fffffffff] +org.apache.spark.sql.catalyst.parser.ParseException + +Timestamp format must be yyyy-mm-dd hh:mm:ss[.fffffffff](line 1, pos 7) + +== SQL == +select timestamp '2016-33-11 20:54:00.000' +-------^^^ -- !query 34 @@ -376,3 +386,25 @@ DecimalType can only support precision up to 38(line 1, pos 7) 
== SQL == select 1.20E-38BD -------^^^ + + +-- !query 40 +select x'2379ACFe' +-- !query 40 schema +struct +-- !query 40 output +#y�� + + +-- !query 41 +select X'XuZ' +-- !query 41 schema +struct<> +-- !query 41 output +org.apache.spark.sql.catalyst.parser.ParseException + +contains illegal character for hexBinary: 0XuZ(line 1, pos 7) + +== SQL == +select X'XuZ' +-------^^^ -- cgit v1.2.3