From a117afa7c2d94f943106542ec53d74ba2b5f1058 Mon Sep 17 00:00:00 2001 From: petermaxlee Date: Fri, 19 Aug 2016 18:14:45 -0700 Subject: [SPARK-17149][SQL] array.sql for testing array related functions ## What changes were proposed in this pull request? This patch creates array.sql in SQLQueryTestSuite for testing array related functions, including: - indexing - array creation - size - array_contains - sort_array ## How was this patch tested? The patch itself is about adding tests. Author: petermaxlee Closes #14708 from petermaxlee/SPARK-17149. --- .../sql/catalyst/analysis/FunctionRegistry.scala | 12 +- .../src/test/resources/sql-tests/inputs/array.sql | 86 ++++++++++++ .../test/resources/sql-tests/results/array.sql.out | 144 +++++++++++++++++++++ .../scala/org/apache/spark/sql/SQLQuerySuite.scala | 16 --- .../org/apache/spark/sql/SQLQueryTestSuite.scala | 10 ++ .../hive/execution/HiveCompatibilitySuite.scala | 4 +- .../spark/sql/hive/execution/HiveQuerySuite.scala | 9 -- 7 files changed, 248 insertions(+), 33 deletions(-) create mode 100644 sql/core/src/test/resources/sql-tests/inputs/array.sql create mode 100644 sql/core/src/test/resources/sql-tests/results/array.sql.out (limited to 'sql') diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index c5f91c1590..35fd800df4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -161,7 +161,6 @@ object FunctionRegistry { val expressions: Map[String, (ExpressionInfo, FunctionBuilder)] = Map( // misc non-aggregate functions expression[Abs]("abs"), - expression[CreateArray]("array"), expression[Coalesce]("coalesce"), expression[Explode]("explode"), expression[Greatest]("greatest"), @@ -172,10 +171,6 @@ object FunctionRegistry { expression[IsNull]("isnull"), expression[IsNotNull]("isnotnull"), expression[Least]("least"), - expression[CreateMap]("map"), - expression[MapKeys]("map_keys"), - expression[MapValues]("map_values"), - expression[CreateNamedStruct]("named_struct"), expression[NaNvl]("nanvl"), expression[NullIf]("nullif"), expression[Nvl]("nvl"), @@ -184,7 +179,6 @@ object FunctionRegistry { expression[Rand]("rand"), expression[Randn]("randn"), expression[Stack]("stack"), - expression[CreateStruct]("struct"), expression[CaseWhen]("when"), // math functions @@ -354,9 +348,15 @@ object FunctionRegistry { expression[TimeWindow]("window"), // collection functions + expression[CreateArray]("array"), expression[ArrayContains]("array_contains"), + expression[CreateMap]("map"), + expression[CreateNamedStruct]("named_struct"), + expression[MapKeys]("map_keys"), + expression[MapValues]("map_values"), expression[Size]("size"), expression[SortArray]("sort_array"), + expression[CreateStruct]("struct"), // misc functions expression[AssertTrue]("assert_true"), diff --git a/sql/core/src/test/resources/sql-tests/inputs/array.sql b/sql/core/src/test/resources/sql-tests/inputs/array.sql new file mode 100644 index 0000000000..4038a0da41 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/array.sql @@ -0,0 +1,86 @@ +-- test cases for array functions + +create temporary view data as select * from values + ("one", array(11, 12, 13), array(array(111, 112, 113), array(121, 122, 123))), + ("two", array(21, 22, 23), array(array(211, 212, 213), array(221, 222, 223))) + as data(a, b, c); + +select * from data; + +-- index into array +select a, b[0], b[0] + b[1] from data; + +-- index into array of arrays +select a, c[0][0] + c[0][0 + 1] from data; + + +create temporary view primitive_arrays as select * from values ( + array(true), + array(2Y, 1Y), + array(2S, 1S), + array(2, 1), + array(2L, 1L), + array(9223372036854775809, 9223372036854775808), + array(2.0D, 1.0D), + array(float(2.0), float(1.0)), + array(date '2016-03-14', date '2016-03-13'), + array(timestamp '2016-11-15 20:54:00.000', timestamp '2016-11-12 20:54:00.000') +) as primitive_arrays( + boolean_array, + tinyint_array, + smallint_array, + int_array, + bigint_array, + decimal_array, + double_array, + float_array, + date_array, + timestamp_array +); + +select * from primitive_arrays; + +-- array_contains on all primitive types: result should alternate between true and false +select + array_contains(boolean_array, true), array_contains(boolean_array, false), + array_contains(tinyint_array, 2Y), array_contains(tinyint_array, 0Y), + array_contains(smallint_array, 2S), array_contains(smallint_array, 0S), + array_contains(int_array, 2), array_contains(int_array, 0), + array_contains(bigint_array, 2L), array_contains(bigint_array, 0L), + array_contains(decimal_array, 9223372036854775809), array_contains(decimal_array, 1), + array_contains(double_array, 2.0D), array_contains(double_array, 0.0D), + array_contains(float_array, float(2.0)), array_contains(float_array, float(0.0)), + array_contains(date_array, date '2016-03-14'), array_contains(date_array, date '2016-01-01'), + array_contains(timestamp_array, timestamp '2016-11-15 20:54:00.000'), array_contains(timestamp_array, timestamp '2016-01-01 20:54:00.000') +from primitive_arrays; + +-- array_contains on nested arrays +select array_contains(b, 11), array_contains(c, array(111, 112, 113)) from data; + +-- sort_array +select + sort_array(boolean_array), + sort_array(tinyint_array), + sort_array(smallint_array), + sort_array(int_array), + sort_array(bigint_array), + sort_array(decimal_array), + sort_array(double_array), + sort_array(float_array), + sort_array(date_array), + sort_array(timestamp_array) +from primitive_arrays; + +-- size +select + size(boolean_array), + size(tinyint_array), + size(smallint_array), + size(int_array), + size(bigint_array), + size(decimal_array), + size(double_array), + size(float_array), + size(date_array), + size(timestamp_array) +from primitive_arrays; diff --git a/sql/core/src/test/resources/sql-tests/results/array.sql.out b/sql/core/src/test/resources/sql-tests/results/array.sql.out new file mode 100644 index 0000000000..4a1d149c1f --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/array.sql.out @@ -0,0 +1,144 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 10 + + +-- !query 0 +create temporary view data as select * from values + ("one", array(11, 12, 13), array(array(111, 112, 113), array(121, 122, 123))), + ("two", array(21, 22, 23), array(array(211, 212, 213), array(221, 222, 223))) + as data(a, b, c) +-- !query 0 schema +struct<> +-- !query 0 output + + + +-- !query 1 +select * from data +-- !query 1 schema +struct,c:array>> +-- !query 1 output +one [11,12,13] [[111,112,113],[121,122,123]] +two [21,22,23] [[211,212,213],[221,222,223]] + + +-- !query 2 +select a, b[0], b[0] + b[1] from data +-- !query 2 schema +struct +-- !query 2 output +one 11 23 +two 21 43 + + +-- !query 3 +select a, c[0][0] + c[0][0 + 1] from data +-- !query 3 schema +struct +-- !query 3 output +one 223 +two 423 + + +-- !query 4 +create temporary view primitive_arrays as select * from values ( + array(true), + array(2Y, 1Y), + array(2S, 1S), + array(2, 1), + array(2L, 1L), + array(9223372036854775809, 9223372036854775808), + array(2.0D, 1.0D), + array(float(2.0), float(1.0)), + array(date '2016-03-14', date '2016-03-13'), + array(timestamp '2016-11-15 20:54:00.000', timestamp '2016-11-12 20:54:00.000') +) as primitive_arrays( + boolean_array, + tinyint_array, + smallint_array, + int_array, + bigint_array, + decimal_array, + double_array, + float_array, + date_array, + timestamp_array +) +-- !query 4 schema +struct<> +-- !query 4 output + + + +-- !query 5 +select * from primitive_arrays +-- !query 5 schema +struct,tinyint_array:array,smallint_array:array,int_array:array,bigint_array:array,decimal_array:array,double_array:array,float_array:array,date_array:array,timestamp_array:array> +-- !query 5 output +[true] [2,1] [2,1] [2,1] [2,1] [9223372036854775809,9223372036854775808] [2.0,1.0] [2.0,1.0] [2016-03-14,2016-03-13] [2016-11-15 20:54:00.0,2016-11-12 20:54:00.0] + + +-- !query 6 +select + array_contains(boolean_array, true), array_contains(boolean_array, false), + array_contains(tinyint_array, 2Y), array_contains(tinyint_array, 0Y), + array_contains(smallint_array, 2S), array_contains(smallint_array, 0S), + array_contains(int_array, 2), array_contains(int_array, 0), + array_contains(bigint_array, 2L), array_contains(bigint_array, 0L), + array_contains(decimal_array, 9223372036854775809), array_contains(decimal_array, 1), + array_contains(double_array, 2.0D), array_contains(double_array, 0.0D), + array_contains(float_array, float(2.0)), array_contains(float_array, float(0.0)), + array_contains(date_array, date '2016-03-14'), array_contains(date_array, date '2016-01-01'), + array_contains(timestamp_array, timestamp '2016-11-15 20:54:00.000'), array_contains(timestamp_array, timestamp '2016-01-01 20:54:00.000') +from primitive_arrays +-- !query 6 schema +struct +-- !query 6 output +true false true false true false true false true false true false true false true false true false true false + + +-- !query 7 +select array_contains(b, 11), array_contains(c, array(111, 112, 113)) from data +-- !query 7 schema +struct +-- !query 7 output +false false +true true + + +-- !query 8 +select + sort_array(boolean_array), + sort_array(tinyint_array), + sort_array(smallint_array), + sort_array(int_array), + sort_array(bigint_array), + sort_array(decimal_array), + sort_array(double_array), + sort_array(float_array), + sort_array(date_array), + sort_array(timestamp_array) +from primitive_arrays +-- !query 8 schema +struct,sort_array(tinyint_array, true):array,sort_array(smallint_array, true):array,sort_array(int_array, true):array,sort_array(bigint_array, true):array,sort_array(decimal_array, true):array,sort_array(double_array, true):array,sort_array(float_array, true):array,sort_array(date_array, true):array,sort_array(timestamp_array, true):array> +-- !query 8 output +[true] [1,2] [1,2] [1,2] [1,2] [9223372036854775808,9223372036854775809] [1.0,2.0] [1.0,2.0] [2016-03-13,2016-03-14] [2016-11-12 20:54:00.0,2016-11-15 20:54:00.0] + + +-- !query 9 +select + size(boolean_array), + size(tinyint_array), + size(smallint_array), + size(int_array), + size(bigint_array), + size(decimal_array), + size(double_array), + size(float_array), + size(date_array), + size(timestamp_array) +from primitive_arrays +-- !query 9 schema +struct +-- !query 9 output +1 2 2 2 2 2 2 2 2 2 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 4fcde58833..eac266cba5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -445,12 +445,6 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { Nil) } - test("index into array") { - checkAnswer( - sql("SELECT data, data[0], data[0] + data[1], data[0 + 1] FROM arrayData"), - arrayData.map(d => Row(d.data, d.data(0), d.data(0) + d.data(1), d.data(1))).collect()) - } - test("left semi greater than predicate") { withSQLConf(SQLConf.CROSS_JOINS_ENABLED.key -> "true") { checkAnswer( @@ -472,16 +466,6 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { ) } - test("index into array of arrays") { - checkAnswer( - sql( - "SELECT nestedData, nestedData[0][0], nestedData[0][0] + nestedData[0][1] FROM arrayData"), - arrayData.map(d => - Row(d.nestedData, - d.nestedData(0)(0), - d.nestedData(0)(0) + d.nestedData(0)(1))).collect().toSeq) - } - test("agg") { checkAnswer( sql("SELECT a, SUM(b) FROM testData2 GROUP BY a"), diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index 069a9b665e..55d5a56f10 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -35,6 +35,16 @@ import org.apache.spark.sql.types.StructType * Each case is loaded from a file in "spark/sql/core/src/test/resources/sql-tests/inputs". * Each case has a golden result file in "spark/sql/core/src/test/resources/sql-tests/results". * + * To run the entire test suite: + * {{{ + * build/sbt "sql/test-only *SQLQueryTestSuite" + * }}} + * + * To run a single test file upon change: + * {{{ + * build/sbt "~sql/test-only *SQLQueryTestSuite -- -z inline-table.sql" + * }}} + * * To re-generate golden files, run: * {{{ * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/test-only *SQLQueryTestSuite" diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala index 13d18fdec0..a54d234876 100644 --- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala +++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala @@ -979,8 +979,8 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { "udf_PI", "udf_acos", "udf_add", - "udf_array", - "udf_array_contains", + // "udf_array", -- done in array.sql + // "udf_array_contains", -- done in array.sql "udf_ascii", "udf_asin", "udf_atan", diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala index 6785167d3d..3c7dbb449c 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala @@ -216,15 +216,6 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter { assert(new Timestamp(1000) == r1.getTimestamp(0)) } - createQueryTest("constant array", - """ - |SELECT sort_array( - | sort_array( - | array("hadoop distributed file system", - | "enterprise databases", "hadoop map-reduce"))) - |FROM src LIMIT 1; - """.stripMargin) - createQueryTest("null case", "SELECT case when(true) then 1 else null end FROM src LIMIT 1") -- cgit v1.2.3