about | summary | refs | log | tree | commit | diff
path: root/sql/core
diff options
context:
space:
mode:
author petermaxlee <petermaxlee@gmail.com> 2016-08-19 18:14:45 -0700
committer Reynold Xin <rxin@databricks.com> 2016-08-19 18:14:45 -0700
commit a117afa7c2d94f943106542ec53d74ba2b5f1058 (patch)
tree 8c7ba5e5e3a47c84af64d71c20c29b3cf5a107e1 /sql/core
parent acac7a508a29d0f75d86ee2e4ca83ebf01a36cf8 (diff)
download spark-a117afa7c2d94f943106542ec53d74ba2b5f1058.tar.gz
spark-a117afa7c2d94f943106542ec53d74ba2b5f1058.tar.bz2
spark-a117afa7c2d94f943106542ec53d74ba2b5f1058.zip
[SPARK-17149][SQL] array.sql for testing array related functions
## What changes were proposed in this pull request?

This patch creates array.sql in SQLQueryTestSuite for testing array related functions, including:

- indexing
- array creation
- size
- array_contains
- sort_array

## How was this patch tested?

The patch itself is about adding tests.

Author: petermaxlee <petermaxlee@gmail.com>

Closes #14708 from petermaxlee/SPARK-17149.
Diffstat (limited to 'sql/core')
-rw-r--r-- sql/core/src/test/resources/sql-tests/inputs/array.sql 86
-rw-r--r-- sql/core/src/test/resources/sql-tests/results/array.sql.out 144
-rw-r--r-- sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala 16
-rw-r--r-- sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala 10
4 files changed, 240 insertions, 16 deletions
diff --git a/sql/core/src/test/resources/sql-tests/inputs/array.sql b/sql/core/src/test/resources/sql-tests/inputs/array.sql
new file mode 100644
index 0000000000..4038a0da41
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/array.sql
@@ -0,0 +1,86 @@
+-- test cases for array functions
+
+create temporary view data as select * from values
+ ("one", array(11, 12, 13), array(array(111, 112, 113), array(121, 122, 123))),
+ ("two", array(21, 22, 23), array(array(211, 212, 213), array(221, 222, 223)))
+ as data(a, b, c);
+
+select * from data;
+
+-- index into array
+select a, b[0], b[0] + b[1] from data;
+
+-- index into array of arrays
+select a, c[0][0] + c[0][0 + 1] from data;
+
+
+create temporary view primitive_arrays as select * from values (
+ array(true),
+ array(2Y, 1Y),
+ array(2S, 1S),
+ array(2, 1),
+ array(2L, 1L),
+ array(9223372036854775809, 9223372036854775808),
+ array(2.0D, 1.0D),
+ array(float(2.0), float(1.0)),
+ array(date '2016-03-14', date '2016-03-13'),
+ array(timestamp '2016-11-15 20:54:00.000', timestamp '2016-11-12 20:54:00.000')
+) as primitive_arrays(
+ boolean_array,
+ tinyint_array,
+ smallint_array,
+ int_array,
+ bigint_array,
+ decimal_array,
+ double_array,
+ float_array,
+ date_array,
+ timestamp_array
+);
+
+select * from primitive_arrays;
+
+-- array_contains on all primitive types: result should alternate between true and false
+select
+ array_contains(boolean_array, true), array_contains(boolean_array, false),
+ array_contains(tinyint_array, 2Y), array_contains(tinyint_array, 0Y),
+ array_contains(smallint_array, 2S), array_contains(smallint_array, 0S),
+ array_contains(int_array, 2), array_contains(int_array, 0),
+ array_contains(bigint_array, 2L), array_contains(bigint_array, 0L),
+ array_contains(decimal_array, 9223372036854775809), array_contains(decimal_array, 1),
+ array_contains(double_array, 2.0D), array_contains(double_array, 0.0D),
+ array_contains(float_array, float(2.0)), array_contains(float_array, float(0.0)),
+ array_contains(date_array, date '2016-03-14'), array_contains(date_array, date '2016-01-01'),
+ array_contains(timestamp_array, timestamp '2016-11-15 20:54:00.000'), array_contains(timestamp_array, timestamp '2016-01-01 20:54:00.000')
+from primitive_arrays;
+
+-- array_contains on nested arrays
+select array_contains(b, 11), array_contains(c, array(111, 112, 113)) from data;
+
+-- sort_array
+select
+ sort_array(boolean_array),
+ sort_array(tinyint_array),
+ sort_array(smallint_array),
+ sort_array(int_array),
+ sort_array(bigint_array),
+ sort_array(decimal_array),
+ sort_array(double_array),
+ sort_array(float_array),
+ sort_array(date_array),
+ sort_array(timestamp_array)
+from primitive_arrays;
+
+-- size
+select
+ size(boolean_array),
+ size(tinyint_array),
+ size(smallint_array),
+ size(int_array),
+ size(bigint_array),
+ size(decimal_array),
+ size(double_array),
+ size(float_array),
+ size(date_array),
+ size(timestamp_array)
+from primitive_arrays;
diff --git a/sql/core/src/test/resources/sql-tests/results/array.sql.out b/sql/core/src/test/resources/sql-tests/results/array.sql.out
new file mode 100644
index 0000000000..4a1d149c1f
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/array.sql.out
@@ -0,0 +1,144 @@
+-- Automatically generated by SQLQueryTestSuite
+-- Number of queries: 10
+
+
+-- !query 0
+create temporary view data as select * from values
+ ("one", array(11, 12, 13), array(array(111, 112, 113), array(121, 122, 123))),
+ ("two", array(21, 22, 23), array(array(211, 212, 213), array(221, 222, 223)))
+ as data(a, b, c)
+-- !query 0 schema
+struct<>
+-- !query 0 output
+
+
+
+-- !query 1
+select * from data
+-- !query 1 schema
+struct<a:string,b:array<int>,c:array<array<int>>>
+-- !query 1 output
+one [11,12,13] [[111,112,113],[121,122,123]]
+two [21,22,23] [[211,212,213],[221,222,223]]
+
+
+-- !query 2
+select a, b[0], b[0] + b[1] from data
+-- !query 2 schema
+struct<a:string,b[0]:int,(b[0] + b[1]):int>
+-- !query 2 output
+one 11 23
+two 21 43
+
+
+-- !query 3
+select a, c[0][0] + c[0][0 + 1] from data
+-- !query 3 schema
+struct<a:string,(c[0][0] + c[0][(0 + 1)]):int>
+-- !query 3 output
+one 223
+two 423
+
+
+-- !query 4
+create temporary view primitive_arrays as select * from values (
+ array(true),
+ array(2Y, 1Y),
+ array(2S, 1S),
+ array(2, 1),
+ array(2L, 1L),
+ array(9223372036854775809, 9223372036854775808),
+ array(2.0D, 1.0D),
+ array(float(2.0), float(1.0)),
+ array(date '2016-03-14', date '2016-03-13'),
+ array(timestamp '2016-11-15 20:54:00.000', timestamp '2016-11-12 20:54:00.000')
+) as primitive_arrays(
+ boolean_array,
+ tinyint_array,
+ smallint_array,
+ int_array,
+ bigint_array,
+ decimal_array,
+ double_array,
+ float_array,
+ date_array,
+ timestamp_array
+)
+-- !query 4 schema
+struct<>
+-- !query 4 output
+
+
+
+-- !query 5
+select * from primitive_arrays
+-- !query 5 schema
+struct<boolean_array:array<boolean>,tinyint_array:array<tinyint>,smallint_array:array<smallint>,int_array:array<int>,bigint_array:array<bigint>,decimal_array:array<decimal(19,0)>,double_array:array<double>,float_array:array<float>,date_array:array<date>,timestamp_array:array<timestamp>>
+-- !query 5 output
+[true] [2,1] [2,1] [2,1] [2,1] [9223372036854775809,9223372036854775808] [2.0,1.0] [2.0,1.0] [2016-03-14,2016-03-13] [2016-11-15 20:54:00.0,2016-11-12 20:54:00.0]
+
+
+-- !query 6
+select
+ array_contains(boolean_array, true), array_contains(boolean_array, false),
+ array_contains(tinyint_array, 2Y), array_contains(tinyint_array, 0Y),
+ array_contains(smallint_array, 2S), array_contains(smallint_array, 0S),
+ array_contains(int_array, 2), array_contains(int_array, 0),
+ array_contains(bigint_array, 2L), array_contains(bigint_array, 0L),
+ array_contains(decimal_array, 9223372036854775809), array_contains(decimal_array, 1),
+ array_contains(double_array, 2.0D), array_contains(double_array, 0.0D),
+ array_contains(float_array, float(2.0)), array_contains(float_array, float(0.0)),
+ array_contains(date_array, date '2016-03-14'), array_contains(date_array, date '2016-01-01'),
+ array_contains(timestamp_array, timestamp '2016-11-15 20:54:00.000'), array_contains(timestamp_array, timestamp '2016-01-01 20:54:00.000')
+from primitive_arrays
+-- !query 6 schema
+struct<array_contains(boolean_array, true):boolean,array_contains(boolean_array, false):boolean,array_contains(tinyint_array, 2):boolean,array_contains(tinyint_array, 0):boolean,array_contains(smallint_array, 2):boolean,array_contains(smallint_array, 0):boolean,array_contains(int_array, 2):boolean,array_contains(int_array, 0):boolean,array_contains(bigint_array, 2):boolean,array_contains(bigint_array, 0):boolean,array_contains(decimal_array, 9223372036854775809):boolean,array_contains(decimal_array, CAST(1 AS DECIMAL(19,0))):boolean,array_contains(double_array, 2.0):boolean,array_contains(double_array, 0.0):boolean,array_contains(float_array, CAST(2.0 AS FLOAT)):boolean,array_contains(float_array, CAST(0.0 AS FLOAT)):boolean,array_contains(date_array, DATE '2016-03-14'):boolean,array_contains(date_array, DATE '2016-01-01'):boolean,array_contains(timestamp_array, TIMESTAMP('2016-11-15 20:54:00.0')):boolean,array_contains(timestamp_array, TIMESTAMP('2016-01-01 20:54:00.0')):boolean>
+-- !query 6 output
+true false true false true false true false true false true false true false true false true false true false
+
+
+-- !query 7
+select array_contains(b, 11), array_contains(c, array(111, 112, 113)) from data
+-- !query 7 schema
+struct<array_contains(b, 11):boolean,array_contains(c, array(111, 112, 113)):boolean>
+-- !query 7 output
+false false
+true true
+
+
+-- !query 8
+select
+ sort_array(boolean_array),
+ sort_array(tinyint_array),
+ sort_array(smallint_array),
+ sort_array(int_array),
+ sort_array(bigint_array),
+ sort_array(decimal_array),
+ sort_array(double_array),
+ sort_array(float_array),
+ sort_array(date_array),
+ sort_array(timestamp_array)
+from primitive_arrays
+-- !query 8 schema
+struct<sort_array(boolean_array, true):array<boolean>,sort_array(tinyint_array, true):array<tinyint>,sort_array(smallint_array, true):array<smallint>,sort_array(int_array, true):array<int>,sort_array(bigint_array, true):array<bigint>,sort_array(decimal_array, true):array<decimal(19,0)>,sort_array(double_array, true):array<double>,sort_array(float_array, true):array<float>,sort_array(date_array, true):array<date>,sort_array(timestamp_array, true):array<timestamp>>
+-- !query 8 output
+[true] [1,2] [1,2] [1,2] [1,2] [9223372036854775808,9223372036854775809] [1.0,2.0] [1.0,2.0] [2016-03-13,2016-03-14] [2016-11-12 20:54:00.0,2016-11-15 20:54:00.0]
+
+
+-- !query 9
+select
+ size(boolean_array),
+ size(tinyint_array),
+ size(smallint_array),
+ size(int_array),
+ size(bigint_array),
+ size(decimal_array),
+ size(double_array),
+ size(float_array),
+ size(date_array),
+ size(timestamp_array)
+from primitive_arrays
+-- !query 9 schema
+struct<size(boolean_array):int,size(tinyint_array):int,size(smallint_array):int,size(int_array):int,size(bigint_array):int,size(decimal_array):int,size(double_array):int,size(float_array):int,size(date_array):int,size(timestamp_array):int>
+-- !query 9 output
+1 2 2 2 2 2 2 2 2 2
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 4fcde58833..eac266cba5 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -445,12 +445,6 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
Nil)
}
- test("index into array") {
- checkAnswer(
- sql("SELECT data, data[0], data[0] + data[1], data[0 + 1] FROM arrayData"),
- arrayData.map(d => Row(d.data, d.data(0), d.data(0) + d.data(1), d.data(1))).collect())
- }
-
test("left semi greater than predicate") {
withSQLConf(SQLConf.CROSS_JOINS_ENABLED.key -> "true") {
checkAnswer(
@@ -472,16 +466,6 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
)
}
- test("index into array of arrays") {
- checkAnswer(
- sql(
- "SELECT nestedData, nestedData[0][0], nestedData[0][0] + nestedData[0][1] FROM arrayData"),
- arrayData.map(d =>
- Row(d.nestedData,
- d.nestedData(0)(0),
- d.nestedData(0)(0) + d.nestedData(0)(1))).collect().toSeq)
- }
-
test("agg") {
checkAnswer(
sql("SELECT a, SUM(b) FROM testData2 GROUP BY a"),
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
index 069a9b665e..55d5a56f10 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
@@ -35,6 +35,16 @@ import org.apache.spark.sql.types.StructType
* Each case is loaded from a file in "spark/sql/core/src/test/resources/sql-tests/inputs".
* Each case has a golden result file in "spark/sql/core/src/test/resources/sql-tests/results".
*
+ * To run the entire test suite:
+ * {{{
+ * build/sbt "sql/test-only *SQLQueryTestSuite"
+ * }}}
+ *
+ * To run a single test file upon change:
+ * {{{
+ * build/sbt "~sql/test-only *SQLQueryTestSuite -- -z inline-table.sql"
+ * }}}
+ *
* To re-generate golden files, run:
* {{{
* SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/test-only *SQLQueryTestSuite"