aboutsummaryrefslogtreecommitdiff
path: root/sql/core/src/test/resources/sql-tests
diff options
context:
space:
mode:
authorHerman van Hovell <hvanhovell@databricks.com>2016-09-15 20:24:15 +0200
committerHerman van Hovell <hvanhovell@databricks.com>2016-09-15 20:24:15 +0200
commitd403562eb4b5b1d804909861d3e8b75d8f6323b9 (patch)
treed83e7ca7f9f3508a44c8fa0f613e9a33d156c5d0 /sql/core/src/test/resources/sql-tests
parent5b8f7377d54f83b93ef2bfc2a01ca65fae6d3032 (diff)
downloadspark-d403562eb4b5b1d804909861d3e8b75d8f6323b9.tar.gz
spark-d403562eb4b5b1d804909861d3e8b75d8f6323b9.tar.bz2
spark-d403562eb4b5b1d804909861d3e8b75d8f6323b9.zip
[SPARK-17114][SQL] Fix aggregates grouped by literals with empty input
## What changes were proposed in this pull request? This PR fixes an issue with aggregates that have an empty input and use literals as their grouping keys. These aggregates are currently interpreted as aggregates **without** grouping keys; this triggers the ungrouped code path (which always returns a single row). This PR fixes the `RemoveLiteralFromGroupExpressions` optimizer rule, which changes the semantics of the Aggregate by eliminating all literal grouping keys. ## How was this patch tested? Added tests to `SQLQueryTestSuite`. Author: Herman van Hovell <hvanhovell@databricks.com> Closes #15101 from hvanhovell/SPARK-17114-3.
Diffstat (limited to 'sql/core/src/test/resources/sql-tests')
-rw-r--r--sql/core/src/test/resources/sql-tests/inputs/group-by.sql17
-rw-r--r--sql/core/src/test/resources/sql-tests/results/group-by.sql.out51
2 files changed, 68 insertions, 0 deletions
diff --git a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql
new file mode 100644
index 0000000000..6741703d9d
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql
@@ -0,0 +1,17 @@
+-- Temporary data.
+create temporary view myview as values 128, 256 as v(int_col);
+
+-- group by should produce all input rows.
+select int_col, count(*) from myview group by int_col;
+
+-- group by should produce a single row.
+select 'foo', count(*) from myview group by 1;
+
+-- group-by should not produce any rows (whole stage code generation).
+select 'foo' from myview where int_col == 0 group by 1;
+
+-- group-by should not produce any rows (hash aggregate).
+select 'foo', approx_count_distinct(int_col) from myview where int_col == 0 group by 1;
+
+-- group-by should not produce any rows (sort aggregate).
+select 'foo', max(struct(int_col)) from myview where int_col == 0 group by 1;
diff --git a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out
new file mode 100644
index 0000000000..9127bd4dd4
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out
@@ -0,0 +1,51 @@
+-- Automatically generated by SQLQueryTestSuite
+-- Number of queries: 6
+
+
+-- !query 0
+create temporary view myview as values 128, 256 as v(int_col)
+-- !query 0 schema
+struct<>
+-- !query 0 output
+
+
+
+-- !query 1
+select int_col, count(*) from myview group by int_col
+-- !query 1 schema
+struct<int_col:int,count(1):bigint>
+-- !query 1 output
+128 1
+256 1
+
+
+-- !query 2
+select 'foo', count(*) from myview group by 1
+-- !query 2 schema
+struct<foo:string,count(1):bigint>
+-- !query 2 output
+foo 2
+
+
+-- !query 3
+select 'foo' from myview where int_col == 0 group by 1
+-- !query 3 schema
+struct<foo:string>
+-- !query 3 output
+
+
+
+-- !query 4
+select 'foo', approx_count_distinct(int_col) from myview where int_col == 0 group by 1
+-- !query 4 schema
+struct<foo:string,approx_count_distinct(int_col):bigint>
+-- !query 4 output
+
+
+
+-- !query 5
+select 'foo', max(struct(int_col)) from myview where int_col == 0 group by 1
+-- !query 5 schema
+struct<foo:string,max(struct(int_col)):struct<int_col:int>>
+-- !query 5 output
+