diff options
author | Herman van Hovell <hvanhovell@databricks.com> | 2016-09-15 20:24:15 +0200 |
---|---|---|
committer | Herman van Hovell <hvanhovell@databricks.com> | 2016-09-15 20:24:15 +0200 |
commit | d403562eb4b5b1d804909861d3e8b75d8f6323b9 (patch) | |
tree | d83e7ca7f9f3508a44c8fa0f613e9a33d156c5d0 /sql/core/src/test/resources/sql-tests | |
parent | 5b8f7377d54f83b93ef2bfc2a01ca65fae6d3032 (diff) | |
download | spark-d403562eb4b5b1d804909861d3e8b75d8f6323b9.tar.gz spark-d403562eb4b5b1d804909861d3e8b75d8f6323b9.tar.bz2 spark-d403562eb4b5b1d804909861d3e8b75d8f6323b9.zip |
[SPARK-17114][SQL] Fix aggregates grouped by literals with empty input
## What changes were proposed in this pull request?
This PR fixes an issue with aggregates that have an empty input and use literals as their grouping keys. These aggregates are currently interpreted as aggregates **without** grouping keys, which triggers the ungrouped code path (which always returns a single row).
This PR fixes the `RemoveLiteralFromGroupExpressions` optimizer rule, which was changing the semantics of the Aggregate by eliminating all literal grouping keys.
## How was this patch tested?
Added tests to `SQLQueryTestSuite`.
Author: Herman van Hovell <hvanhovell@databricks.com>
Closes #15101 from hvanhovell/SPARK-17114-3.
Diffstat (limited to 'sql/core/src/test/resources/sql-tests')
-rw-r--r-- | sql/core/src/test/resources/sql-tests/inputs/group-by.sql | 17 | ||||
-rw-r--r-- | sql/core/src/test/resources/sql-tests/results/group-by.sql.out | 51 |
2 files changed, 68 insertions, 0 deletions
diff --git a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql new file mode 100644 index 0000000000..6741703d9d --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql @@ -0,0 +1,17 @@ +-- Temporary data. +create temporary view myview as values 128, 256 as v(int_col); + +-- group by should produce all input rows, +select int_col, count(*) from myview group by int_col; + +-- group by should produce a single row. +select 'foo', count(*) from myview group by 1; + +-- group-by should not produce any rows (whole stage code generation). +select 'foo' from myview where int_col == 0 group by 1; + +-- group-by should not produce any rows (hash aggregate). +select 'foo', approx_count_distinct(int_col) from myview where int_col == 0 group by 1; + +-- group-by should not produce any rows (sort aggregate). +select 'foo', max(struct(int_col)) from myview where int_col == 0 group by 1; diff --git a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out new file mode 100644 index 0000000000..9127bd4dd4 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out @@ -0,0 +1,51 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 6 + + +-- !query 0 +create temporary view myview as values 128, 256 as v(int_col) +-- !query 0 schema +struct<> +-- !query 0 output + + + +-- !query 1 +select int_col, count(*) from myview group by int_col +-- !query 1 schema +struct<int_col:int,count(1):bigint> +-- !query 1 output +128 1 +256 1 + + +-- !query 2 +select 'foo', count(*) from myview group by 1 +-- !query 2 schema +struct<foo:string,count(1):bigint> +-- !query 2 output +foo 2 + + +-- !query 3 +select 'foo' from myview where int_col == 0 group by 1 +-- !query 3 schema +struct<foo:string> +-- !query 3 output + + + +-- !query 4 +select 'foo', approx_count_distinct(int_col) from myview where 
int_col == 0 group by 1 +-- !query 4 schema +struct<foo:string,approx_count_distinct(int_col):bigint> +-- !query 4 output + + + +-- !query 5 +select 'foo', max(struct(int_col)) from myview where int_col == 0 group by 1 +-- !query 5 schema +struct<foo:string,max(struct(int_col)):struct<int_col:int>> +-- !query 5 output + |