diff options
author | Liang-Chi Hsieh <simonh@tw.ibm.com> | 2016-07-28 22:33:33 +0800 |
---|---|---|
committer | Wenchen Fan <wenchen@databricks.com> | 2016-07-28 22:33:33 +0800 |
commit | 9ade77c3fa2e1bf436b79368a97d5980c12fe215 (patch) | |
tree | 88b7f88200277e6c42272df3b22c13474e0a334e /sql | |
parent | 762366fd8722f2b3fa98b8da9338b757a1821708 (diff) | |
download | spark-9ade77c3fa2e1bf436b79368a97d5980c12fe215.tar.gz spark-9ade77c3fa2e1bf436b79368a97d5980c12fe215.tar.bz2 spark-9ade77c3fa2e1bf436b79368a97d5980c12fe215.zip |
[SPARK-16639][SQL] The query with having condition that contains grouping by column should work
## What changes were proposed in this pull request?
The query with having condition that contains grouping by column will be failed during analysis. E.g.,
create table tbl(a int, b string);
select count(b) from tbl group by a + 1 having a + 1 = 2;
Having condition should be able to use grouping by column.
## How was this patch tested?
Jenkins tests.
Author: Liang-Chi Hsieh <simonh@tw.ibm.com>
Closes #14296 from viirya/having-contains-grouping-column.
Diffstat (limited to 'sql')
-rw-r--r-- | sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 15 | ||||
-rw-r--r-- | sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala | 22 |
2 files changed, 31 insertions, 6 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 61162ccdba..2efa997ff2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -246,7 +246,7 @@ class Analyzer( }.isDefined } - private def hasGroupingFunction(e: Expression): Boolean = { + private[sql] def hasGroupingFunction(e: Expression): Boolean = { e.collectFirst { case g: Grouping => g case g: GroupingID => g @@ -1207,6 +1207,19 @@ class Analyzer( val alias = Alias(ae, ae.toString)() aggregateExpressions += alias alias.toAttribute + // Grouping functions are handled in the rule [[ResolveGroupingAnalytics]]. + case e: Expression if grouping.exists(_.semanticEquals(e)) && + !ResolveGroupingAnalytics.hasGroupingFunction(e) && + !aggregate.output.exists(_.semanticEquals(e)) => + e match { + case ne: NamedExpression => + aggregateExpressions += ne + ne.toAttribute + case _ => + val alias = Alias(e, e.toString)() + aggregateExpressions += alias + alias.toAttribute + } } // Push the aggregate expressions into the aggregate (if any). diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 06cc2a5057..d89bda1e48 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -39,11 +39,23 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { setupTestData() test("having clause") { - Seq(("one", 1), ("two", 2), ("three", 3), ("one", 5)).toDF("k", "v") - .createOrReplaceTempView("hav") - checkAnswer( - sql("SELECT k, sum(v) FROM hav GROUP BY k HAVING sum(v) > 2"), - Row("one", 6) :: Row("three", 3) :: Nil) + withTempView("hav") { + Seq(("one", 1), ("two", 2), ("three", 3), ("one", 5)).toDF("k", "v") + .createOrReplaceTempView("hav") + checkAnswer( + sql("SELECT k, sum(v) FROM hav GROUP BY k HAVING sum(v) > 2"), + Row("one", 6) :: Row("three", 3) :: Nil) + } + } + + test("having condition contains grouping column") { + withTempView("hav") { + Seq(("one", 1), ("two", 2), ("three", 3), ("one", 5)).toDF("k", "v") + .createOrReplaceTempView("hav") + checkAnswer( + sql("SELECT count(k) FROM hav GROUP BY v + 1 HAVING v + 1 = 2"), + Row(1) :: Nil) + } } test("SPARK-8010: promote numeric to string") { |