aboutsummaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
author Liang-Chi Hsieh <simonh@tw.ibm.com> 2016-07-28 22:33:33 +0800
committer Wenchen Fan <wenchen@databricks.com> 2016-07-28 22:33:33 +0800
commit 9ade77c3fa2e1bf436b79368a97d5980c12fe215 (patch)
tree 88b7f88200277e6c42272df3b22c13474e0a334e /sql
parent 762366fd8722f2b3fa98b8da9338b757a1821708 (diff)
download spark-9ade77c3fa2e1bf436b79368a97d5980c12fe215.tar.gz
spark-9ade77c3fa2e1bf436b79368a97d5980c12fe215.tar.bz2
spark-9ade77c3fa2e1bf436b79368a97d5980c12fe215.zip
[SPARK-16639][SQL] A query whose HAVING condition contains a grouping expression should work
## What changes were proposed in this pull request? A query whose HAVING condition contains a GROUP BY expression currently fails during analysis. E.g., create table tbl(a int, b string); select count(b) from tbl group by a + 1 having a + 1 = 2; The HAVING condition should be able to reference the grouping expressions. ## How was this patch tested? Jenkins tests. Author: Liang-Chi Hsieh <simonh@tw.ibm.com> Closes #14296 from viirya/having-contains-grouping-column.
Diffstat (limited to 'sql')
-rw-r--r-- sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala 15
-rw-r--r-- sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala 22
2 files changed, 31 insertions, 6 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 61162ccdba..2efa997ff2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -246,7 +246,7 @@ class Analyzer(
}.isDefined
}
- private def hasGroupingFunction(e: Expression): Boolean = {
+ private[sql] def hasGroupingFunction(e: Expression): Boolean = {
e.collectFirst {
case g: Grouping => g
case g: GroupingID => g
@@ -1207,6 +1207,19 @@ class Analyzer(
val alias = Alias(ae, ae.toString)()
aggregateExpressions += alias
alias.toAttribute
+ // Grouping functions are handled in the rule [[ResolveGroupingAnalytics]].
+ case e: Expression if grouping.exists(_.semanticEquals(e)) &&
+ !ResolveGroupingAnalytics.hasGroupingFunction(e) &&
+ !aggregate.output.exists(_.semanticEquals(e)) =>
+ e match {
+ case ne: NamedExpression =>
+ aggregateExpressions += ne
+ ne.toAttribute
+ case _ =>
+ val alias = Alias(e, e.toString)()
+ aggregateExpressions += alias
+ alias.toAttribute
+ }
}
// Push the aggregate expressions into the aggregate (if any).
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 06cc2a5057..d89bda1e48 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -39,11 +39,23 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
setupTestData()
test("having clause") {
- Seq(("one", 1), ("two", 2), ("three", 3), ("one", 5)).toDF("k", "v")
- .createOrReplaceTempView("hav")
- checkAnswer(
- sql("SELECT k, sum(v) FROM hav GROUP BY k HAVING sum(v) > 2"),
- Row("one", 6) :: Row("three", 3) :: Nil)
+ withTempView("hav") {
+ Seq(("one", 1), ("two", 2), ("three", 3), ("one", 5)).toDF("k", "v")
+ .createOrReplaceTempView("hav")
+ checkAnswer(
+ sql("SELECT k, sum(v) FROM hav GROUP BY k HAVING sum(v) > 2"),
+ Row("one", 6) :: Row("three", 3) :: Nil)
+ }
+ }
+
+ test("having condition contains grouping column") {
+ withTempView("hav") {
+ Seq(("one", 1), ("two", 2), ("three", 3), ("one", 5)).toDF("k", "v")
+ .createOrReplaceTempView("hav")
+ checkAnswer(
+ sql("SELECT count(k) FROM hav GROUP BY v + 1 HAVING v + 1 = 2"),
+ Row(1) :: Nil)
+ }
}
test("SPARK-8010: promote numeric to string") {