diff options
author | William Benton <willb@redhat.com> | 2014-07-23 16:25:32 -0700 |
---|---|---|
committer | Michael Armbrust <michael@databricks.com> | 2014-07-23 16:25:32 -0700 |
commit | e060d3ee2d910a5a802bb29630dca6f66cc0525d (patch) | |
tree | 766d3ece33ff5955d8b0ffafb824beee5b7f459e /sql/catalyst | |
parent | 91903e0a50b0efb7217610021a628b3043004d82 (diff) | |
download | spark-e060d3ee2d910a5a802bb29630dca6f66cc0525d.tar.gz spark-e060d3ee2d910a5a802bb29630dca6f66cc0525d.tar.bz2 spark-e060d3ee2d910a5a802bb29630dca6f66cc0525d.zip |
SPARK-2226: [SQL] transform HAVING clauses with aggregate expressions that aren't in the aggregation list
This change adds an analyzer rule to
1. find expressions in `HAVING` clause filters that depend on unresolved attributes,
2. push these expressions down to the underlying aggregates, and then
3. project them away above the filter.
It also enables the `HAVING` queries in the Hive compatibility suite.
Author: William Benton <willb@redhat.com>
Closes #1497 from willb/spark-2226 and squashes the following commits:
92c9a93 [William Benton] Removed unnecessary import
f1d4f34 [William Benton] Cleanups missed in prior commit
0e1624f [William Benton] Incorporated suggestions from @marmbrus; thanks!
541d4ee [William Benton] Cleanups from review
5a12647 [William Benton] Explanatory comments and stylistic cleanups.
c7f2b2c [William Benton] Whitelist HAVING queries.
29a26e3 [William Benton] Added rule to handle unresolved attributes in HAVING clauses (SPARK-2226)
Diffstat (limited to 'sql/catalyst')
-rw-r--r-- | sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 27 |
1 files changed, 26 insertions, 1 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index c7188469bf..02bdb64f30 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -22,7 +22,6 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ - /** * A trivial [[Analyzer]] with an [[EmptyCatalog]] and [[EmptyFunctionRegistry]]. Used for testing * when all relations are already filled in and the analyser needs only to resolve attribute @@ -54,6 +53,7 @@ class Analyzer(catalog: Catalog, registry: FunctionRegistry, caseSensitive: Bool StarExpansion :: ResolveFunctions :: GlobalAggregates :: + UnresolvedHavingClauseAttributes :: typeCoercionRules :_*), Batch("Check Analysis", Once, CheckResolution), @@ -152,6 +152,31 @@ class Analyzer(catalog: Catalog, registry: FunctionRegistry, caseSensitive: Bool } /** + * This rule finds expressions in HAVING clause filters that depend on + * unresolved attributes. It pushes these expressions down to the underlying + * aggregates and then projects them away above the filter. + */ + object UnresolvedHavingClauseAttributes extends Rule[LogicalPlan] { + def apply(plan: LogicalPlan): LogicalPlan = plan transformUp { + case filter @ Filter(havingCondition, aggregate @ Aggregate(_, originalAggExprs, _)) + if !filter.resolved && aggregate.resolved && containsAggregate(havingCondition) => { + val evaluatedCondition = Alias(havingCondition, "havingCondition")() + val aggExprsWithHaving = evaluatedCondition +: originalAggExprs + + Project(aggregate.output, + Filter(evaluatedCondition.toAttribute, + aggregate.copy(aggregateExpressions = aggExprsWithHaving))) + } + + } + + protected def containsAggregate(condition: Expression): Boolean = + condition + .collect { case ae: AggregateExpression => ae } + .nonEmpty + } + + /** * When a SELECT clause has only a single expression and that expression is a * [[catalyst.expressions.Generator Generator]] we convert the * [[catalyst.plans.logical.Project Project]] to a [[catalyst.plans.logical.Generate Generate]]. |