aboutsummaryrefslogtreecommitdiff
path: root/sql/catalyst
diff options
context:
space:
mode:
authorWilliam Benton <willb@redhat.com>2014-07-23 16:25:32 -0700
committerMichael Armbrust <michael@databricks.com>2014-07-23 16:25:32 -0700
commite060d3ee2d910a5a802bb29630dca6f66cc0525d (patch)
tree766d3ece33ff5955d8b0ffafb824beee5b7f459e /sql/catalyst
parent91903e0a50b0efb7217610021a628b3043004d82 (diff)
downloadspark-e060d3ee2d910a5a802bb29630dca6f66cc0525d.tar.gz
spark-e060d3ee2d910a5a802bb29630dca6f66cc0525d.tar.bz2
spark-e060d3ee2d910a5a802bb29630dca6f66cc0525d.zip
SPARK-2226: [SQL] transform HAVING clauses with aggregate expressions that aren't in the aggregation list
This change adds an analyzer rule to 1. find expressions in `HAVING` clause filters that depend on unresolved attributes, 2. push these expressions down to the underlying aggregates, and then 3. project them away above the filter. It also enables the `HAVING` queries in the Hive compatibility suite. Author: William Benton <willb@redhat.com> Closes #1497 from willb/spark-2226 and squashes the following commits: 92c9a93 [William Benton] Removed unnecessary import f1d4f34 [William Benton] Cleanups missed in prior commit 0e1624f [William Benton] Incorporated suggestions from @marmbrus; thanks! 541d4ee [William Benton] Cleanups from review 5a12647 [William Benton] Explanatory comments and stylistic cleanups. c7f2b2c [William Benton] Whitelist HAVING queries. 29a26e3 [William Benton] Added rule to handle unresolved attributes in HAVING clauses (SPARK-2226)
Diffstat (limited to 'sql/catalyst')
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala27
1 files changed, 26 insertions, 1 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index c7188469bf..02bdb64f30 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -22,7 +22,6 @@ import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.rules._
-
/**
* A trivial [[Analyzer]] with an [[EmptyCatalog]] and [[EmptyFunctionRegistry]]. Used for testing
* when all relations are already filled in and the analyser needs only to resolve attribute
@@ -54,6 +53,7 @@ class Analyzer(catalog: Catalog, registry: FunctionRegistry, caseSensitive: Bool
StarExpansion ::
ResolveFunctions ::
GlobalAggregates ::
+ UnresolvedHavingClauseAttributes ::
typeCoercionRules :_*),
Batch("Check Analysis", Once,
CheckResolution),
@@ -152,6 +152,31 @@ class Analyzer(catalog: Catalog, registry: FunctionRegistry, caseSensitive: Bool
}
/**
+ * This rule finds expressions in HAVING clause filters that depend on
+ * unresolved attributes. It pushes these expressions down to the underlying
+ * aggregates and then projects them away above the filter.
+ */
+ object UnresolvedHavingClauseAttributes extends Rule[LogicalPlan] {
+ def apply(plan: LogicalPlan): LogicalPlan = plan transformUp {
+ case filter @ Filter(havingCondition, aggregate @ Aggregate(_, originalAggExprs, _))
+ if !filter.resolved && aggregate.resolved && containsAggregate(havingCondition) => {
+ val evaluatedCondition = Alias(havingCondition, "havingCondition")()
+ val aggExprsWithHaving = evaluatedCondition +: originalAggExprs
+
+ Project(aggregate.output,
+ Filter(evaluatedCondition.toAttribute,
+ aggregate.copy(aggregateExpressions = aggExprsWithHaving)))
+ }
+
+ }
+
+ protected def containsAggregate(condition: Expression): Boolean =
+ condition
+ .collect { case ae: AggregateExpression => ae }
+ .nonEmpty
+ }
+
+ /**
* When a SELECT clause has only a single expression and that expression is a
* [[catalyst.expressions.Generator Generator]] we convert the
* [[catalyst.plans.logical.Project Project]] to a [[catalyst.plans.logical.Generate Generate]].