aboutsummaryrefslogtreecommitdiff
path: root/sql/catalyst
diff options
context:
space:
mode:
authorLiang-Chi Hsieh <simonh@tw.ibm.com>2016-05-04 10:54:51 -0700
committerDavies Liu <davies.liu@gmail.com>2016-05-04 10:54:51 -0700
commitb85d21fb9dc3d498d9a10e065d254abde797efb6 (patch)
tree50cf5f6c1b9e6252e010eec1bd7063bcbd24cbec /sql/catalyst
parentd864c55cf8c92466336e796d0c98d83230e330af (diff)
downloadspark-b85d21fb9dc3d498d9a10e065d254abde797efb6.tar.gz
spark-b85d21fb9dc3d498d9a10e065d254abde797efb6.tar.bz2
spark-b85d21fb9dc3d498d9a10e065d254abde797efb6.zip
[SPARK-14951] [SQL] Support subexpression elimination in TungstenAggregate
## What changes were proposed in this pull request? We can support subexpression elimination in TungstenAggregate by using the existing `EquivalentExpressions`, which is already used for subexpression elimination in expression codegen. However, in whole-stage codegen we can't wrap the common expressions' code in functions as before, so we simply generate code snippets for the common expressions. These code snippets are inserted into the generated Java code before the common expressions are first used. When multiple `TypedAggregateExpression`s are used in an aggregation operator, their input types should be the same, so their `inputDeserializer`s will be the same too; this patch can therefore also reduce redundant input deserialization. ## How was this patch tested? Existing tests. Author: Liang-Chi Hsieh <simonh@tw.ibm.com> Closes #12729 from viirya/subexpr-elimination-tungstenaggregate.
Diffstat (limited to 'sql/catalyst')
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/EquivalentExpressions.scala7
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala74
2 files changed, 76 insertions, 5 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/EquivalentExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/EquivalentExpressions.scala
index d0ad7a05a0..b8e2b67b2f 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/EquivalentExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/EquivalentExpressions.scala
@@ -68,7 +68,10 @@ class EquivalentExpressions {
* is found. That is, if `expr` has already been added, its children are not added.
* If ignoreLeaf is true, leaf nodes are ignored.
*/
- def addExprTree(root: Expression, ignoreLeaf: Boolean = true): Unit = {
+ def addExprTree(
+ root: Expression,
+ ignoreLeaf: Boolean = true,
+ skipReferenceToExpressions: Boolean = true): Unit = {
val skip = root.isInstanceOf[LeafExpression] && ignoreLeaf
// There are some special expressions that we should not recurse into children.
// 1. CodegenFallback: its children will not be used to generate code (call eval() instead)
@@ -77,7 +80,7 @@ class EquivalentExpressions {
// TODO: some expressions implements `CodegenFallback` but can still do codegen,
// e.g. `CaseWhen`, we should support them.
case _: CodegenFallback => false
- case _: ReferenceToExpressions => false
+ case _: ReferenceToExpressions if skipReferenceToExpressions => false
case _ => true
}
if (!skip && !addExpr(root) && shouldRecurse) {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index e4fa429b37..67f6719265 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -47,6 +47,25 @@ import org.apache.spark.util.Utils
case class ExprCode(var code: String, var isNull: String, var value: String)
/**
+ * State used for subexpression elimination: the pair of terms taken from the code generated
+ * for a common subexpression, under which its result can be referenced.
+ * @param isNull A term that holds a boolean value representing whether the expression evaluated
+ *               to null.
+ * @param value A term that holds the value of the common subexpression. Not valid if `isNull`
+ *              is set to `true`.
+ */
+case class SubExprEliminationState(isNull: String, value: String)
+
+/**
+ * Code snippets and state mapping produced by subexpression elimination.
+ *
+ * @param codes Strings holding the code snippets that evaluate the common subexpressions.
+ * @param states For each expression that is participating in subexpression elimination,
+ *               the state to use.
+ */
+case class SubExprCodes(codes: Seq[String], states: Map[Expression, SubExprEliminationState])
+
+/**
* A context for codegen, tracking a list of objects that could be passed into generated Java
* function.
*/
@@ -148,9 +167,6 @@ class CodegenContext {
*/
val equivalentExpressions: EquivalentExpressions = new EquivalentExpressions
- // State used for subexpression elimination.
- case class SubExprEliminationState(isNull: String, value: String)
-
// For each expression that is participating in subexpression elimination, the state to use.
val subExprEliminationExprs = mutable.HashMap.empty[Expression, SubExprEliminationState]
@@ -572,6 +588,58 @@ class CodegenContext {
}
/**
+ * Runs `f`, which generates a sequence of ExprCodes, with `newSubExprEliminationExprs` installed
+ * as the subexpression elimination mapping, then puts back the mapping that was in place before.
+ */
+ def withSubExprEliminationExprs(
+     newSubExprEliminationExprs: Map[Expression, SubExprEliminationState])(
+     f: => Seq[ExprCode]): Seq[ExprCode] = {
+   // Snapshot (do not alias) the current mapping: the live map itself is cleared right below.
+   val oldsubExprEliminationExprs = subExprEliminationExprs.toMap
+   subExprEliminationExprs.clear()
+   subExprEliminationExprs ++= newSubExprEliminationExprs
+   val genCodes = f
+
+   // Restore previous subExprEliminationExprs
+   subExprEliminationExprs.clear()
+   subExprEliminationExprs ++= oldsubExprEliminationExprs
+   genCodes
+ }
+
+ /**
+ * Checks and sets up the state and codegen for subexpression elimination. This finds the
+ * common subexpressions among `expressions`, generates the code snippets that evaluate those
+ * expressions and builds the mapping of common subexpressions to the state of their snippets.
+ * The generated code snippets are returned and should be inserted into the generated code
+ * before these common subexpressions are first used.
+ */
+ def subexpressionEliminationForWholeStageCodegen(expressions: Seq[Expression]): SubExprCodes = {
+   // Use a fresh EquivalentExpressions and state mapping that are local to this call. They are
+   // deliberately named differently from the context-wide members so as not to shadow them.
+   val equivalences = new EquivalentExpressions
+   val states = mutable.HashMap.empty[Expression, SubExprEliminationState]
+
+   // Add each expression tree and compute the common subexpressions.
+   expressions.foreach { expr =>
+     equivalences.addExprTree(expr, ignoreLeaf = true, skipReferenceToExpressions = false)
+   }
+
+   // Get all the expressions that appear at least twice and set up the state for subexpression
+   // elimination.
+   val commonExprs = equivalences.getAllEquivalentExprs.filter(_.size > 1)
+   val codes = commonExprs.map { equivalents =>
+     // Generate the code once, from the first member of the group of equivalent expressions.
+     val eval = equivalents.head.genCode(this)
+     // Every member of the group shares the terms produced by that single evaluation.
+     val state = SubExprEliminationState(eval.isNull, eval.value)
+     equivalents.foreach(states.put(_, state))
+     eval.code.trim
+   }
+
+   SubExprCodes(codes, states.toMap)
+ }
+
+ /**
* Checks and sets up the state and codegen for subexpression elimination. This finds the
* common subexpressions, generates the functions that evaluate those expressions and populates
* the mapping of common subexpressions to the generated functions.