aboutsummaryrefslogtreecommitdiff
path: root/sql/catalyst
diff options
context:
space:
mode:
authorLiang-Chi Hsieh <viirya@appier.com>2015-11-30 20:56:42 -0800
committerMichael Armbrust <michael@databricks.com>2015-11-30 20:56:42 -0800
commit9693b0d5a55bc1d2da96f04fe2c6de59a8dfcc1b (patch)
tree0f9ae61302974d55ddae53917c96053717558128 /sql/catalyst
parentf73379be2b0c286957b678a996cb56afc96015eb (diff)
downloadspark-9693b0d5a55bc1d2da96f04fe2c6de59a8dfcc1b.tar.gz
spark-9693b0d5a55bc1d2da96f04fe2c6de59a8dfcc1b.tar.bz2
spark-9693b0d5a55bc1d2da96f04fe2c6de59a8dfcc1b.zip
[SPARK-12018][SQL] Refactor common subexpression elimination code
JIRA: https://issues.apache.org/jira/browse/SPARK-12018 The code of common subexpression elimination can be factored and simplified. Some unnecessary variables can be removed. Author: Liang-Chi Hsieh <viirya@appier.com> Closes #10009 from viirya/refactor-subexpr-eliminate.
Diffstat (limited to 'sql/catalyst')
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala10
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala34
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala4
3 files changed, 14 insertions, 34 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
index 169435a10e..b55d3653a7 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
@@ -94,13 +94,9 @@ abstract class Expression extends TreeNode[Expression] {
def gen(ctx: CodeGenContext): GeneratedExpressionCode = {
ctx.subExprEliminationExprs.get(this).map { subExprState =>
// This expression is repeated meaning the code to evaluated has already been added
- // as a function, `subExprState.fnName`. Just call that.
- val code =
- s"""
- |/* $this */
- |${subExprState.fnName}(${ctx.INPUT_ROW});
- """.stripMargin.trim
- GeneratedExpressionCode(code, subExprState.code.isNull, subExprState.code.value)
+ // as a function and called in advance. Just use it.
+ val code = s"/* $this */"
+ GeneratedExpressionCode(code, subExprState.isNull, subExprState.value)
}.getOrElse {
val isNull = ctx.freshName("isNull")
val primitive = ctx.freshName("primitive")
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index 2f3d6aeb86..440c7d2fc1 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -104,16 +104,13 @@ class CodeGenContext {
val equivalentExpressions: EquivalentExpressions = new EquivalentExpressions
// State used for subexpression elimination.
- case class SubExprEliminationState(
- isLoaded: String,
- code: GeneratedExpressionCode,
- fnName: String)
+ case class SubExprEliminationState(isNull: String, value: String)
// Foreach expression that is participating in subexpression elimination, the state to use.
val subExprEliminationExprs = mutable.HashMap.empty[Expression, SubExprEliminationState]
- // The collection of isLoaded variables that need to be reset on each row.
- val subExprIsLoadedVariables = mutable.ArrayBuffer.empty[String]
+ // The collection of sub-exression result resetting methods that need to be called on each row.
+ val subExprResetVariables = mutable.ArrayBuffer.empty[String]
final val JAVA_BOOLEAN = "boolean"
final val JAVA_BYTE = "byte"
@@ -408,7 +405,6 @@ class CodeGenContext {
val commonExprs = equivalentExpressions.getAllEquivalentExprs.filter(_.size > 1)
commonExprs.foreach(e => {
val expr = e.head
- val isLoaded = freshName("isLoaded")
val isNull = freshName("isNull")
val value = freshName("value")
val fnName = freshName("evalExpr")
@@ -417,18 +413,12 @@ class CodeGenContext {
val code = expr.gen(this)
val fn =
s"""
- |private void $fnName(InternalRow ${INPUT_ROW}) {
- | if (!$isLoaded) {
- | ${code.code.trim}
- | $isLoaded = true;
- | $isNull = ${code.isNull};
- | $value = ${code.value};
- | }
+ |private void $fnName(InternalRow $INPUT_ROW) {
+ | ${code.code.trim}
+ | $isNull = ${code.isNull};
+ | $value = ${code.value};
|}
""".stripMargin
- code.code = fn
- code.isNull = isNull
- code.value = value
addNewFunction(fnName, fn)
@@ -448,18 +438,12 @@ class CodeGenContext {
// 2. Less code.
// Currently, we will do this for all non-leaf only expression trees (i.e. expr trees with
// at least two nodes) as the cost of doing it is expected to be low.
-
- // Maintain the loaded value and isNull as member variables. This is necessary if the codegen
- // function is split across multiple functions.
- // TODO: maintaining this as a local variable probably allows the compiler to do better
- // optimizations.
- addMutableState("boolean", isLoaded, s"$isLoaded = false;")
addMutableState("boolean", isNull, s"$isNull = false;")
addMutableState(javaType(expr.dataType), value,
s"$value = ${defaultValue(expr.dataType)};")
- subExprIsLoadedVariables += isLoaded
- val state = SubExprEliminationState(isLoaded, code, fnName)
+ subExprResetVariables += s"$fnName($INPUT_ROW);"
+ val state = SubExprEliminationState(isNull, value)
e.foreach(subExprEliminationExprs.put(_, state))
})
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala
index 7b6c9373eb..68005afb21 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala
@@ -287,8 +287,8 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro
val holderClass = classOf[BufferHolder].getName
ctx.addMutableState(holderClass, bufferHolder, s"this.$bufferHolder = new $holderClass();")
- // Reset the isLoaded flag for each row.
- val subexprReset = ctx.subExprIsLoadedVariables.map { v => s"${v} = false;" }.mkString("\n")
+ // Reset the subexpression values for each row.
+ val subexprReset = ctx.subExprResetVariables.mkString("\n")
val code =
s"""