author    Davies Liu <davies@databricks.com>  2015-11-19 17:14:10 -0800
committer Reynold Xin <rxin@databricks.com>   2015-11-19 17:14:10 -0800
commit    ee21407747fb00db2f26d1119446ccbb20c19232 (patch)
tree      60efa669e9da9e278d16484725d5eaa5ffd7b3ad
parent    b2cecb80ece59a1c086d4ae7aeebef445a4e7299 (diff)
[SPARK-11864][SQL] Improve performance of max/min
This PR makes the following optimizations:
1) Greatest/Least already do the null check, so the `If` and `IsNull` wrappers around the Max/Min update expressions are not necessary.
2) Greatest/Least should initialize the result from the first child, removing one generated block.
3) For primitive types, the generated greater-than expression was too complicated (`(a > b ? 1 : (a < b ? -1 : 0)) > 0`); it can be as simple as `a > b`.
Combined, these optimizations improve the performance of the `ss_max` query by 30%.
Author: Davies Liu <davies@databricks.com>
Closes #9846 from davies/improve_max.
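Point 1 relies on Greatest/Least skipping null children on their own. The following minimal Scala sketch (not Spark's implementation; values simplified to Option[Int]) illustrates that semantics and why the extra If(IsNull(...)) wrapper was redundant.

object GreatestNullSkipSketch {
  // Returns the greatest non-null value, or None when every input is null.
  def greatest(values: Seq[Option[Int]]): Option[Int] =
    values.flatten match {
      case Nil      => None
      case nonNulls => Some(nonNulls.max)
    }

  def main(args: Array[String]): Unit = {
    // A null child is simply skipped, which is what the old
    // If(IsNull(child), max, ...) wrapper re-implemented by hand.
    assert(greatest(Seq(Some(3), None, Some(7))) == Some(7))
    assert(greatest(Seq(None, None)) == None)
  }
}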
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Max.scala           |  5
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Min.scala           |  5
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala   | 12
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala  | 38
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala         | 10
5 files changed, 45 insertions(+), 25 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Max.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Max.scala
index 61cae44cd0..906003188d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Max.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Max.scala
@@ -46,13 +46,12 @@ case class Max(child: Expression) extends DeclarativeAggregate {
)
override lazy val updateExpressions: Seq[Expression] = Seq(
- /* max = */ If(IsNull(child), max, If(IsNull(max), child, Greatest(Seq(max, child))))
+ /* max = */ Greatest(Seq(max, child))
)
override lazy val mergeExpressions: Seq[Expression] = {
- val greatest = Greatest(Seq(max.left, max.right))
Seq(
- /* max = */ If(IsNull(max.right), max.left, If(IsNull(max.left), max.right, greatest))
+ /* max = */ Greatest(Seq(max.left, max.right))
)
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Min.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Min.scala
index 242456d9e2..39f7afbd08 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Min.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Min.scala
@@ -47,13 +47,12 @@ case class Min(child: Expression) extends DeclarativeAggregate {
)
override lazy val updateExpressions: Seq[Expression] = Seq(
- /* min = */ If(IsNull(child), min, If(IsNull(min), child, Least(Seq(min, child))))
+ /* min = */ Least(Seq(min, child))
)
override lazy val mergeExpressions: Seq[Expression] = {
- val least = Least(Seq(min.left, min.right))
Seq(
- /* min = */ If(IsNull(min.right), min.left, If(IsNull(min.left), min.right, least))
+ /* min = */ Least(Seq(min.left, min.right))
)
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index 1718cfbd35..1b7260cdfe 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -330,6 +330,18 @@ class CodeGenContext {
}
/**
+ * Generates code for greater of two expressions.
+ *
+ * @param dataType data type of the expressions
+ * @param c1 name of the variable of expression 1's output
+ * @param c2 name of the variable of expression 2's output
+ */
+ def genGreater(dataType: DataType, c1: String, c2: String): String = javaType(dataType) match {
+ case JAVA_BYTE | JAVA_SHORT | JAVA_INT | JAVA_LONG => s"$c1 > $c2"
+ case _ => s"(${genComp(dataType, c1, c2)}) > 0"
+ }
+
+ /**
* List of java data types that have special accessors and setters in [[InternalRow]].
*/
val primitiveTypes =
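For context on the helper added above, here is a rough standalone Scala sketch of the dispatch genGreater performs. It is illustrative only: the real fallback goes through genComp, which handles many more types, and the compareTo call below is just a stand-in for whatever genComp emits.

object GenGreaterSketch {
  def genGreater(javaType: String, c1: String, c2: String): String = javaType match {
    // byte/short/int/long compare fastest with the plain `>` operator.
    case "byte" | "short" | "int" | "long" => s"$c1 > $c2"
    // Everything else falls back to a three-way comparison whose sign is tested.
    case _ => s"($c1.compareTo($c2)) > 0"
  }

  def main(args: Array[String]): Unit = {
    println(genGreater("int", "a", "b"))                  // a > b
    println(genGreater("java.math.BigDecimal", "a", "b")) // (a.compareTo(b)) > 0
  }
}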
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala
index 0d4af43978..694a2a7c54 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala
@@ -348,19 +348,22 @@ case class Least(children: Seq[Expression]) extends Expression {
override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
val evalChildren = children.map(_.gen(ctx))
- def updateEval(i: Int): String =
+ val first = evalChildren(0)
+ val rest = evalChildren.drop(1)
+ def updateEval(eval: GeneratedExpressionCode): String =
s"""
- if (!${evalChildren(i).isNull} && (${ev.isNull} ||
- ${ctx.genComp(dataType, evalChildren(i).value, ev.value)} < 0)) {
+ ${eval.code}
+ if (!${eval.isNull} && (${ev.isNull} ||
+ ${ctx.genGreater(dataType, ev.value, eval.value)})) {
${ev.isNull} = false;
- ${ev.value} = ${evalChildren(i).value};
+ ${ev.value} = ${eval.value};
}
"""
s"""
- ${evalChildren.map(_.code).mkString("\n")}
- boolean ${ev.isNull} = true;
- ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)};
- ${children.indices.map(updateEval).mkString("\n")}
+ ${first.code}
+ boolean ${ev.isNull} = ${first.isNull};
+ ${ctx.javaType(dataType)} ${ev.value} = ${first.value};
+ ${rest.map(updateEval).mkString("\n")}
"""
}
}
@@ -403,19 +406,22 @@ case class Greatest(children: Seq[Expression]) extends Expression {
override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
val evalChildren = children.map(_.gen(ctx))
- def updateEval(i: Int): String =
+ val first = evalChildren(0)
+ val rest = evalChildren.drop(1)
+ def updateEval(eval: GeneratedExpressionCode): String =
s"""
- if (!${evalChildren(i).isNull} && (${ev.isNull} ||
- ${ctx.genComp(dataType, evalChildren(i).value, ev.value)} > 0)) {
+ ${eval.code}
+ if (!${eval.isNull} && (${ev.isNull} ||
+ ${ctx.genGreater(dataType, eval.value, ev.value)})) {
${ev.isNull} = false;
- ${ev.value} = ${evalChildren(i).value};
+ ${ev.value} = ${eval.value};
}
"""
s"""
- ${evalChildren.map(_.code).mkString("\n")}
- boolean ${ev.isNull} = true;
- ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)};
- ${children.indices.map(updateEval).mkString("\n")}
+ ${first.code}
+ boolean ${ev.isNull} = ${first.isNull};
+ ${ctx.javaType(dataType)} ${ev.value} = ${first.value};
+ ${rest.map(updateEval).mkString("\n")}
"""
}
}
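The Least/Greatest hunks above seed the result from the first child and let each remaining child carry its own evaluation code. A minimal Scala rendering of that evaluation shape (illustrative only, with Int values and Option standing in for the generated isNull flag):

object GreatestEvalShapeSketch {
  def greatest(first: Option[Int], rest: Seq[Option[Int]]): Option[Int] = {
    var isNull = first.isEmpty
    var value  = first.getOrElse(0)   // mirrors initializing ev.value from first.value
    for (eval <- rest) {
      eval.foreach { v =>
        if (isNull || v > value) {    // mirrors genGreater(dataType, eval.value, ev.value)
          isNull = false
          value = v
        }
      }
    }
    if (isNull) None else Some(value)
  }

  def main(args: Array[String]): Unit = {
    assert(greatest(None, Seq(Some(2), Some(9), None)) == Some(9))
    assert(greatest(Some(5), Seq(None)) == Some(5))
  }
}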
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala
index 94deafb75b..df4747d4e6 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala
@@ -62,11 +62,15 @@ case class Coalesce(children: Seq[Expression]) extends Expression {
}
override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
+ val first = children(0)
+ val rest = children.drop(1)
+ val firstEval = first.gen(ctx)
s"""
- boolean ${ev.isNull} = true;
- ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)};
+ ${firstEval.code}
+ boolean ${ev.isNull} = ${firstEval.isNull};
+ ${ctx.javaType(dataType)} ${ev.value} = ${firstEval.value};
""" +
- children.map { e =>
+ rest.map { e =>
val eval = e.gen(ctx)
s"""
if (${ev.isNull}) {