aboutsummaryrefslogtreecommitdiff
path: root/sql/catalyst/src/main
diff options
context:
space:
mode:
authorLiwei Lin <lwlin7@gmail.com>2017-01-10 19:35:46 +0800
committerWenchen Fan <wenchen@databricks.com>2017-01-10 19:35:46 +0800
commitacfc5f354332107cc744fb636e3730f6fc48b2fe (patch)
tree6d2e6e7561c2478931d79adfbe21993a7817fb1a /sql/catalyst/src/main
parentb0319c2ecb51bb97c3228afa4a384572b9ffbce6 (diff)
downloadspark-acfc5f354332107cc744fb636e3730f6fc48b2fe.tar.gz
spark-acfc5f354332107cc744fb636e3730f6fc48b2fe.tar.bz2
spark-acfc5f354332107cc744fb636e3730f6fc48b2fe.zip
[SPARK-16845][SQL] `GeneratedClass$SpecificOrdering` grows beyond 64 KB
## What changes were proposed in this pull request? Prior to this patch, we'll generate `compare(...)` for `GeneratedClass$SpecificOrdering` like below, leading to Janino exceptions saying the code grows beyond 64 KB. ``` scala /* 005 */ class SpecificOrdering extends o.a.s.sql.catalyst.expressions.codegen.BaseOrdering { /* ..... */ ... /* 10969 */ private int compare(InternalRow a, InternalRow b) { /* 10970 */ InternalRow i = null; // Holds current row being evaluated. /* 10971 */ /* 1.... */ code for comparing field0 /* 1.... */ code for comparing field1 /* 1.... */ ... /* 1.... */ code for comparing field449 /* 15012 */ /* 15013 */ return 0; /* 15014 */ } /* 15015 */ } ``` This patch would break `compare(...)` into smaller `compare_xxx(...)` methods when necessary; then we'll get generated `compare(...)` like: ``` scala /* 001 */ public SpecificOrdering generate(Object[] references) { /* 002 */ return new SpecificOrdering(references); /* 003 */ } /* 004 */ /* 005 */ class SpecificOrdering extends o.a.s.sql.catalyst.expressions.codegen.BaseOrdering { /* 006 */ /* 007 */ ... /* 1.... */ /* 11290 */ private int compare_0(InternalRow a, InternalRow b) { /* 11291 */ InternalRow i = null; // Holds current row being evaluated. /* 11292 */ /* 11293 */ i = a; /* 11294 */ boolean isNullA; /* 11295 */ UTF8String primitiveA; /* 11296 */ { /* 11297 */ /* 11298 */ Object obj = ((Expression) references[0]).eval(null); /* 11299 */ UTF8String value = (UTF8String) obj; /* 11300 */ isNullA = false; /* 11301 */ primitiveA = value; /* 11302 */ } /* 11303 */ i = b; /* 11304 */ boolean isNullB; /* 11305 */ UTF8String primitiveB; /* 11306 */ { /* 11307 */ /* 11308 */ Object obj = ((Expression) references[0]).eval(null); /* 11309 */ UTF8String value = (UTF8String) obj; /* 11310 */ isNullB = false; /* 11311 */ primitiveB = value; /* 11312 */ } /* 11313 */ if (isNullA && isNullB) { /* 11314 */ // Nothing /* 11315 */ } else if (isNullA) { /* 11316 */ return -1; /* 11317 */ } else if (isNullB) { /* 11318 */ return 1; /* 11319 */ } else { /* 11320 */ int comp = primitiveA.compare(primitiveB); /* 11321 */ if (comp != 0) { /* 11322 */ return comp; /* 11323 */ } /* 11324 */ } /* 11325 */ /* 11326 */ /* 11327 */ i = a; /* 11328 */ boolean isNullA1; /* 11329 */ UTF8String primitiveA1; /* 11330 */ { /* 11331 */ /* 11332 */ Object obj1 = ((Expression) references[1]).eval(null); /* 11333 */ UTF8String value1 = (UTF8String) obj1; /* 11334 */ isNullA1 = false; /* 11335 */ primitiveA1 = value1; /* 11336 */ } /* 11337 */ i = b; /* 11338 */ boolean isNullB1; /* 11339 */ UTF8String primitiveB1; /* 11340 */ { /* 11341 */ /* 11342 */ Object obj1 = ((Expression) references[1]).eval(null); /* 11343 */ UTF8String value1 = (UTF8String) obj1; /* 11344 */ isNullB1 = false; /* 11345 */ primitiveB1 = value1; /* 11346 */ } /* 11347 */ if (isNullA1 && isNullB1) { /* 11348 */ // Nothing /* 11349 */ } else if (isNullA1) { /* 11350 */ return -1; /* 11351 */ } else if (isNullB1) { /* 11352 */ return 1; /* 11353 */ } else { /* 11354 */ int comp = primitiveA1.compare(primitiveB1); /* 11355 */ if (comp != 0) { /* 11356 */ return comp; /* 11357 */ } /* 11358 */ } /* 1.... */ /* 1.... */ ... /* 1.... */ /* 12652 */ return 0; /* 12653 */ } /* 1.... */ /* 1.... */ ... /* 15387 */ /* 15388 */ public int compare(InternalRow a, InternalRow b) { /* 15389 */ /* 15390 */ int comp_0 = compare_0(a, b); /* 15391 */ if (comp_0 != 0) { /* 15392 */ return comp_0; /* 15393 */ } /* 15394 */ /* 15395 */ int comp_1 = compare_1(a, b); /* 15396 */ if (comp_1 != 0) { /* 15397 */ return comp_1; /* 15398 */ } /* 1.... */ /* 1.... */ ... /* 1.... */ /* 15450 */ return 0; /* 15451 */ } /* 15452 */ } ``` ## How was this patch tested? - a new added test case which - would fail prior to this patch - would pass with this patch - ordering correctness should already be covered by existing tests like those in `OrderingSuite` ## Acknowledgement A major part of this PR - the refactoring work of `splitExpression()` - has been done by ueshin. Author: Liwei Lin <lwlin7@gmail.com> Author: Takuya UESHIN <ueshin@happy-camper.st> Author: Takuya Ueshin <ueshin@happy-camper.st> Closes #15480 from lw-lin/spec-ordering-64k-.
Diffstat (limited to 'sql/catalyst/src/main')
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala27
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala27
2 files changed, 47 insertions, 7 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index d7746ca7a0..f8f868b59b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -640,8 +640,24 @@ class CodegenContext {
splitExpressions(expressions, "apply", ("InternalRow", row) :: Nil)
}
- private def splitExpressions(
- expressions: Seq[String], funcName: String, arguments: Seq[(String, String)]): String = {
+ /**
+ * Splits the generated code of expressions into multiple functions, because function has
+ * 64kb code size limit in JVM
+ *
+ * @param expressions the codes to evaluate expressions.
+ * @param funcName the split function name base.
+ * @param arguments the list of (type, name) of the arguments of the split function.
+ * @param returnType the return type of the split function.
+ * @param makeSplitFunction makes split function body, e.g. add preparation or cleanup.
+ * @param foldFunctions folds the split function calls.
+ */
+ def splitExpressions(
+ expressions: Seq[String],
+ funcName: String,
+ arguments: Seq[(String, String)],
+ returnType: String = "void",
+ makeSplitFunction: String => String = identity,
+ foldFunctions: Seq[String] => String = _.mkString("", ";\n", ";")): String = {
val blocks = new ArrayBuffer[String]()
val blockBuilder = new StringBuilder()
for (code <- expressions) {
@@ -662,18 +678,19 @@ class CodegenContext {
blocks.head
} else {
val func = freshName(funcName)
+ val argString = arguments.map { case (t, name) => s"$t $name" }.mkString(", ")
val functions = blocks.zipWithIndex.map { case (body, i) =>
val name = s"${func}_$i"
val code = s"""
- |private void $name(${arguments.map { case (t, name) => s"$t $name" }.mkString(", ")}) {
- | $body
+ |private $returnType $name($argString) {
+ | ${makeSplitFunction(body)}
|}
""".stripMargin
addNewFunction(name, code)
name
}
- functions.map(name => s"$name(${arguments.map(_._2).mkString(", ")});").mkString("\n")
+ foldFunctions(functions.map(name => s"$name(${arguments.map(_._2).mkString(", ")})"))
}
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
index 1cef95654a..b7335f12b6 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
@@ -117,8 +117,31 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR
}
}
"""
- }.mkString("\n")
- comparisons
+ }
+
+ ctx.splitExpressions(
+ expressions = comparisons,
+ funcName = "compare",
+ arguments = Seq(("InternalRow", "a"), ("InternalRow", "b")),
+ returnType = "int",
+ makeSplitFunction = { body =>
+ s"""
+ InternalRow ${ctx.INPUT_ROW} = null; // Holds current row being evaluated.
+ $body
+ return 0;
+ """
+ },
+ foldFunctions = { funCalls =>
+ funCalls.zipWithIndex.map { case (funCall, i) =>
+ val comp = ctx.freshName("comp")
+ s"""
+ int $comp = $funCall;
+ if ($comp != 0) {
+ return $comp;
+ }
+ """
+ }.mkString
+ })
}
protected def create(ordering: Seq[SortOrder]): BaseOrdering = {