From 6cb6096c016178b9ce5c97592abe529ddb18cef2 Mon Sep 17 00:00:00 2001 From: Forest Fang Date: Sat, 18 Jul 2015 21:05:44 -0700 Subject: [SPARK-8443][SQL] Split GenerateMutableProjection Codegen due to JVM Code Size Limits By grouping projection calls into multiple apply functions, we are able to push the number of projections codegen can handle from ~1k to ~60k. I have set the unit test to test against 5k as 60k took 15s for the unit test to complete. Author: Forest Fang Closes #7076 from saurfang/codegen_size_limit and squashes the following commits: b7a7635 [Forest Fang] [SPARK-8443][SQL] Execute and verify split projections in test adef95a [Forest Fang] [SPARK-8443][SQL] Use safer factor and rewrite splitting code 1b5aa7e [Forest Fang] [SPARK-8443][SQL] inline execution if one block only 9405680 [Forest Fang] [SPARK-8443][SQL] split projection code by size limit --- .../codegen/GenerateMutableProjection.scala | 39 ++++++++++++++++++++-- .../catalyst/expressions/CodeGenerationSuite.scala | 14 +++++++- 2 files changed, 50 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala index 71e47d4f9b..b82bd6814b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala @@ -19,6 +19,8 @@ package org.apache.spark.sql.catalyst.expressions.codegen import org.apache.spark.sql.catalyst.expressions._ +import scala.collection.mutable.ArrayBuffer + // MutableProjection is not accessible in Java abstract class BaseMutableProjection extends MutableProjection @@ -45,10 +47,41 @@ object GenerateMutableProjection extends CodeGenerator[Seq[Expression], () => Mu else ${ctx.setColumn("mutableRow", 
e.dataType, i, evaluationCode.primitive)}; """ - }.mkString("\n") + } + // collect projections into blocks as function has 64kb codesize limit in JVM + val projectionBlocks = new ArrayBuffer[String]() + val blockBuilder = new StringBuilder() + for (projection <- projectionCode) { + if (blockBuilder.length > 16 * 1000) { + projectionBlocks.append(blockBuilder.toString()) + blockBuilder.clear() + } + blockBuilder.append(projection) + } + projectionBlocks.append(blockBuilder.toString()) + + val (projectionFuns, projectionCalls) = { + // inline execution if codesize limit was not broken + if (projectionBlocks.length == 1) { + ("", projectionBlocks.head) + } else { + ( + projectionBlocks.zipWithIndex.map { case (body, i) => + s""" + |private void apply$i(InternalRow i) { + | $body + |} + """.stripMargin + }.mkString, + projectionBlocks.indices.map(i => s"apply$i(i);").mkString("\n") + ) + } + } + val mutableStates = ctx.mutableStates.map { case (javaType, variableName, initialValue) => s"private $javaType $variableName = $initialValue;" }.mkString("\n ") + val code = s""" public Object generate($exprType[] expr) { return new SpecificProjection(expr); @@ -75,9 +108,11 @@ object GenerateMutableProjection extends CodeGenerator[Seq[Expression], () => Mu return (InternalRow) mutableRow; } + $projectionFuns + public Object apply(Object _i) { InternalRow i = (InternalRow) _i; - $projectionCode + $projectionCalls return mutableRow; } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala index 481b335d15..e05218a23a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala @@ -24,7 +24,7 @@ import org.apache.spark.sql.catalyst.expressions.codegen._ /** * Additional tests for 
code generation. */ -class CodeGenerationSuite extends SparkFunSuite { +class CodeGenerationSuite extends SparkFunSuite with ExpressionEvalHelper { test("multithreaded eval") { import scala.concurrent._ @@ -42,4 +42,16 @@ class CodeGenerationSuite extends SparkFunSuite { futures.foreach(Await.result(_, 10.seconds)) } + + test("SPARK-8443: split wide projections into blocks due to JVM code size limit") { + val length = 5000 + val expressions = List.fill(length)(EqualTo(Literal(1), Literal(1))) + val plan = GenerateMutableProjection.generate(expressions)() + val actual = plan(new GenericMutableRow(length)).toSeq + val expected = Seq.fill(length)(true) + + if (!checkResult(actual, expected)) { + fail(s"Incorrect Evaluation: expressions: $expressions, actual: $actual, expected: $expected") + } + } } -- cgit v1.2.3