aboutsummaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorKousuke Saruta <sarutak@oss.nttdata.co.jp>2016-05-20 10:56:35 -0700
committerDavies Liu <davies.liu@gmail.com>2016-05-20 10:56:35 -0700
commit22947cd0213856442025baf653be588c6c707e36 (patch)
tree9b2a042ecabf17e37dcd221a05e99fe263e3785c /sql
parent5a25cd4ff3eccbbb4653e1fb43dd9026bb2df331 (diff)
downloadspark-22947cd0213856442025baf653be588c6c707e36.tar.gz
spark-22947cd0213856442025baf653be588c6c707e36.tar.bz2
spark-22947cd0213856442025baf653be588c6c707e36.zip
[SPARK-15165] [SPARK-15205] [SQL] Introduce place holder for comments in generated code
## What changes were proposed in this pull request? This PR introduces a place holder for comments in generated code; the purpose is the same as #12939, but this approach is much safer. Generated code to be compiled doesn't include actual comments but includes place holders instead. Place holders in generated code will be replaced with actual comments only at the time of logging. Also, this PR resolves SPARK-15205. ## How was this patch tested? Existing tests. Author: Kousuke Saruta <sarutak@oss.nttdata.co.jp> Closes #12979 from sarutak/SPARK-15205.
Diffstat (limited to 'sql')
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala6
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala12
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala59
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodegenFallback.scala6
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala3
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala3
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala3
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala3
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala3
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeRowJoiner.scala4
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala21
-rw-r--r--sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala5
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala3
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala16
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala5
15 files changed, 95 insertions, 57 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
index fab163476f..b4fe151f27 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
@@ -21,7 +21,6 @@ import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
import org.apache.spark.sql.catalyst.expressions.codegen._
import org.apache.spark.sql.catalyst.trees.TreeNode
-import org.apache.spark.sql.catalyst.util.toCommentSafeString
import org.apache.spark.sql.types._
////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -97,15 +96,14 @@ abstract class Expression extends TreeNode[Expression] {
ctx.subExprEliminationExprs.get(this).map { subExprState =>
// This expression is repeated which means that the code to evaluate it has already been added
// as a function before. In that case, we just re-use it.
- val code = s"/* ${toCommentSafeString(this.toString)} */"
- ExprCode(code, subExprState.isNull, subExprState.value)
+ ExprCode(ctx.registerComment(this.toString), subExprState.isNull, subExprState.value)
}.getOrElse {
val isNull = ctx.freshName("isNull")
val value = ctx.freshName("value")
val ve = doGenCode(ctx, ExprCode("", isNull, value))
if (ve.code.nonEmpty) {
// Add `this` in the comment.
- ve.copy(s"/* ${toCommentSafeString(this.toString)} */\n" + ve.code.trim)
+ ve.copy(code = s"${ctx.registerComment(this.toString)}\n" + ve.code.trim)
} else {
ve
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala
index ab4831f7ab..c7410925da 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala
@@ -17,6 +17,8 @@
package org.apache.spark.sql.catalyst.expressions.codegen
+import org.apache.commons.lang3.StringUtils
+
/**
* An utility class that indents a block of code based on the curly braces and parentheses.
* This is used to prettify generated code when in debug mode (or exceptions).
@@ -24,7 +26,15 @@ package org.apache.spark.sql.catalyst.expressions.codegen
* Written by Matei Zaharia.
*/
object CodeFormatter {
- def format(code: String): String = new CodeFormatter().addLines(code).result()
+ def format(code: CodeAndComment): String = {
+ new CodeFormatter().addLines(
+ StringUtils.replaceEach(
+ code.body,
+ code.comment.keys.toArray,
+ code.comment.values.toArray)
+ ).result
+ }
+
def stripExtraNewLines(input: String): String = {
val code = new StringBuilder
var lastLine: String = "dummy"
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index 67f6719265..8b74d606db 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -200,6 +200,11 @@ class CodegenContext {
var freshNamePrefix = ""
/**
+ * The map from a place holder to a corresponding comment
+ */
+ private val placeHolderToComments = new mutable.HashMap[String, String]
+
+ /**
* Returns a term name that is unique within this instance of a `CodegenContext`.
*/
def freshName(name: String): String = synchronized {
@@ -706,6 +711,35 @@ class CodegenContext {
if (doSubexpressionElimination) subexpressionElimination(expressions)
expressions.map(e => e.genCode(this))
}
+
+ /**
+ * get a map of the pair of a place holder and a corresponding comment
+ */
+ def getPlaceHolderToComments(): collection.Map[String, String] = placeHolderToComments
+
+ /**
+ * Register a multi-line comment and return the corresponding place holder
+ */
+ private def registerMultilineComment(text: String): String = {
+ val placeHolder = s"/*${freshName("c")}*/"
+ val comment = text.split("(\r\n)|\r|\n").mkString("/**\n * ", "\n * ", "\n */")
+ placeHolderToComments += (placeHolder -> comment)
+ placeHolder
+ }
+
+ /**
+ * Register a comment and return the corresponding place holder
+ */
+ def registerComment(text: String): String = {
+ if (text.contains("\n") || text.contains("\r")) {
+ registerMultilineComment(text)
+ } else {
+ val placeHolder = s"/*${freshName("c")}*/"
+ val safeComment = s"// $text"
+ placeHolderToComments += (placeHolder -> safeComment)
+ placeHolder
+ }
+ }
}
/**
@@ -717,6 +751,19 @@ abstract class GeneratedClass {
}
/**
+ * A wrapper for the source code to be compiled by [[CodeGenerator]].
+ */
+class CodeAndComment(val body: String, val comment: collection.Map[String, String])
+ extends Serializable {
+ override def equals(that: Any): Boolean = that match {
+ case t: CodeAndComment if t.body == body => true
+ case _ => false
+ }
+
+ override def hashCode(): Int = body.hashCode
+}
+
+/**
* A base class for generators of byte code to perform expression evaluation. Includes a set of
* helpers for referring to Catalyst types and building trees that perform evaluation of individual
* expressions.
@@ -760,14 +807,14 @@ object CodeGenerator extends Logging {
/**
* Compile the Java source code into a Java class, using Janino.
*/
- def compile(code: String): GeneratedClass = {
+ def compile(code: CodeAndComment): GeneratedClass = {
cache.get(code)
}
/**
* Compile the Java source code into a Java class, using Janino.
*/
- private[this] def doCompile(code: String): GeneratedClass = {
+ private[this] def doCompile(code: CodeAndComment): GeneratedClass = {
val evaluator = new ClassBodyEvaluator()
evaluator.setParentClassLoader(Utils.getContextOrSparkClassLoader)
// Cannot be under package codegen, or fail with java.lang.InstantiationException
@@ -788,7 +835,7 @@ object CodeGenerator extends Logging {
))
evaluator.setExtendedClass(classOf[GeneratedClass])
- def formatted = CodeFormatter.format(code)
+ lazy val formatted = CodeFormatter.format(code)
logDebug({
// Only add extra debugging info to byte code when we are going to print the source code.
@@ -797,7 +844,7 @@ object CodeGenerator extends Logging {
})
try {
- evaluator.cook("generated.java", code)
+ evaluator.cook("generated.java", code.body)
} catch {
case e: Exception =>
val msg = s"failed to compile: $e\n$formatted"
@@ -819,8 +866,8 @@ object CodeGenerator extends Logging {
private val cache = CacheBuilder.newBuilder()
.maximumSize(100)
.build(
- new CacheLoader[String, GeneratedClass]() {
- override def load(code: String): GeneratedClass = {
+ new CacheLoader[CodeAndComment, GeneratedClass]() {
+ override def load(code: CodeAndComment): GeneratedClass = {
val startTime = System.nanoTime()
val result = doCompile(code)
val endTime = System.nanoTime()
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodegenFallback.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodegenFallback.scala
index 2bd77c65c3..6a5a3e7933 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodegenFallback.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodegenFallback.scala
@@ -18,7 +18,6 @@
package org.apache.spark.sql.catalyst.expressions.codegen
import org.apache.spark.sql.catalyst.expressions.{Expression, LeafExpression, Nondeterministic}
-import org.apache.spark.sql.catalyst.util.toCommentSafeString
/**
* A trait that can be used to provide a fallback mode for expression code generation.
@@ -36,9 +35,10 @@ trait CodegenFallback extends Expression {
val idx = ctx.references.length
ctx.references += this
val objectTerm = ctx.freshName("obj")
+ val placeHolder = ctx.registerComment(this.toString)
if (nullable) {
ev.copy(code = s"""
- /* expression: ${toCommentSafeString(this.toString)} */
+ $placeHolder
Object $objectTerm = ((Expression) references[$idx]).eval($input);
boolean ${ev.isNull} = $objectTerm == null;
${ctx.javaType(this.dataType)} ${ev.value} = ${ctx.defaultValue(this.dataType)};
@@ -47,7 +47,7 @@ trait CodegenFallback extends Expression {
}""")
} else {
ev.copy(code = s"""
- /* expression: ${toCommentSafeString(this.toString)} */
+ $placeHolder
Object $objectTerm = ((Expression) references[$idx]).eval($input);
${ctx.javaType(this.dataType)} ${ev.value} = (${ctx.boxedType(this.dataType)}) $objectTerm;
""", isNull = "false")
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala
index f143b40443..1305289e78 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala
@@ -94,7 +94,7 @@ object GenerateMutableProjection extends CodeGenerator[Seq[Expression], MutableP
val allProjections = ctx.splitExpressions(ctx.INPUT_ROW, projectionCodes)
val allUpdates = ctx.splitExpressions(ctx.INPUT_ROW, updates)
- val code = s"""
+ val codeBody = s"""
public java.lang.Object generate(Object[] references) {
return new SpecificMutableProjection(references);
}
@@ -133,6 +133,7 @@ object GenerateMutableProjection extends CodeGenerator[Seq[Expression], MutableP
}
"""
+ val code = new CodeAndComment(codeBody, ctx.getPlaceHolderToComments())
logDebug(s"code for ${expressions.mkString(",")}:\n${CodeFormatter.format(code)}")
val c = CodeGenerator.compile(code)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
index dc4825cdd8..1c53d62a5e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
@@ -113,7 +113,7 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR
protected def create(ordering: Seq[SortOrder]): BaseOrdering = {
val ctx = newCodeGenContext()
val comparisons = genComparisons(ctx, ordering)
- val code = s"""
+ val codeBody = s"""
public SpecificOrdering generate(Object[] references) {
return new SpecificOrdering(references);
}
@@ -136,6 +136,7 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR
}
}"""
+ val code = new CodeAndComment(codeBody, ctx.getPlaceHolderToComments())
logDebug(s"Generated Ordering by ${ordering.mkString(",")}:\n${CodeFormatter.format(code)}")
CodeGenerator.compile(code).generate(ctx.references.toArray).asInstanceOf[BaseOrdering]
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala
index dd8e2a289a..ef44e6b46b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala
@@ -40,7 +40,7 @@ object GeneratePredicate extends CodeGenerator[Expression, (InternalRow) => Bool
protected def create(predicate: Expression): ((InternalRow) => Boolean) = {
val ctx = newCodeGenContext()
val eval = predicate.genCode(ctx)
- val code = s"""
+ val codeBody = s"""
public SpecificPredicate generate(Object[] references) {
return new SpecificPredicate(references);
}
@@ -61,6 +61,7 @@ object GeneratePredicate extends CodeGenerator[Expression, (InternalRow) => Bool
}
}"""
+ val code = new CodeAndComment(codeBody, ctx.getPlaceHolderToComments())
logDebug(s"Generated predicate '$predicate':\n${CodeFormatter.format(code)}")
val p = CodeGenerator.compile(code).generate(ctx.references.toArray).asInstanceOf[Predicate]
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala
index ee1a363145..b0b1212553 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala
@@ -155,7 +155,7 @@ object GenerateSafeProjection extends CodeGenerator[Seq[Expression], Projection]
"""
}
val allExpressions = ctx.splitExpressions(ctx.INPUT_ROW, expressionCodes)
- val code = s"""
+ val codeBody = s"""
public java.lang.Object generate(Object[] references) {
return new SpecificSafeProjection(references);
}
@@ -181,6 +181,7 @@ object GenerateSafeProjection extends CodeGenerator[Seq[Expression], Projection]
}
"""
+ val code = new CodeAndComment(codeBody, ctx.getPlaceHolderToComments())
logDebug(s"code for ${expressions.mkString(",")}:\n${CodeFormatter.format(code)}")
val c = CodeGenerator.compile(code)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala
index 6aa9cbf08b..102f276e9b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala
@@ -362,7 +362,7 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro
val ctx = newCodeGenContext()
val eval = createCode(ctx, expressions, subexpressionEliminationEnabled)
- val code = s"""
+ val codeBody = s"""
public java.lang.Object generate(Object[] references) {
return new SpecificUnsafeProjection(references);
}
@@ -390,6 +390,7 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro
}
"""
+ val code = new CodeAndComment(codeBody, ctx.getPlaceHolderToComments())
logDebug(s"code for ${expressions.mkString(",")}:\n${CodeFormatter.format(code)}")
val c = CodeGenerator.compile(code)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeRowJoiner.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeRowJoiner.scala
index b1ffbaa3e9..4dc1678ff6 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeRowJoiner.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeRowJoiner.scala
@@ -157,7 +157,7 @@ object GenerateUnsafeRowJoiner extends CodeGenerator[(StructType, StructType), U
}.mkString("\n")
// ------------------------ Finally, put everything together --------------------------- //
- val code = s"""
+ val codeBody = s"""
|public java.lang.Object generate(Object[] references) {
| return new SpecificUnsafeRowJoiner();
|}
@@ -193,7 +193,7 @@ object GenerateUnsafeRowJoiner extends CodeGenerator[(StructType, StructType), U
| }
|}
""".stripMargin
-
+ val code = new CodeAndComment(codeBody, Map.empty)
logDebug(s"SpecificUnsafeRowJoiner($schema1, $schema2):\n${CodeFormatter.format(code)}")
val c = CodeGenerator.compile(code)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala
index f1d6cab9a5..4005087dad 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala
@@ -155,27 +155,6 @@ package object util {
def toPrettySQL(e: Expression): String = usePrettyExpression(e).sql
- /**
- * Returns the string representation of this expression that is safe to be put in
- * code comments of generated code. The length is capped at 128 characters.
- */
- def toCommentSafeString(str: String): String = {
- val len = math.min(str.length, 128)
- val suffix = if (str.length > len) "..." else ""
-
- // Unicode literals, like \u0022, should be escaped before
- // they are put in code comment to avoid codegen breaking.
- // To escape them, single "\" should be prepended to a series of "\" just before "u"
- // only when the number of "\" is odd.
- // For example, \u0022 should become to \\u0022
- // but \\u0022 should not become to \\\u0022 because the first backslash escapes the second one,
- // and \u0022 will remain, means not escaped.
- // Otherwise, the runtime Java compiler will fail to compile or code injection can be allowed.
- // For details, see SPARK-15165.
- str.substring(0, len).replace("*/", "*\\/")
- .replaceAll("(^|[^\\\\])(\\\\(\\\\\\\\)*u)", "$1\\\\$2") + suffix
- }
-
/* FIX ME
implicit class debugLogging(a: Any) {
def debugLogging() {
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala
index f57b82bb96..6022f2dbbe 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala
@@ -25,11 +25,12 @@ class CodeFormatterSuite extends SparkFunSuite {
def testCase(name: String)(input: String)(expected: String): Unit = {
test(name) {
- if (CodeFormatter.format(input).trim !== expected.trim) {
+ val sourceCode = new CodeAndComment(input, Map.empty)
+ if (CodeFormatter.format(sourceCode).trim !== expected.trim) {
fail(
s"""
|== FAIL: Formatted code doesn't match ===
- |${sideBySide(CodeFormatter.format(input).trim, expected.trim).mkString("\n")}
+ |${sideBySide(CodeFormatter.format(sourceCode).trim, expected.trim).mkString("\n")}
""".stripMargin)
}
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala
index d8911f88b0..ec23a9c41a 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala
@@ -27,7 +27,6 @@ import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode}
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Statistics}
import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, Partitioning, UnknownPartitioning}
-import org.apache.spark.sql.catalyst.util.toCommentSafeString
import org.apache.spark.sql.execution.datasources.HadoopFsRelation
import org.apache.spark.sql.execution.datasources.parquet.{DefaultSource => ParquetSource}
import org.apache.spark.sql.execution.metric.SQLMetrics
@@ -253,7 +252,7 @@ private[sql] case class BatchedDataSourceScanExec(
val isNullVar = if (nullable) { ctx.freshName("isNull") } else { "false" }
val valueVar = ctx.freshName("value")
val str = s"columnVector[$columnVar, $ordinal, ${dataType.simpleString}]"
- val code = s"/* ${toCommentSafeString(str)} */\n" + (if (nullable) {
+ val code = s"${ctx.registerComment(str)}\n" + (if (nullable) {
s"""
boolean ${isNullVar} = ${columnVar}.isNullAt($ordinal);
$javaType ${valueVar} = ${isNullVar} ? ${ctx.defaultValue(dataType)} : ($value);
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala
index d6f7b6ed35..37fdc362b5 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala
@@ -24,7 +24,6 @@ import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.codegen._
import org.apache.spark.sql.catalyst.plans.physical.Partitioning
import org.apache.spark.sql.catalyst.rules.Rule
-import org.apache.spark.sql.catalyst.util.toCommentSafeString
import org.apache.spark.sql.execution.aggregate.TungstenAggregate
import org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec, SortMergeJoinExec}
import org.apache.spark.sql.execution.metric.SQLMetrics
@@ -79,7 +78,7 @@ trait CodegenSupport extends SparkPlan {
this.parent = parent
ctx.freshNamePrefix = variablePrefix
s"""
- |/*** PRODUCE: ${toCommentSafeString(this.simpleString)} */
+ |${ctx.registerComment(s"PRODUCE: ${this.simpleString}")}
|${doProduce(ctx)}
""".stripMargin
}
@@ -147,8 +146,7 @@ trait CodegenSupport extends SparkPlan {
ctx.freshNamePrefix = parent.variablePrefix
val evaluated = evaluateRequiredVariables(output, inputVars, parent.usedInputs)
s"""
- |
- |/*** CONSUME: ${toCommentSafeString(parent.simpleString)} */
+ |${ctx.registerComment(s"CONSUME: ${parent.simpleString}")}
|$evaluated
|${parent.doConsume(ctx, inputVars, rowVar)}
""".stripMargin
@@ -299,7 +297,7 @@ case class WholeStageCodegenExec(child: SparkPlan) extends UnaryExecNode with Co
*
* @return the tuple of the codegen context and the actual generated source.
*/
- def doCodeGen(): (CodegenContext, String) = {
+ def doCodeGen(): (CodegenContext, CodeAndComment) = {
val ctx = new CodegenContext
val code = child.asInstanceOf[CodegenSupport].produce(ctx, this)
val source = s"""
@@ -307,9 +305,7 @@ case class WholeStageCodegenExec(child: SparkPlan) extends UnaryExecNode with Co
return new GeneratedIterator(references);
}
- /** Codegened pipeline for:
- * ${toCommentSafeString(child.treeString.trim)}
- */
+ ${ctx.registerComment(s"""Codegend pipeline for\n${child.treeString.trim}""")}
final class GeneratedIterator extends org.apache.spark.sql.execution.BufferedRowIterator {
private Object[] references;
@@ -333,7 +329,9 @@ case class WholeStageCodegenExec(child: SparkPlan) extends UnaryExecNode with Co
""".trim
// try to compile, helpful for debug
- val cleanedSource = CodeFormatter.stripExtraNewLines(source)
+ val cleanedSource =
+ new CodeAndComment(CodeFormatter.stripExtraNewLines(source), ctx.getPlaceHolderToComments())
+
logDebug(s"\n${CodeFormatter.format(cleanedSource)}")
CodeGenerator.compile(cleanedSource)
(ctx, cleanedSource)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala
index bd5cb800dd..e0b48119f6 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala
@@ -20,7 +20,7 @@ package org.apache.spark.sql.execution.columnar
import org.apache.spark.internal.Logging
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.catalyst.expressions.codegen.{CodeFormatter, CodeGenerator, UnsafeRowWriter}
+import org.apache.spark.sql.catalyst.expressions.codegen.{CodeAndComment, CodeFormatter, CodeGenerator, UnsafeRowWriter}
import org.apache.spark.sql.types._
/**
@@ -150,7 +150,7 @@ object GenerateColumnAccessor extends CodeGenerator[Seq[DataType], ColumnarItera
(0 to groupedAccessorsLength - 1).map { i => s"extractors$i();" }.mkString("\n"))
}
- val code = s"""
+ val codeBody = s"""
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import scala.collection.Iterator;
@@ -224,6 +224,7 @@ object GenerateColumnAccessor extends CodeGenerator[Seq[DataType], ColumnarItera
}
}"""
+ val code = new CodeAndComment(codeBody, ctx.getPlaceHolderToComments())
logDebug(s"Generated ColumnarIterator:\n${CodeFormatter.format(code)}")
CodeGenerator.compile(code).generate(Array.empty).asInstanceOf[ColumnarIterator]