aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDongjoon Hyun <dongjoon@apache.org>2016-05-24 10:08:14 -0700
committerDavies Liu <davies.liu@gmail.com>2016-05-24 10:08:14 -0700
commitf8763b80ecd9968566018396c8cdc1851e7f8a46 (patch)
tree60834146260721ba357bca443a550514902b9542
parentc24b6b679c3efa053f7de19be73eb36dc70d9930 (diff)
downloadspark-f8763b80ecd9968566018396c8cdc1851e7f8a46.tar.gz
spark-f8763b80ecd9968566018396c8cdc1851e7f8a46.tar.bz2
spark-f8763b80ecd9968566018396c8cdc1851e7f8a46.zip
[SPARK-13135] [SQL] Don't print expressions recursively in generated code
## What changes were proposed in this pull request? This PR is an up-to-date and a little bit improved version of #11019 of rxin for - (1) preventing recursive printing of expressions in generated code. Since the major function of this PR is indeed the above, he should be credited for the work he did. In addition to #11019, this PR improves the followings in code generation. - (2) Improve multiline comment indentation. - (3) Reduce the number of empty lines (mainly consecutive empty lines). - (4) Remove all space characters on empty lines. **Example** ```scala spark.range(1, 1000).select('id+1+2+3, 'id+4+5+6) ``` **Before** ``` Generated code: /* 001 */ public Object generate(Object[] references) { ... /* 005 */ /** /* 006 */ * Codegend pipeline for /* 007 */ * Project [(((id#0L + 1) + 2) + 3) AS (((id + 1) + 2) + 3)#3L,(((id#0L + 4) + 5) + 6) AS (((id + 4) + 5) + 6)#4L] /* 008 */ * +- Range 1, 1, 8, 999, [id#0L] /* 009 */ */ ... /* 075 */ // PRODUCE: Project [(((id#0L + 1) + 2) + 3) AS (((id + 1) + 2) + 3)#3L,(((id#0L + 4) + 5) + 6) AS (((id + 4) + 5) + 6)#4L] /* 076 */ /* 077 */ // PRODUCE: Range 1, 1, 8, 999, [id#0L] /* 078 */ /* 079 */ // initialize Range ... /* 092 */ // CONSUME: Project [(((id#0L + 1) + 2) + 3) AS (((id + 1) + 2) + 3)#3L,(((id#0L + 4) + 5) + 6) AS (((id + 4) + 5) + 6)#4L] /* 093 */ /* 094 */ // CONSUME: WholeStageCodegen /* 095 */ /* 096 */ // (((input[0, bigint, false] + 1) + 2) + 3) /* 097 */ // ((input[0, bigint, false] + 1) + 2) /* 098 */ // (input[0, bigint, false] + 1) ... /* 107 */ // (((input[0, bigint, false] + 4) + 5) + 6) /* 108 */ // ((input[0, bigint, false] + 4) + 5) /* 109 */ // (input[0, bigint, false] + 4) ... /* 126 */ } ``` **After** ``` Generated code: /* 001 */ public Object generate(Object[] references) { ... /* 005 */ /** /* 006 */ * Codegend pipeline for /* 007 */ * Project [(((id#0L + 1) + 2) + 3) AS (((id + 1) + 2) + 3)#3L,(((id#0L + 4) + 5) + 6) AS (((id + 4) + 5) + 6)#4L] /* 008 */ * +- Range 1, 1, 8, 999, [id#0L] /* 009 */ */ ... /* 075 */ // PRODUCE: Project [(((id#0L + 1) + 2) + 3) AS (((id + 1) + 2) + 3)#3L,(((id#0L + 4) + 5) + 6) AS (((id + 4) + 5) + 6)#4L] /* 076 */ // PRODUCE: Range 1, 1, 8, 999, [id#0L] /* 077 */ // initialize Range ... /* 090 */ // CONSUME: Project [(((id#0L + 1) + 2) + 3) AS (((id + 1) + 2) + 3)#3L,(((id#0L + 4) + 5) + 6) AS (((id + 4) + 5) + 6)#4L] /* 091 */ // CONSUME: WholeStageCodegen /* 092 */ // (((input[0, bigint, false] + 1) + 2) + 3) ... /* 101 */ // (((input[0, bigint, false] + 4) + 5) + 6) ... /* 118 */ } ``` ## How was this patch tested? Pass the Jenkins tests and see the result of the following command manually. ```scala scala> spark.range(1, 1000).select('id+1+2+3, 'id+4+5+6).queryExecution.debug.codegen() ``` Author: Dongjoon Hyun <dongjoonapache.org> Author: Reynold Xin <rxindatabricks.com> Author: Dongjoon Hyun <dongjoon@apache.org> Closes #13192 from dongjoon-hyun/SPARK-13135.
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala27
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala3
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala3
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala3
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala3
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala3
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeRowJoiner.scala2
-rw-r--r--sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala43
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala4
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala3
10 files changed, 82 insertions, 12 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala
index c7410925da..855ae6432d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala
@@ -40,7 +40,7 @@ object CodeFormatter {
var lastLine: String = "dummy"
input.split('\n').foreach { l =>
val line = l.trim()
- val skip = line == "" && (lastLine == "" || lastLine.endsWith("{"))
+ val skip = line == "" && (lastLine == "" || lastLine.endsWith("{") || lastLine.endsWith("*/"))
if (!skip) {
code.append(line)
code.append("\n")
@@ -49,6 +49,24 @@ object CodeFormatter {
}
code.result()
}
+
+ def stripOverlappingComments(codeAndComment: CodeAndComment): CodeAndComment = {
+ val code = new StringBuilder
+ val map = codeAndComment.comment
+ var lastLine: String = "dummy"
+ codeAndComment.body.split('\n').foreach { l =>
+ val line = l.trim()
+ val skip = lastLine.startsWith("/*") && lastLine.endsWith("*/") &&
+ line.startsWith("/*") && line.endsWith("*/") &&
+ map(lastLine).substring(3).contains(map(line).substring(3))
+ if (!skip) {
+ code.append(line)
+ code.append("\n")
+ }
+ lastLine = line
+ }
+ new CodeAndComment(code.result().trim(), map)
+ }
}
private class CodeFormatter {
@@ -100,8 +118,11 @@ private class CodeFormatter {
indentString
}
code.append(f"/* ${currentLine}%03d */ ")
- code.append(thisLineIndent)
- code.append(line)
+ if (line.trim().length > 0) {
+ code.append(thisLineIndent)
+ if (inCommentBlock && line.startsWith("*") || line.startsWith("*/")) code.append(" ")
+ code.append(line)
+ }
code.append("\n")
indentLevel = newIndentLevel
indentString = " " * (indentSize * newIndentLevel)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala
index 1305289e78..0f82d2e613 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala
@@ -133,7 +133,8 @@ object GenerateMutableProjection extends CodeGenerator[Seq[Expression], MutableP
}
"""
- val code = new CodeAndComment(codeBody, ctx.getPlaceHolderToComments())
+ val code = CodeFormatter.stripOverlappingComments(
+ new CodeAndComment(codeBody, ctx.getPlaceHolderToComments()))
logDebug(s"code for ${expressions.mkString(",")}:\n${CodeFormatter.format(code)}")
val c = CodeGenerator.compile(code)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
index 1c53d62a5e..c10829d4f1 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
@@ -136,7 +136,8 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR
}
}"""
- val code = new CodeAndComment(codeBody, ctx.getPlaceHolderToComments())
+ val code = CodeFormatter.stripOverlappingComments(
+ new CodeAndComment(codeBody, ctx.getPlaceHolderToComments()))
logDebug(s"Generated Ordering by ${ordering.mkString(",")}:\n${CodeFormatter.format(code)}")
CodeGenerator.compile(code).generate(ctx.references.toArray).asInstanceOf[BaseOrdering]
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala
index ef44e6b46b..106bb27964 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala
@@ -61,7 +61,8 @@ object GeneratePredicate extends CodeGenerator[Expression, (InternalRow) => Bool
}
}"""
- val code = new CodeAndComment(codeBody, ctx.getPlaceHolderToComments())
+ val code = CodeFormatter.stripOverlappingComments(
+ new CodeAndComment(codeBody, ctx.getPlaceHolderToComments()))
logDebug(s"Generated predicate '$predicate':\n${CodeFormatter.format(code)}")
val p = CodeGenerator.compile(code).generate(ctx.references.toArray).asInstanceOf[Predicate]
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala
index 214dc40641..b891f94673 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala
@@ -181,7 +181,8 @@ object GenerateSafeProjection extends CodeGenerator[Seq[Expression], Projection]
}
"""
- val code = new CodeAndComment(codeBody, ctx.getPlaceHolderToComments())
+ val code = CodeFormatter.stripOverlappingComments(
+ new CodeAndComment(codeBody, ctx.getPlaceHolderToComments()))
logDebug(s"code for ${expressions.mkString(",")}:\n${CodeFormatter.format(code)}")
val c = CodeGenerator.compile(code)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala
index 102f276e9b..5efba4b3a6 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala
@@ -390,7 +390,8 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro
}
"""
- val code = new CodeAndComment(codeBody, ctx.getPlaceHolderToComments())
+ val code = CodeFormatter.stripOverlappingComments(
+ new CodeAndComment(codeBody, ctx.getPlaceHolderToComments()))
logDebug(s"code for ${expressions.mkString(",")}:\n${CodeFormatter.format(code)}")
val c = CodeGenerator.compile(code)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeRowJoiner.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeRowJoiner.scala
index 4dc1678ff6..4aa5ec8247 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeRowJoiner.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeRowJoiner.scala
@@ -193,7 +193,7 @@ object GenerateUnsafeRowJoiner extends CodeGenerator[(StructType, StructType), U
| }
|}
""".stripMargin
- val code = new CodeAndComment(codeBody, Map.empty)
+ val code = CodeFormatter.stripOverlappingComments(new CodeAndComment(codeBody, Map.empty))
logDebug(s"SpecificUnsafeRowJoiner($schema1, $schema2):\n${CodeFormatter.format(code)}")
val c = CodeGenerator.compile(code)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala
index 6022f2dbbe..76afc2e8ec 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala
@@ -36,6 +36,22 @@ class CodeFormatterSuite extends SparkFunSuite {
}
}
+ test("removing overlapping comments") {
+ val code = new CodeAndComment(
+ """/*project_c4*/
+ |/*project_c3*/
+ |/*project_c2*/
+ """.stripMargin,
+ Map(
+ "/*project_c4*/" -> "// (((input[0, bigint, false] + 1) + 2) + 3))",
+ "/*project_c3*/" -> "// ((input[0, bigint, false] + 1) + 2)",
+ "/*project_c2*/" -> "// (input[0, bigint, false] + 1)"
+ ))
+
+ val reducedCode = CodeFormatter.stripOverlappingComments(code)
+ assert(reducedCode.body === "/*project_c4*/")
+ }
+
testCase("basic example") {
"""class A {
|blahblah;
@@ -147,4 +163,31 @@ class CodeFormatterSuite extends SparkFunSuite {
|/* 006 */ }
""".stripMargin
}
+
+ // scalastyle:off whitespace.end.of.line
+ testCase("reduce empty lines") {
+ CodeFormatter.stripExtraNewLines(
+ """class A {
+ |
+ |
+ | /*** comment1 */
+ |
+ | class body;
+ |
+ |
+ | if (c) {duh;}
+ | else {boo;}
+ |}""".stripMargin)
+ }{
+ """
+ |/* 001 */ class A {
+ |/* 002 */ /*** comment1 */
+ |/* 003 */ class body;
+ |/* 004 */
+ |/* 005 */ if (c) {duh;}
+ |/* 006 */ else {boo;}
+ |/* 007 */ }
+ """.stripMargin
+ }
+ // scalastyle:on whitespace.end.of.line
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala
index 2a1ce735b7..908e22de73 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala
@@ -333,8 +333,8 @@ case class WholeStageCodegenExec(child: SparkPlan) extends UnaryExecNode with Co
""".trim
// try to compile, helpful for debug
- val cleanedSource =
- new CodeAndComment(CodeFormatter.stripExtraNewLines(source), ctx.getPlaceHolderToComments())
+ val cleanedSource = CodeFormatter.stripOverlappingComments(
+ new CodeAndComment(CodeFormatter.stripExtraNewLines(source), ctx.getPlaceHolderToComments()))
logDebug(s"\n${CodeFormatter.format(cleanedSource)}")
CodeGenerator.compile(cleanedSource)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala
index e0b48119f6..1041bab9d5 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala
@@ -224,7 +224,8 @@ object GenerateColumnAccessor extends CodeGenerator[Seq[DataType], ColumnarItera
}
}"""
- val code = new CodeAndComment(codeBody, ctx.getPlaceHolderToComments())
+ val code = CodeFormatter.stripOverlappingComments(
+ new CodeAndComment(codeBody, ctx.getPlaceHolderToComments()))
logDebug(s"Generated ColumnarIterator:\n${CodeFormatter.format(code)}")
CodeGenerator.compile(code).generate(Array.empty).asInstanceOf[ColumnarIterator]