diff options
10 files changed, 82 insertions, 12 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala index c7410925da..855ae6432d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala @@ -40,7 +40,7 @@ object CodeFormatter { var lastLine: String = "dummy" input.split('\n').foreach { l => val line = l.trim() - val skip = line == "" && (lastLine == "" || lastLine.endsWith("{")) + val skip = line == "" && (lastLine == "" || lastLine.endsWith("{") || lastLine.endsWith("*/")) if (!skip) { code.append(line) code.append("\n") @@ -49,6 +49,24 @@ object CodeFormatter { } code.result() } + + def stripOverlappingComments(codeAndComment: CodeAndComment): CodeAndComment = { + val code = new StringBuilder + val map = codeAndComment.comment + var lastLine: String = "dummy" + codeAndComment.body.split('\n').foreach { l => + val line = l.trim() + val skip = lastLine.startsWith("/*") && lastLine.endsWith("*/") && + line.startsWith("/*") && line.endsWith("*/") && + map(lastLine).substring(3).contains(map(line).substring(3)) + if (!skip) { + code.append(line) + code.append("\n") + } + lastLine = line + } + new CodeAndComment(code.result().trim(), map) + } } private class CodeFormatter { @@ -100,8 +118,11 @@ private class CodeFormatter { indentString } code.append(f"/* ${currentLine}%03d */ ") - code.append(thisLineIndent) - code.append(line) + if (line.trim().length > 0) { + code.append(thisLineIndent) + if (inCommentBlock && line.startsWith("*") || line.startsWith("*/")) code.append(" ") + code.append(line) + } code.append("\n") indentLevel = newIndentLevel indentString = " " * (indentSize * newIndentLevel) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala index 1305289e78..0f82d2e613 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala @@ -133,7 +133,8 @@ object GenerateMutableProjection extends CodeGenerator[Seq[Expression], MutableP } """ - val code = new CodeAndComment(codeBody, ctx.getPlaceHolderToComments()) + val code = CodeFormatter.stripOverlappingComments( + new CodeAndComment(codeBody, ctx.getPlaceHolderToComments())) logDebug(s"code for ${expressions.mkString(",")}:\n${CodeFormatter.format(code)}") val c = CodeGenerator.compile(code) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala index 1c53d62a5e..c10829d4f1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala @@ -136,7 +136,8 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR } }""" - val code = new CodeAndComment(codeBody, ctx.getPlaceHolderToComments()) + val code = CodeFormatter.stripOverlappingComments( + new CodeAndComment(codeBody, ctx.getPlaceHolderToComments())) logDebug(s"Generated Ordering by ${ordering.mkString(",")}:\n${CodeFormatter.format(code)}") CodeGenerator.compile(code).generate(ctx.references.toArray).asInstanceOf[BaseOrdering] diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala index ef44e6b46b..106bb27964 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala @@ -61,7 +61,8 @@ object GeneratePredicate extends CodeGenerator[Expression, (InternalRow) => Bool } }""" - val code = new CodeAndComment(codeBody, ctx.getPlaceHolderToComments()) + val code = CodeFormatter.stripOverlappingComments( + new CodeAndComment(codeBody, ctx.getPlaceHolderToComments())) logDebug(s"Generated predicate '$predicate':\n${CodeFormatter.format(code)}") val p = CodeGenerator.compile(code).generate(ctx.references.toArray).asInstanceOf[Predicate] diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala index 214dc40641..b891f94673 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala @@ -181,7 +181,8 @@ object GenerateSafeProjection extends CodeGenerator[Seq[Expression], Projection] } """ - val code = new CodeAndComment(codeBody, ctx.getPlaceHolderToComments()) + val code = CodeFormatter.stripOverlappingComments( + new CodeAndComment(codeBody, ctx.getPlaceHolderToComments())) logDebug(s"code for ${expressions.mkString(",")}:\n${CodeFormatter.format(code)}") val c = CodeGenerator.compile(code) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala index 102f276e9b..5efba4b3a6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala @@ -390,7 +390,8 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro } """ - val code = new CodeAndComment(codeBody, ctx.getPlaceHolderToComments()) + val code = CodeFormatter.stripOverlappingComments( + new CodeAndComment(codeBody, ctx.getPlaceHolderToComments())) logDebug(s"code for ${expressions.mkString(",")}:\n${CodeFormatter.format(code)}") val c = CodeGenerator.compile(code) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeRowJoiner.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeRowJoiner.scala index 4dc1678ff6..4aa5ec8247 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeRowJoiner.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeRowJoiner.scala @@ -193,7 +193,7 @@ object GenerateUnsafeRowJoiner extends CodeGenerator[(StructType, StructType), U | } |} """.stripMargin - val code = new CodeAndComment(codeBody, Map.empty) + val code = CodeFormatter.stripOverlappingComments(new CodeAndComment(codeBody, Map.empty)) logDebug(s"SpecificUnsafeRowJoiner($schema1, $schema2):\n${CodeFormatter.format(code)}") val c = CodeGenerator.compile(code) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala index 6022f2dbbe..76afc2e8ec 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala @@ -36,6 +36,22 @@ class CodeFormatterSuite extends SparkFunSuite { } } + test("removing overlapping comments") { + val code = new CodeAndComment( + """/*project_c4*/ + |/*project_c3*/ + |/*project_c2*/ + """.stripMargin, + Map( + "/*project_c4*/" -> "// (((input[0, bigint, false] + 1) + 2) + 3))", + "/*project_c3*/" -> "// ((input[0, bigint, false] + 1) + 2)", + "/*project_c2*/" -> "// (input[0, bigint, false] + 1)" + )) + + val reducedCode = CodeFormatter.stripOverlappingComments(code) + assert(reducedCode.body === "/*project_c4*/") + } + testCase("basic example") { """class A { |blahblah; @@ -147,4 +163,31 @@ class CodeFormatterSuite extends SparkFunSuite { |/* 006 */ } """.stripMargin } + + // scalastyle:off whitespace.end.of.line + testCase("reduce empty lines") { + CodeFormatter.stripExtraNewLines( + """class A { + | + | + | /*** comment1 */ + | + | class body; + | + | + | if (c) {duh;} + | else {boo;} + |}""".stripMargin) + }{ + """ + |/* 001 */ class A { + |/* 002 */ /*** comment1 */ + |/* 003 */ class body; + |/* 004 */ + |/* 005 */ if (c) {duh;} + |/* 006 */ else {boo;} + |/* 007 */ } + """.stripMargin + } + // scalastyle:on whitespace.end.of.line } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala index 2a1ce735b7..908e22de73 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala @@ -333,8 +333,8 @@ case class WholeStageCodegenExec(child: SparkPlan) extends UnaryExecNode with Co """.trim // try to compile, helpful for debug - val cleanedSource = - new CodeAndComment(CodeFormatter.stripExtraNewLines(source), ctx.getPlaceHolderToComments()) + val cleanedSource = CodeFormatter.stripOverlappingComments( + new CodeAndComment(CodeFormatter.stripExtraNewLines(source), ctx.getPlaceHolderToComments())) logDebug(s"\n${CodeFormatter.format(cleanedSource)}") CodeGenerator.compile(cleanedSource) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala index e0b48119f6..1041bab9d5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala @@ -224,7 +224,8 @@ object GenerateColumnAccessor extends CodeGenerator[Seq[DataType], ColumnarItera } }""" - val code = new CodeAndComment(codeBody, ctx.getPlaceHolderToComments()) + val code = CodeFormatter.stripOverlappingComments( + new CodeAndComment(codeBody, ctx.getPlaceHolderToComments())) logDebug(s"Generated ColumnarIterator:\n${CodeFormatter.format(code)}") CodeGenerator.compile(code).generate(Array.empty).asInstanceOf[ColumnarIterator] |