aboutsummaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorSameer Agarwal <sameer@databricks.com>2016-03-29 16:46:45 -0700
committerReynold Xin <rxin@databricks.com>2016-03-29 16:46:45 -0700
commit366cac6fb0bb5591a0463c4696f5b9de2a294022 (patch)
treeb1ff3c2dfa09d7202e9fcabe792658357f61d7b2 /sql
parenta7a93a116dd9813853ba6f112beb7763931d2006 (diff)
downloadspark-366cac6fb0bb5591a0463c4696f5b9de2a294022.tar.gz
spark-366cac6fb0bb5591a0463c4696f5b9de2a294022.tar.bz2
spark-366cac6fb0bb5591a0463c4696f5b9de2a294022.zip
[SPARK-14225][SQL] Cap the length of toCommentSafeString at 128 chars
## What changes were proposed in this pull request? Builds on https://github.com/apache/spark/pull/12022 and (a) appends "..." to truncated comment strings and (b) fixes indentation in lines after the commented strings if they happen to have a `(`, `{`, `)` or `}` ## How was this patch tested? Manually examined the generated code. Author: Sameer Agarwal <sameer@databricks.com> Closes #12044 from sameeragarwal/comment.
Diffstat (limited to 'sql')
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala37
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala9
-rw-r--r--sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala41
3 files changed, 79 insertions, 8 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala
index 9d99bbffbe..8e40754dc3 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala
@@ -43,15 +43,44 @@ object CodeFormatter {
private class CodeFormatter {
private val code = new StringBuilder
- private var indentLevel = 0
private val indentSize = 2
+
+ // Tracks the level of indentation in the current line.
+ private var indentLevel = 0
private var indentString = ""
private var currentLine = 1
+ // Tracks the level of indentation in multi-line comment blocks.
+ private var inCommentBlock = false
+ private var indentLevelOutsideCommentBlock = indentLevel
+
private def addLine(line: String): Unit = {
- val indentChange =
- line.count(c => "({".indexOf(c) >= 0) - line.count(c => ")}".indexOf(c) >= 0)
- val newIndentLevel = math.max(0, indentLevel + indentChange)
+
+ // We currently infer the level of indentation of a given line based on a simple heuristic that
+ // examines the number of parenthesis and braces in that line. This isn't the most robust
+ // implementation but works for all code that we generate.
+ val indentChange = line.count(c => "({".indexOf(c) >= 0) - line.count(c => ")}".indexOf(c) >= 0)
+ var newIndentLevel = math.max(0, indentLevel + indentChange)
+
+ // Please note that while we try to format the comment blocks in exactly the same way as the
+ // rest of the code, once the block ends, we reset the next line's indentation level to what it
+ // was immediately before entering the comment block.
+ if (!inCommentBlock) {
+ if (line.startsWith("/*")) {
+ // Handle multi-line comments
+ inCommentBlock = true
+ indentLevelOutsideCommentBlock = indentLevel
+ } else if (line.startsWith("//")) {
+ // Handle single line comments
+ newIndentLevel = indentLevel
+ }
+ } else {
+ if (line.endsWith("*/")) {
+ inCommentBlock = false
+ newIndentLevel = indentLevelOutsideCommentBlock
+ }
+ }
+
// Lines starting with '}' should be de-indented even if they contain '{' after;
// in addition, lines ending with ':' are typically labels
val thisLineIndent = if (line.startsWith("}") || line.startsWith(")") || line.endsWith(":")) {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala
index b11365b297..f879b34358 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala
@@ -155,10 +155,13 @@ package object util {
/**
* Returns the string representation of this expression that is safe to be put in
- * code comments of generated code.
+ * code comments of generated code. The length is capped at 128 characters.
*/
- def toCommentSafeString(str: String): String =
- str.replace("*/", "\\*\\/").replace("\\u", "\\\\u")
+ def toCommentSafeString(str: String): String = {
+ val len = math.min(str.length, 128)
+ val suffix = if (str.length > len) "..." else ""
+ str.substring(0, len).replace("*/", "\\*\\/").replace("\\u", "\\\\u") + suffix
+ }
/* FIX ME
implicit class debugLogging(a: Any) {
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala
index 9da1068e9c..d7836aa3b2 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala
@@ -18,13 +18,20 @@
package org.apache.spark.sql.catalyst.expressions.codegen
import org.apache.spark.SparkFunSuite
+import org.apache.spark.sql.catalyst.util._
class CodeFormatterSuite extends SparkFunSuite {
def testCase(name: String)(input: String)(expected: String): Unit = {
test(name) {
- assert(CodeFormatter.format(input).trim === expected.trim)
+ if (CodeFormatter.format(input).trim !== expected.trim) {
+ fail(
+ s"""
+ |== FAIL: Formatted code doesn't match ===
+ |${sideBySide(CodeFormatter.format(input).trim, expected.trim).mkString("\n")}
+ """.stripMargin)
+ }
}
}
@@ -93,4 +100,36 @@ class CodeFormatterSuite extends SparkFunSuite {
|/* 004 */ c)
""".stripMargin
}
+
+ testCase("single line comments") {
+ """// This is a comment about class A { { { ( (
+ |class A {
+ |class body;
+ |}""".stripMargin
+ }{
+ """
+ |/* 001 */ // This is a comment about class A { { { ( (
+ |/* 002 */ class A {
+ |/* 003 */ class body;
+ |/* 004 */ }
+ """.stripMargin
+ }
+
+ testCase("multi-line comments") {
+ """ /* This is a comment about
+ |class A {
+ |class body; ...*/
+ |class A {
+ |class body;
+ |}""".stripMargin
+ }{
+ """
+ |/* 001 */ /* This is a comment about
+ |/* 002 */ class A {
+ |/* 003 */ class body; ...*/
+ |/* 004 */ class A {
+ |/* 005 */ class body;
+ |/* 006 */ }
+ """.stripMargin
+ }
}