diff options
author | Sameer Agarwal <sameer@databricks.com> | 2016-03-29 16:46:45 -0700 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2016-03-29 16:46:45 -0700 |
commit | 366cac6fb0bb5591a0463c4696f5b9de2a294022 (patch) | |
tree | b1ff3c2dfa09d7202e9fcabe792658357f61d7b2 /sql/catalyst | |
parent | a7a93a116dd9813853ba6f112beb7763931d2006 (diff) | |
download | spark-366cac6fb0bb5591a0463c4696f5b9de2a294022.tar.gz spark-366cac6fb0bb5591a0463c4696f5b9de2a294022.tar.bz2 spark-366cac6fb0bb5591a0463c4696f5b9de2a294022.zip |
[SPARK-14225][SQL] Cap the length of toCommentSafeString at 128 chars
## What changes were proposed in this pull request?
Builds on https://github.com/apache/spark/pull/12022 and (a) appends "..." to truncated comment strings and (b) fixes indentation in lines after the commented strings if they happen to have a `(`, `{`, `)` or `}`
## How was this patch tested?
Manually examined the generated code.
Author: Sameer Agarwal <sameer@databricks.com>
Closes #12044 from sameeragarwal/comment.
Diffstat (limited to 'sql/catalyst')
3 files changed, 79 insertions, 8 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala index 9d99bbffbe..8e40754dc3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala @@ -43,15 +43,44 @@ object CodeFormatter { private class CodeFormatter { private val code = new StringBuilder - private var indentLevel = 0 private val indentSize = 2 + + // Tracks the level of indentation in the current line. + private var indentLevel = 0 private var indentString = "" private var currentLine = 1 + // Tracks the level of indentation in multi-line comment blocks. + private var inCommentBlock = false + private var indentLevelOutsideCommentBlock = indentLevel + private def addLine(line: String): Unit = { - val indentChange = - line.count(c => "({".indexOf(c) >= 0) - line.count(c => ")}".indexOf(c) >= 0) - val newIndentLevel = math.max(0, indentLevel + indentChange) + + // We currently infer the level of indentation of a given line based on a simple heuristic that + // examines the number of parenthesis and braces in that line. This isn't the most robust + // implementation but works for all code that we generate. + val indentChange = line.count(c => "({".indexOf(c) >= 0) - line.count(c => ")}".indexOf(c) >= 0) + var newIndentLevel = math.max(0, indentLevel + indentChange) + + // Please note that while we try to format the comment blocks in exactly the same way as the + // rest of the code, once the block ends, we reset the next line's indentation level to what it + // was immediately before entering the comment block. + if (!inCommentBlock) { + if (line.startsWith("/*")) { + // Handle multi-line comments + inCommentBlock = true + indentLevelOutsideCommentBlock = indentLevel + } else if (line.startsWith("//")) { + // Handle single line comments + newIndentLevel = indentLevel + } + } else { + if (line.endsWith("*/")) { + inCommentBlock = false + newIndentLevel = indentLevelOutsideCommentBlock + } + } + // Lines starting with '}' should be de-indented even if they contain '{' after; // in addition, lines ending with ':' are typically labels val thisLineIndent = if (line.startsWith("}") || line.startsWith(")") || line.endsWith(":")) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala index b11365b297..f879b34358 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala @@ -155,10 +155,13 @@ package object util { /** * Returns the string representation of this expression that is safe to be put in - * code comments of generated code. + * code comments of generated code. The length is capped at 128 characters. */ - def toCommentSafeString(str: String): String = - str.replace("*/", "\\*\\/").replace("\\u", "\\\\u") + def toCommentSafeString(str: String): String = { + val len = math.min(str.length, 128) + val suffix = if (str.length > len) "..." else "" + str.substring(0, len).replace("*/", "\\*\\/").replace("\\u", "\\\\u") + suffix + } /* FIX ME implicit class debugLogging(a: Any) { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala index 9da1068e9c..d7836aa3b2 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala @@ -18,13 +18,20 @@ package org.apache.spark.sql.catalyst.expressions.codegen import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.util._ class CodeFormatterSuite extends SparkFunSuite { def testCase(name: String)(input: String)(expected: String): Unit = { test(name) { - assert(CodeFormatter.format(input).trim === expected.trim) + if (CodeFormatter.format(input).trim !== expected.trim) { + fail( + s""" + |== FAIL: Formatted code doesn't match === + |${sideBySide(CodeFormatter.format(input).trim, expected.trim).mkString("\n")} + """.stripMargin) + } } } @@ -93,4 +100,36 @@ class CodeFormatterSuite extends SparkFunSuite { |/* 004 */ c) """.stripMargin } + + testCase("single line comments") { + """// This is a comment about class A { { { ( ( + |class A { + |class body; + |}""".stripMargin + }{ + """ + |/* 001 */ // This is a comment about class A { { { ( ( + |/* 002 */ class A { + |/* 003 */ class body; + |/* 004 */ } + """.stripMargin + } + + testCase("multi-line comments") { + """ /* This is a comment about + |class A { + |class body; ...*/ + |class A { + |class body; + |}""".stripMargin + }{ + """ + |/* 001 */ /* This is a comment about + |/* 002 */ class A { + |/* 003 */ class body; ...*/ + |/* 004 */ class A { + |/* 005 */ class body; + |/* 006 */ } + """.stripMargin + } } |