aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorReynold Xin <rxin@databricks.com>2015-07-24 19:35:24 -0700
committerReynold Xin <rxin@databricks.com>2015-07-24 19:35:24 -0700
commitc84acd4aa4f8bee98baa550cd6801c797a8a7a25 (patch)
tree9a99181583bab8a70ba53b10b8f32b23efb2733b
parentf99cb5615cbc0b469d52af6bd08f8bf888af58f3 (diff)
downloadspark-c84acd4aa4f8bee98baa550cd6801c797a8a7a25.tar.gz
spark-c84acd4aa4f8bee98baa550cd6801c797a8a7a25.tar.bz2
spark-c84acd4aa4f8bee98baa550cd6801c797a8a7a25.zip
[SPARK-9331][SQL] Add a code formatter to auto-format generated code.
The generated expression code can be hard to read since they are not indented well. This patch adds a code formatter that formats code automatically when we output them to the screen. Author: Reynold Xin <rxin@databricks.com> Closes #7656 from rxin/codeformatter and squashes the following commits: 5ba0e90 [Reynold Xin] [SPARK-9331][SQL] Add a code formatter to auto-format generated code.
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala60
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala2
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala11
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala2
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala2
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateProjection.scala3
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala2
-rw-r--r--sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala76
8 files changed, 148 insertions, 10 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala
new file mode 100644
index 0000000000..2087cc7f10
--- /dev/null
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.expressions.codegen
+
+/**
+ * An utility class that indents a block of code based on the curly braces.
+ *
+ * This is used to prettify generated code when in debug mode (or exceptions).
+ *
+ * Written by Matei Zaharia.
+ */
+object CodeFormatter {
+ def format(code: String): String = new CodeFormatter().addLines(code).result()
+}
+
+private class CodeFormatter {
+ private val code = new StringBuilder
+ private var indentLevel = 0
+ private val indentSize = 2
+ private var indentString = ""
+
+ private def addLine(line: String): Unit = {
+ val indentChange = line.count(_ == '{') - line.count(_ == '}')
+ val newIndentLevel = math.max(0, indentLevel + indentChange)
+ // Lines starting with '}' should be de-indented even if they contain '{' after;
+ // in addition, lines ending with ':' are typically labels
+ val thisLineIndent = if (line.startsWith("}") || line.endsWith(":")) {
+ " " * (indentSize * (indentLevel - 1))
+ } else {
+ indentString
+ }
+ code.append(thisLineIndent)
+ code.append(line)
+ code.append("\n")
+ indentLevel = newIndentLevel
+ indentString = " " * (indentSize * newIndentLevel)
+ }
+
+ private def addLines(code: String): CodeFormatter = {
+ code.split('\n').foreach(s => addLine(s.trim()))
+ this
+ }
+
+ private def result(): String = code.result()
+}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index 4a90f1b559..508882acbe 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -299,7 +299,7 @@ abstract class CodeGenerator[InType <: AnyRef, OutType <: AnyRef] extends Loggin
evaluator.cook(code)
} catch {
case e: Exception =>
- val msg = s"failed to compile:\n $code"
+ val msg = "failed to compile:\n " + CodeFormatter.format(code)
logError(msg, e)
throw new Exception(msg, e)
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala
index d838268f46..825031a4fa 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala
@@ -17,11 +17,11 @@
package org.apache.spark.sql.catalyst.expressions.codegen
+import scala.collection.mutable.ArrayBuffer
+
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.aggregate.NoOp
-import scala.collection.mutable.ArrayBuffer
-
// MutableProjection is not accessible in Java
abstract class BaseMutableProjection extends MutableProjection
@@ -45,10 +45,11 @@ object GenerateMutableProjection extends CodeGenerator[Seq[Expression], () => Mu
val evaluationCode = e.gen(ctx)
evaluationCode.code +
s"""
- if(${evaluationCode.isNull})
+ if (${evaluationCode.isNull}) {
mutableRow.setNullAt($i);
- else
+ } else {
${ctx.setColumn("mutableRow", e.dataType, i, evaluationCode.primitive)};
+ }
"""
}
// collect projections into blocks as function has 64kb codesize limit in JVM
@@ -119,7 +120,7 @@ object GenerateMutableProjection extends CodeGenerator[Seq[Expression], () => Mu
}
"""
- logDebug(s"code for ${expressions.mkString(",")}:\n$code")
+ logDebug(s"code for ${expressions.mkString(",")}:\n${CodeFormatter.format(code)}")
val c = compile(code)
() => {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
index 2e6f9e204d..dbd4616d28 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
@@ -107,7 +107,7 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR
}
}"""
- logDebug(s"Generated Ordering: $code")
+ logDebug(s"Generated Ordering: ${CodeFormatter.format(code)}")
compile(code).generate(ctx.references.toArray).asInstanceOf[BaseOrdering]
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala
index 1dda5992c3..dfd593fb7c 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala
@@ -60,7 +60,7 @@ object GeneratePredicate extends CodeGenerator[Expression, (InternalRow) => Bool
}
}"""
- logDebug(s"Generated predicate '$predicate':\n$code")
+ logDebug(s"Generated predicate '$predicate':\n${CodeFormatter.format(code)}")
val p = compile(code).generate(ctx.references.toArray).asInstanceOf[Predicate]
(r: InternalRow) => p.eval(r)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateProjection.scala
index f0efc4bff1..a361b216eb 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateProjection.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateProjection.scala
@@ -230,7 +230,8 @@ object GenerateProjection extends CodeGenerator[Seq[Expression], Projection] {
}
"""
- logDebug(s"MutableRow, initExprs: ${expressions.mkString(",")} code:\n${code}")
+ logDebug(s"MutableRow, initExprs: ${expressions.mkString(",")} code:\n" +
+ CodeFormatter.format(code))
compile(code).generate(ctx.references.toArray).asInstanceOf[Projection]
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala
index d65e5c38eb..0320bcb827 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala
@@ -114,7 +114,7 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro
}
"""
- logDebug(s"code for ${expressions.mkString(",")}:\n$code")
+ logDebug(s"code for ${expressions.mkString(",")}:\n${CodeFormatter.format(code)}")
val c = compile(code)
c.generate(ctx.references.toArray).asInstanceOf[UnsafeProjection]
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala
new file mode 100644
index 0000000000..478702fea6
--- /dev/null
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.expressions.codegen
+
+import org.apache.spark.SparkFunSuite
+
+
+class CodeFormatterSuite extends SparkFunSuite {
+
+ def testCase(name: String)(input: String)(expected: String): Unit = {
+ test(name) {
+ assert(CodeFormatter.format(input).trim === expected.trim)
+ }
+ }
+
+ testCase("basic example") {
+ """
+ |class A {
+ |blahblah;
+ |}
+ """.stripMargin
+ }{
+ """
+ |class A {
+ | blahblah;
+ |}
+ """.stripMargin
+ }
+
+ testCase("nested example") {
+ """
+ |class A {
+ | if (c) {
+ |duh;
+ |}
+ |}
+ """.stripMargin
+ } {
+ """
+ |class A {
+ | if (c) {
+ | duh;
+ | }
+ |}
+ """.stripMargin
+ }
+
+ testCase("single line") {
+ """
+ |class A {
+ | if (c) {duh;}
+ |}
+ """.stripMargin
+ }{
+ """
+ |class A {
+ | if (c) {duh;}
+ |}
+ """.stripMargin
+ }
+}