[SPARK-13270][SQL] Remove extra new lines in whole stage codegen and include pipeline plan in comments.

Author: Nong Li <nong@databricks.com>

Closes #11155 from nongli/spark-13270.
author: Nong Li <nong@databricks.com> 2016-02-10 23:52:19 -0800
committer: Reynold Xin <rxin@databricks.com> 2016-02-10 23:52:19 -0800
commit: 18bcbbdd84e80222d1d29530831c6d68d02e7593 (patch)
tree: 70d65918ef93b962a9b7763ae3f62d4b9a5455de
parent: e88bff12795a6134e2e7204996b603e948380e18 (diff)
download: spark-18bcbbdd84e80222d1d29530831c6d68d02e7593.tar.gz spark-18bcbbdd84e80222d1d29530831c6d68d02e7593.tar.bz2 spark-18bcbbdd84e80222d1d29530831c6d68d02e7593.zip
2 files changed, 20 insertions, 2 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala
index 9b8b6382d7..9d99bbffbe 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala
@@ -25,6 +25,20 @@ package org.apache.spark.sql.catalyst.expressions.codegen
  */
 object CodeFormatter {
   def format(code: String): String = new CodeFormatter().addLines(code).result()
+  def stripExtraNewLines(input: String): String = {
+    val code = new StringBuilder
+    var lastLine: String = "dummy"
+    input.split('\n').foreach { l =>
+      val line = l.trim()
+      val skip = line == "" && (lastLine == "" || lastLine.endsWith("{"))
+      if (!skip) {
+        code.append(line)
+        code.append("\n")
+      }
+      lastLine = line
+    }
+    code.result()
+  }
 }
 
 private class CodeFormatter {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegen.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegen.scala
index b200239c94..30f74fc14f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegen.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegen.scala
@@ -237,6 +237,9 @@ case class WholeStageCodegen(plan: CodegenSupport, children: Seq[SparkPlan])
         return new GeneratedIterator(references);
       }
 
+      /** Codegened pipeline for:
+        * ${plan.treeString.trim}
+        */
       class GeneratedIterator extends org.apache.spark.sql.execution.BufferedRowIterator {
 
         private Object[] references;
@@ -256,8 +259,9 @@ case class WholeStageCodegen(plan: CodegenSupport, children: Seq[SparkPlan])
       """
 
     // try to compile, helpful for debug
-    // println(s"${CodeFormatter.format(source)}")
-    CodeGenerator.compile(source)
+    val cleanedSource = CodeFormatter.stripExtraNewLines(source)
+    // println(s"${CodeFormatter.format(cleanedSource)}")
+    CodeGenerator.compile(cleanedSource)
 
     plan.upstream().mapPartitions { iter =>
author	Nong Li <nong@databricks.com>	2016-02-10 23:52:19 -0800
committer	Reynold Xin <rxin@databricks.com>	2016-02-10 23:52:19 -0800
commit	18bcbbdd84e80222d1d29530831c6d68d02e7593 (patch)
tree	70d65918ef93b962a9b7763ae3f62d4b9a5455de
parent	e88bff12795a6134e2e7204996b603e948380e18 (diff)
download	spark-18bcbbdd84e80222d1d29530831c6d68d02e7593.tar.gz spark-18bcbbdd84e80222d1d29530831c6d68d02e7593.tar.bz2 spark-18bcbbdd84e80222d1d29530831c6d68d02e7593.zip