diff options
author | Dongjoon Hyun <dongjoon@apache.org> | 2016-04-01 22:45:52 -0700 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2016-04-01 22:45:52 -0700 |
commit | fa1af0aff7bde9bbf7bfa6a3ac74699734c2fd8a (patch) | |
tree | 1b0e52e2617c8021960cb5aa3beba5fff6f999c2 | |
parent | 877dc712e66db69cb320e10ba5edebca401591e3 (diff) | |
download | spark-fa1af0aff7bde9bbf7bfa6a3ac74699734c2fd8a.tar.gz spark-fa1af0aff7bde9bbf7bfa6a3ac74699734c2fd8a.tar.bz2 spark-fa1af0aff7bde9bbf7bfa6a3ac74699734c2fd8a.zip |
[SPARK-14251][SQL] Add SQL command for printing out generated code for debugging
## What changes were proposed in this pull request?
This PR implements the `EXPLAIN CODEGEN` SQL command, which returns the generated code in the same way as `debugCodegen`. In `spark-shell`, we no longer need to import the `debug` module. In `spark-sql`, we can now use this SQL command.
**Before**
```
scala> import org.apache.spark.sql.execution.debug._
scala> sql("select 'a' as a group by 1").debugCodegen()
Found 2 WholeStageCodegen subtrees.
== Subtree 1 / 2 ==
...
Generated code:
...
== Subtree 2 / 2 ==
...
Generated code:
...
```
**After**
```
scala> sql("explain extended codegen select 'a' as a group by 1").collect().foreach(println)
[Found 2 WholeStageCodegen subtrees.]
[== Subtree 1 / 2 ==]
...
[]
[Generated code:]
...
[]
[== Subtree 2 / 2 ==]
...
[]
[Generated code:]
...
```
## How was this patch tested?
Passes the Jenkins tests (including new test cases).
Author: Dongjoon Hyun <dongjoon@apache.org>
Closes #12099 from dongjoon-hyun/SPARK-14251.
7 files changed, 67 insertions, 31 deletions
diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index d1747b9915..f34bb061e4 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -584,7 +584,7 @@ frameBound explainOption - : LOGICAL | FORMATTED | EXTENDED + : LOGICAL | FORMATTED | EXTENDED | CODEGEN ; transactionMode @@ -633,7 +633,7 @@ nonReserved | DELIMITED | FIELDS | TERMINATED | COLLECTION | ITEMS | KEYS | ESCAPED | LINES | SEPARATED | EXTENDED | REFRESH | CLEAR | CACHE | UNCACHE | LAZY | TEMPORARY | OPTIONS | GROUPING | CUBE | ROLLUP - | EXPLAIN | FORMAT | LOGICAL | FORMATTED + | EXPLAIN | FORMAT | LOGICAL | FORMATTED | CODEGEN | TABLESAMPLE | USE | TO | BUCKET | PERCENTLIT | OUT | OF | SET | VIEW | REPLACE @@ -724,6 +724,7 @@ DESCRIBE: 'DESCRIBE'; EXPLAIN: 'EXPLAIN'; FORMAT: 'FORMAT'; LOGICAL: 'LOGICAL'; +CODEGEN: 'CODEGEN'; CAST: 'CAST'; SHOW: 'SHOW'; TABLES: 'TABLES'; diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 7efe98dd18..ff3ab7746c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -136,7 +136,8 @@ class SparkSqlAstBuilder extends AstBuilder { // Create the explain comment. 
val statement = plan(ctx.statement) if (isExplainableStatement(statement)) { - ExplainCommand(statement, extended = options.exists(_.EXTENDED != null)) + ExplainCommand(statement, extended = options.exists(_.EXTENDED != null), + codegen = options.exists(_.CODEGEN != null)) } else { ExplainCommand(OneRowRelation) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala index f90d8717ca..4bc62cdc4a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala @@ -28,10 +28,10 @@ import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.catalyst.plans.logical import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.SparkPlan +import org.apache.spark.sql.execution.debug._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ - /** * A logical command that is executed for its side-effects. `RunnableCommand`s are * wrapped in `ExecutedCommand` during execution. @@ -237,15 +237,22 @@ case class ExplainCommand( logicalPlan: LogicalPlan, override val output: Seq[Attribute] = Seq(AttributeReference("plan", StringType, nullable = true)()), - extended: Boolean = false) + extended: Boolean = false, + codegen: Boolean = false) extends RunnableCommand { // Run through the optimizer to generate the physical plan. override def run(sqlContext: SQLContext): Seq[Row] = try { // TODO in Hive, the "extended" ExplainCommand prints the AST as well, and detailed properties. 
val queryExecution = sqlContext.executePlan(logicalPlan) - val outputString = if (extended) queryExecution.toString else queryExecution.simpleString - + val outputString = + if (codegen) { + codegenString(queryExecution.executedPlan) + } else if (extended) { + queryExecution.toString + } else { + queryExecution.simpleString + } outputString.split("\n").map(Row(_)) } catch { case cause: TreeNodeException[_] => ("Error occurred during query planning: \n" + cause.getMessage).split("\n").map(Row(_)) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala index 3a174ed94c..7b0c8ebdfa 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala @@ -48,6 +48,25 @@ package object debug { // scalastyle:on println } + def codegenString(plan: SparkPlan): String = { + val codegenSubtrees = new collection.mutable.HashSet[WholeStageCodegen]() + plan transform { + case s: WholeStageCodegen => + codegenSubtrees += s + s + case s => s + } + var output = s"Found ${codegenSubtrees.size} WholeStageCodegen subtrees.\n" + for ((s, i) <- codegenSubtrees.toSeq.zipWithIndex) { + output += s"== Subtree ${i + 1} / ${codegenSubtrees.size} ==\n" + output += s + output += "\nGenerated code:\n" + val (_, source) = s.doCodeGen() + output += s"${CodeFormatter.format(source)}\n" + } + output + } + /** * Augments [[SQLContext]] with debug methods. */ @@ -81,28 +100,7 @@ package object debug { * WholeStageCodegen subtree). */ def debugCodegen(): Unit = { - debugPrint(debugCodegenString()) - } - - /** Visible for testing. 
*/ - def debugCodegenString(): String = { - val plan = query.queryExecution.executedPlan - val codegenSubtrees = new collection.mutable.HashSet[WholeStageCodegen]() - plan transform { - case s: WholeStageCodegen => - codegenSubtrees += s - s - case s => s - } - var output = s"Found ${codegenSubtrees.size} WholeStageCodegen subtrees.\n" - for ((s, i) <- codegenSubtrees.toSeq.zipWithIndex) { - output += s"== Subtree ${i + 1} / ${codegenSubtrees.size} ==\n" - output += s - output += "\nGenerated code:\n" - val (_, source) = s.doCodeGen() - output += s"${CodeFormatter.format(source)}\n" - } - output + debugPrint(codegenString(query.queryExecution.executedPlan)) } } @@ -123,6 +121,7 @@ package object debug { /** * A collection of metrics for each column of output. + * * @param elementTypes the actual runtime types for the output. Useful when there are bugs * causing the wrong data to be projected. */ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala index 979265e274..c0fce4b96a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala @@ -27,7 +27,7 @@ class DebuggingSuite extends SparkFunSuite with SharedSQLContext { } test("debugCodegen") { - val res = sqlContext.range(10).groupBy("id").count().debugCodegenString() + val res = codegenString(sqlContext.range(10).groupBy("id").count().queryExecution.executedPlan) assert(res.contains("Subtree 1 / 2")) assert(res.contains("Subtree 2 / 2")) assert(res.contains("Object[]")) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala index cd26a68f35..64d1341a47 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala +++ 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala @@ -24,7 +24,6 @@ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.EliminateSubqueryAliases import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan -import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.execution.command.RunnableCommand import org.apache.spark.sql.execution.datasources.{BucketSpec, DataSource, LogicalRelation} import org.apache.spark.sql.hive.HiveContext diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala index b7ef5d1db7..c45d49d6c0 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala @@ -101,4 +101,33 @@ class HiveExplainSuite extends QueryTest with SQLTestUtils with TestHiveSingleto "Physical Plan should not contain Subquery since it's eliminated by optimizer") } } + + test("EXPLAIN CODEGEN command") { + checkExistence(sql("EXPLAIN CODEGEN SELECT 1"), true, + "WholeStageCodegen", + "Generated code:", + "/* 001 */ public Object generate(Object[] references) {", + "/* 002 */ return new GeneratedIterator(references);", + "/* 003 */ }" + ) + + checkExistence(sql("EXPLAIN CODEGEN SELECT 1"), false, + "== Physical Plan ==" + ) + + checkExistence(sql("EXPLAIN EXTENDED CODEGEN SELECT 1"), true, + "WholeStageCodegen", + "Generated code:", + "/* 001 */ public Object generate(Object[] references) {", + "/* 002 */ return new GeneratedIterator(references);", + "/* 003 */ }" + ) + + checkExistence(sql("EXPLAIN EXTENDED CODEGEN SELECT 1"), false, + "== Parsed Logical Plan ==", + "== Analyzed Logical Plan ==", + "== Optimized Logical Plan ==", + "== Physical Plan ==" + ) + } } |