aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDongjoon Hyun <dongjoon@apache.org>2016-04-01 22:45:52 -0700
committerReynold Xin <rxin@databricks.com>2016-04-01 22:45:52 -0700
commitfa1af0aff7bde9bbf7bfa6a3ac74699734c2fd8a (patch)
tree1b0e52e2617c8021960cb5aa3beba5fff6f999c2
parent877dc712e66db69cb320e10ba5edebca401591e3 (diff)
downloadspark-fa1af0aff7bde9bbf7bfa6a3ac74699734c2fd8a.tar.gz
spark-fa1af0aff7bde9bbf7bfa6a3ac74699734c2fd8a.tar.bz2
spark-fa1af0aff7bde9bbf7bfa6a3ac74699734c2fd8a.zip
[SPARK-14251][SQL] Add SQL command for printing out generated code for debugging
## What changes were proposed in this pull request? This PR implements the `EXPLAIN CODEGEN` SQL command, which returns generated code like `debugCodegen`. In `spark-shell`, we no longer need to import the `debug` module. In `spark-sql`, we can use this SQL command now. **Before** ``` scala> import org.apache.spark.sql.execution.debug._ scala> sql("select 'a' as a group by 1").debugCodegen() Found 2 WholeStageCodegen subtrees. == Subtree 1 / 2 == ... Generated code: ... == Subtree 2 / 2 == ... Generated code: ... ``` **After** ``` scala> sql("explain extended codegen select 'a' as a group by 1").collect().foreach(println) [Found 2 WholeStageCodegen subtrees.] [== Subtree 1 / 2 ==] ... [] [Generated code:] ... [] [== Subtree 2 / 2 ==] ... [] [Generated code:] ... ``` ## How was this patch tested? Pass the Jenkins tests (including new test cases) Author: Dongjoon Hyun <dongjoon@apache.org> Closes #12099 from dongjoon-hyun/SPARK-14251.
-rw-r--r--sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g45
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala3
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala15
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala43
-rw-r--r--sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala2
-rw-r--r--sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala1
-rw-r--r--sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala29
7 files changed, 67 insertions, 31 deletions
diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index d1747b9915..f34bb061e4 100644
--- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -584,7 +584,7 @@ frameBound
explainOption
- : LOGICAL | FORMATTED | EXTENDED
+ : LOGICAL | FORMATTED | EXTENDED | CODEGEN
;
transactionMode
@@ -633,7 +633,7 @@ nonReserved
| DELIMITED | FIELDS | TERMINATED | COLLECTION | ITEMS | KEYS | ESCAPED | LINES | SEPARATED
| EXTENDED | REFRESH | CLEAR | CACHE | UNCACHE | LAZY | TEMPORARY | OPTIONS
| GROUPING | CUBE | ROLLUP
- | EXPLAIN | FORMAT | LOGICAL | FORMATTED
+ | EXPLAIN | FORMAT | LOGICAL | FORMATTED | CODEGEN
| TABLESAMPLE | USE | TO | BUCKET | PERCENTLIT | OUT | OF
| SET
| VIEW | REPLACE
@@ -724,6 +724,7 @@ DESCRIBE: 'DESCRIBE';
EXPLAIN: 'EXPLAIN';
FORMAT: 'FORMAT';
LOGICAL: 'LOGICAL';
+CODEGEN: 'CODEGEN';
CAST: 'CAST';
SHOW: 'SHOW';
TABLES: 'TABLES';
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
index 7efe98dd18..ff3ab7746c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
@@ -136,7 +136,8 @@ class SparkSqlAstBuilder extends AstBuilder {
// Create the explain comment.
val statement = plan(ctx.statement)
if (isExplainableStatement(statement)) {
- ExplainCommand(statement, extended = options.exists(_.EXTENDED != null))
+ ExplainCommand(statement, extended = options.exists(_.EXTENDED != null),
+ codegen = options.exists(_.CODEGEN != null))
} else {
ExplainCommand(OneRowRelation)
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala
index f90d8717ca..4bc62cdc4a 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala
@@ -28,10 +28,10 @@ import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
import org.apache.spark.sql.catalyst.plans.logical
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.execution.SparkPlan
+import org.apache.spark.sql.execution.debug._
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._
-
/**
* A logical command that is executed for its side-effects. `RunnableCommand`s are
* wrapped in `ExecutedCommand` during execution.
@@ -237,15 +237,22 @@ case class ExplainCommand(
logicalPlan: LogicalPlan,
override val output: Seq[Attribute] =
Seq(AttributeReference("plan", StringType, nullable = true)()),
- extended: Boolean = false)
+ extended: Boolean = false,
+ codegen: Boolean = false)
extends RunnableCommand {
// Run through the optimizer to generate the physical plan.
override def run(sqlContext: SQLContext): Seq[Row] = try {
// TODO in Hive, the "extended" ExplainCommand prints the AST as well, and detailed properties.
val queryExecution = sqlContext.executePlan(logicalPlan)
- val outputString = if (extended) queryExecution.toString else queryExecution.simpleString
-
+ val outputString =
+ if (codegen) {
+ codegenString(queryExecution.executedPlan)
+ } else if (extended) {
+ queryExecution.toString
+ } else {
+ queryExecution.simpleString
+ }
outputString.split("\n").map(Row(_))
} catch { case cause: TreeNodeException[_] =>
("Error occurred during query planning: \n" + cause.getMessage).split("\n").map(Row(_))
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala
index 3a174ed94c..7b0c8ebdfa 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala
@@ -48,6 +48,25 @@ package object debug {
// scalastyle:on println
}
+ def codegenString(plan: SparkPlan): String = {
+ val codegenSubtrees = new collection.mutable.HashSet[WholeStageCodegen]()
+ plan transform {
+ case s: WholeStageCodegen =>
+ codegenSubtrees += s
+ s
+ case s => s
+ }
+ var output = s"Found ${codegenSubtrees.size} WholeStageCodegen subtrees.\n"
+ for ((s, i) <- codegenSubtrees.toSeq.zipWithIndex) {
+ output += s"== Subtree ${i + 1} / ${codegenSubtrees.size} ==\n"
+ output += s
+ output += "\nGenerated code:\n"
+ val (_, source) = s.doCodeGen()
+ output += s"${CodeFormatter.format(source)}\n"
+ }
+ output
+ }
+
/**
* Augments [[SQLContext]] with debug methods.
*/
@@ -81,28 +100,7 @@ package object debug {
* WholeStageCodegen subtree).
*/
def debugCodegen(): Unit = {
- debugPrint(debugCodegenString())
- }
-
- /** Visible for testing. */
- def debugCodegenString(): String = {
- val plan = query.queryExecution.executedPlan
- val codegenSubtrees = new collection.mutable.HashSet[WholeStageCodegen]()
- plan transform {
- case s: WholeStageCodegen =>
- codegenSubtrees += s
- s
- case s => s
- }
- var output = s"Found ${codegenSubtrees.size} WholeStageCodegen subtrees.\n"
- for ((s, i) <- codegenSubtrees.toSeq.zipWithIndex) {
- output += s"== Subtree ${i + 1} / ${codegenSubtrees.size} ==\n"
- output += s
- output += "\nGenerated code:\n"
- val (_, source) = s.doCodeGen()
- output += s"${CodeFormatter.format(source)}\n"
- }
- output
+ debugPrint(codegenString(query.queryExecution.executedPlan))
}
}
@@ -123,6 +121,7 @@ package object debug {
/**
* A collection of metrics for each column of output.
+ *
* @param elementTypes the actual runtime types for the output. Useful when there are bugs
* causing the wrong data to be projected.
*/
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala
index 979265e274..c0fce4b96a 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala
@@ -27,7 +27,7 @@ class DebuggingSuite extends SparkFunSuite with SharedSQLContext {
}
test("debugCodegen") {
- val res = sqlContext.range(10).groupBy("id").count().debugCodegenString()
+ val res = codegenString(sqlContext.range(10).groupBy("id").count().queryExecution.executedPlan)
assert(res.contains("Subtree 1 / 2"))
assert(res.contains("Subtree 2 / 2"))
assert(res.contains("Object[]"))
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala
index cd26a68f35..64d1341a47 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala
@@ -24,7 +24,6 @@ import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.EliminateSubqueryAliases
import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.util._
import org.apache.spark.sql.execution.command.RunnableCommand
import org.apache.spark.sql.execution.datasources.{BucketSpec, DataSource, LogicalRelation}
import org.apache.spark.sql.hive.HiveContext
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala
index b7ef5d1db7..c45d49d6c0 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala
@@ -101,4 +101,33 @@ class HiveExplainSuite extends QueryTest with SQLTestUtils with TestHiveSingleto
"Physical Plan should not contain Subquery since it's eliminated by optimizer")
}
}
+
+ test("EXPLAIN CODEGEN command") {
+ checkExistence(sql("EXPLAIN CODEGEN SELECT 1"), true,
+ "WholeStageCodegen",
+ "Generated code:",
+ "/* 001 */ public Object generate(Object[] references) {",
+ "/* 002 */ return new GeneratedIterator(references);",
+ "/* 003 */ }"
+ )
+
+ checkExistence(sql("EXPLAIN CODEGEN SELECT 1"), false,
+ "== Physical Plan =="
+ )
+
+ checkExistence(sql("EXPLAIN EXTENDED CODEGEN SELECT 1"), true,
+ "WholeStageCodegen",
+ "Generated code:",
+ "/* 001 */ public Object generate(Object[] references) {",
+ "/* 002 */ return new GeneratedIterator(references);",
+ "/* 003 */ }"
+ )
+
+ checkExistence(sql("EXPLAIN EXTENDED CODEGEN SELECT 1"), false,
+ "== Parsed Logical Plan ==",
+ "== Analyzed Logical Plan ==",
+ "== Optimized Logical Plan ==",
+ "== Physical Plan =="
+ )
+ }
}