author     Sean Zhong <seanzhong@databricks.com>    2016-06-06 22:59:25 -0700
committer  Cheng Lian <lian@databricks.com>         2016-06-06 22:59:25 -0700
commit     5f731d6859c4516941e5f90c99c966ef76268864 (patch)
tree       db6497a87bd37235bde5cd7196edc9faf6ce4505
parent     0e0904a2fce3c4447c24f1752307b6d01ffbd0ad (diff)
[SPARK-15792][SQL] Allows operator to change the verbosity in explain output
## What changes were proposed in this pull request?

This PR allows the verbosity of explain output to be customized per operator. After this change, `dataframe.explain()` and `dataframe.explain(true)` produce physical-plan output at different verbosity levels. For now, this PR only enables the verbose string for the operators `HashAggregateExec` and `SortAggregateExec`; we will gradually enable it for more operators in the future.

**Less verbose mode:** dataframe.explain(extended = false)

`output=[count(a)#85L]` is **NOT** displayed for HashAggregate.

```
scala> Seq((1,2,3)).toDF("a", "b", "c").createTempView("df2")
scala> spark.sql("select count(a) from df2").explain()
== Physical Plan ==
*HashAggregate(key=[], functions=[count(1)])
+- Exchange SinglePartition
   +- *HashAggregate(key=[], functions=[partial_count(1)])
      +- LocalTableScan
```

**Verbose mode:** dataframe.explain(extended = true)

`output=[count(a)#85L]` is displayed for HashAggregate.

```
scala> spark.sql("select count(a) from df2").explain(true)  // "output=[count(a)#85L]" is added
...
== Physical Plan ==
*HashAggregate(key=[], functions=[count(1)], output=[count(a)#85L])
+- Exchange SinglePartition
   +- *HashAggregate(key=[], functions=[partial_count(1)], output=[count#87L])
      +- LocalTableScan
```

## How was this patch tested?

Manual test.

Author: Sean Zhong <seanzhong@databricks.com>

Closes #13535 from clockfly/verbose_breakdown_2.
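For reference, the `treeString(verbose: Boolean)` hook introduced below can also be invoked directly on a plan. A minimal sketch, assuming the `spark` session and the `df2` view from the examples above:

```scala
// Minimal sketch of the new API surface (assumes spark and df2 from above).
val plan = spark.sql("select count(a) from df2").queryExecution.executedPlan

plan.treeString(verbose = false)  // terse form, roughly what explain() prints
plan.treeString(verbose = true)   // verbose form, roughly what explain(true) prints
```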
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala   |  4
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala          |  2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala           | 23
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala              | 14
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala       |  6
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala | 12
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortAggregateExec.scala | 12
7 files changed, 55 insertions, 18 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
index 2ec46216e1..efe592d69d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
@@ -190,6 +190,10 @@ abstract class Expression extends TreeNode[Expression] {
case single => single :: Nil
}
+ // Marked as final: Expression.verboseString should never be called, and thus shouldn't be
+ // overridden by concrete classes.
+ final override def verboseString: String = simpleString
+
override def simpleString: String = toString
override def toString: String = prettyName + flatArguments.mkString("(", ", ", ")")
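Because `verboseString` is pinned to `simpleString` here, individual expressions render identically at both verbosity levels; only plan operators can differ. A quick hypothetical spark-shell check (assumes a build containing this patch):

```scala
// Expression.verboseString is final and always equals simpleString.
import org.apache.spark.sql.functions._
val e = count(lit(1)).expr
assert(e.verboseString == e.simpleString)
```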
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
index 19a66cff4f..cf34f4b30d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
@@ -257,6 +257,8 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] extends TreeNode[PlanT
override def simpleString: String = statePrefix + super.simpleString
+ override def verboseString: String = simpleString
+
/**
* All the subqueries of current plan.
*/
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
index 22d82c6108..c67366d240 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
@@ -462,10 +462,17 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product {
/** ONE line description of this node. */
def simpleString: String = s"$nodeName $argString".trim
+ /** ONE line description of this node with more information */
+ def verboseString: String
+
override def toString: String = treeString
/** Returns a string representation of the nodes in this tree */
- def treeString: String = generateTreeString(0, Nil, new StringBuilder).toString
+ def treeString: String = treeString(verbose = true)
+
+ def treeString(verbose: Boolean): String = {
+ generateTreeString(0, Nil, new StringBuilder, verbose).toString
+ }
/**
* Returns a string representation of the nodes in this tree, where each operator is numbered.
@@ -508,6 +515,7 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product {
depth: Int,
lastChildren: Seq[Boolean],
builder: StringBuilder,
+ verbose: Boolean,
prefix: String = ""): StringBuilder = {
if (depth > 0) {
lastChildren.init.foreach { isLast =>
@@ -520,18 +528,21 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product {
}
builder.append(prefix)
- builder.append(simpleString)
+ val headline = if (verbose) verboseString else simpleString
+ builder.append(headline)
builder.append("\n")
if (innerChildren.nonEmpty) {
innerChildren.init.foreach(_.generateTreeString(
- depth + 2, lastChildren :+ false :+ false, builder))
- innerChildren.last.generateTreeString(depth + 2, lastChildren :+ false :+ true, builder)
+ depth + 2, lastChildren :+ false :+ false, builder, verbose))
+ innerChildren.last.generateTreeString(
+ depth + 2, lastChildren :+ false :+ true, builder, verbose)
}
if (children.nonEmpty) {
- children.init.foreach(_.generateTreeString(depth + 1, lastChildren :+ false, builder, prefix))
- children.last.generateTreeString(depth + 1, lastChildren :+ true, builder, prefix)
+ children.init.foreach(
+ _.generateTreeString(depth + 1, lastChildren :+ false, builder, verbose, prefix))
+ children.last.generateTreeString(depth + 1, lastChildren :+ true, builder, verbose, prefix)
}
builder
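The core of the patch is this threading of a `verbose` flag through the recursive tree printer. A self-contained sketch of the same pattern (simplified: no `lastChildren` bookkeeping, no `innerChildren`; not the actual TreeNode code):

```scala
// Standalone sketch of the verbose-flag threading in generateTreeString.
abstract class Node {
  def children: Seq[Node]
  def simpleString: String
  /** ONE line description with more information; operators override this. */
  def verboseString: String

  def treeString(verbose: Boolean): String =
    generateTreeString(0, new StringBuilder, verbose).toString

  def generateTreeString(
      depth: Int,
      builder: StringBuilder,
      verbose: Boolean,
      prefix: String = ""): StringBuilder = {
    builder.append("  " * depth).append(prefix)
    // The behavioral change: pick the one-line headline by verbosity.
    builder.append(if (verbose) verboseString else simpleString).append('\n')
    // The prefix propagates to children, which is how the "*" codegen marker
    // (see WholeStageCodegenExec below) reaches every operator in a stage.
    children.foreach(_.generateTreeString(depth + 1, builder, verbose, prefix))
    builder
  }
}
```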
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala
index 330459c11e..560214a65e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala
@@ -201,24 +201,26 @@ class QueryExecution(val sparkSession: SparkSession, val logical: LogicalPlan) {
def simpleString: String = {
s"""== Physical Plan ==
- |${stringOrError(executedPlan)}
+ |${stringOrError(executedPlan.treeString(verbose = false))}
""".stripMargin.trim
}
override def toString: String = {
def output =
analyzed.output.map(o => s"${o.name}: ${o.dataType.simpleString}").mkString(", ")
- val analyzedPlan =
- Seq(stringOrError(output), stringOrError(analyzed)).filter(_.nonEmpty).mkString("\n")
+ val analyzedPlan = Seq(
+ stringOrError(output),
+ stringOrError(analyzed.treeString(verbose = true))
+ ).filter(_.nonEmpty).mkString("\n")
s"""== Parsed Logical Plan ==
- |${stringOrError(logical)}
+ |${stringOrError(logical.treeString(verbose = true))}
|== Analyzed Logical Plan ==
|$analyzedPlan
|== Optimized Logical Plan ==
- |${stringOrError(optimizedPlan)}
+ |${stringOrError(optimizedPlan.treeString(verbose = true))}
|== Physical Plan ==
- |${stringOrError(executedPlan)}
+ |${stringOrError(executedPlan.treeString(verbose = true))}
""".stripMargin.trim
}
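Net effect of this hunk: the terse physical plan backs `dataframe.explain()` while all four verbose plans back `dataframe.explain(true)`. Loosely paraphrased (the real dispatch goes through `ExplainCommand`; this is not code from the patch):

```scala
// Paraphrase of the explain() dispatch, not actual Spark source.
def explain(qe: QueryExecution, extended: Boolean): Unit =
  println(if (extended) qe.toString else qe.simpleString)
```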
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala
index d3e8d4e8e4..e0d8e35713 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala
@@ -250,8 +250,9 @@ case class InputAdapter(child: SparkPlan) extends UnaryExecNode with CodegenSupp
depth: Int,
lastChildren: Seq[Boolean],
builder: StringBuilder,
+ verbose: Boolean,
prefix: String = ""): StringBuilder = {
- child.generateTreeString(depth, lastChildren, builder, "")
+ child.generateTreeString(depth, lastChildren, builder, verbose, "")
}
}
@@ -407,8 +408,9 @@ case class WholeStageCodegenExec(child: SparkPlan) extends UnaryExecNode with Co
depth: Int,
lastChildren: Seq[Boolean],
builder: StringBuilder,
+ verbose: Boolean,
prefix: String = ""): StringBuilder = {
- child.generateTreeString(depth, lastChildren, builder, "*")
+ child.generateTreeString(depth, lastChildren, builder, verbose, "*")
}
}
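These overrides keep whole-stage codegen transparent in the printed tree: `WholeStageCodegenExec` prints no line of its own and re-prints its child with a `*` prefix (hence the `*HashAggregate` lines in the commit message), while `InputAdapter` clears the prefix at a codegen input boundary. Both now simply pass `verbose` through. In terms of the hypothetical `Node` sketch above, the trick looks like this:

```scala
// Hypothetical wrapper in the Node sketch above: contributes no line of its
// own, re-prints its child at the same depth with a "*" marker.
case class Codegen(child: Node) extends Node {
  def children: Seq[Node] = Seq(child)
  def simpleString: String = "Codegen"
  def verboseString: String = simpleString
  override def generateTreeString(
      depth: Int,
      builder: StringBuilder,
      verbose: Boolean,
      prefix: String): StringBuilder =
    child.generateTreeString(depth, builder, verbose, "*")
}
```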
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala
index f270ca0755..b617e26418 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala
@@ -764,7 +764,11 @@ case class HashAggregateExec(
"""
}
- override def simpleString: String = {
+ override def verboseString: String = toString(verbose = true)
+
+ override def simpleString: String = toString(verbose = false)
+
+ private def toString(verbose: Boolean): String = {
val allAggregateExpressions = aggregateExpressions
testFallbackStartsAt match {
@@ -772,7 +776,11 @@ case class HashAggregateExec(
val keyString = groupingExpressions.mkString("[", ",", "]")
val functionString = allAggregateExpressions.mkString("[", ",", "]")
val outputString = output.mkString("[", ",", "]")
- s"HashAggregate(key=$keyString, functions=$functionString, output=$outputString)"
+ if (verbose) {
+ s"HashAggregate(key=$keyString, functions=$functionString, output=$outputString)"
+ } else {
+ s"HashAggregate(key=$keyString, functions=$functionString)"
+ }
case Some(fallbackStartsAt) =>
s"HashAggregateWithControlledFallback $groupingExpressions " +
s"$allAggregateExpressions $resultExpressions fallbackStartsAt=$fallbackStartsAt"
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortAggregateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortAggregateExec.scala
index 9e48ff8d70..41ba9f5b3f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortAggregateExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortAggregateExec.scala
@@ -103,12 +103,20 @@ case class SortAggregateExec(
}
}
- override def simpleString: String = {
+ override def simpleString: String = toString(verbose = false)
+
+ override def verboseString: String = toString(verbose = true)
+
+ private def toString(verbose: Boolean): String = {
val allAggregateExpressions = aggregateExpressions
val keyString = groupingExpressions.mkString("[", ",", "]")
val functionString = allAggregateExpressions.mkString("[", ",", "]")
val outputString = output.mkString("[", ",", "]")
- s"SortAggregate(key=$keyString, functions=$functionString, output=$outputString)"
+ if (verbose) {
+ s"SortAggregate(key=$keyString, functions=$functionString, output=$outputString)"
+ } else {
+ s"SortAggregate(key=$keyString, functions=$functionString)"
+ }
}
}