aboutsummaryrefslogtreecommitdiff
path: root/sql/catalyst
diff options
context:
space:
mode:
authorDavies Liu <davies@databricks.com>2016-03-03 17:36:48 -0800
committerYin Huai <yhuai@databricks.com>2016-03-03 17:36:48 -0800
commitb373a888621ba6f0dd499f47093d4e2e42086dfc (patch)
treee68780effb3df46a612a076fe425920def81015b /sql/catalyst
parentad0de99f3d3167990d501297f1df069fe15e0678 (diff)
downloadspark-b373a888621ba6f0dd499f47093d4e2e42086dfc.tar.gz
spark-b373a888621ba6f0dd499f47093d4e2e42086dfc.tar.bz2
spark-b373a888621ba6f0dd499f47093d4e2e42086dfc.zip
[SPARK-13415][SQL] Visualize subquery in SQL web UI
## What changes were proposed in this pull request? This PR adds support for visualizing subqueries in the SQL web UI, and also improves the explain output for subqueries, especially when they are used together with whole-stage codegen. For example: ```python >>> sqlContext.range(100).registerTempTable("range") >>> sqlContext.sql("select id / (select sum(id) from range) from range where id > (select id from range limit 1)").explain(True) == Parsed Logical Plan == 'Project [unresolvedalias(('id / subquery#9), None)] : +- 'SubqueryAlias subquery#9 : +- 'Project [unresolvedalias('sum('id), None)] : +- 'UnresolvedRelation `range`, None +- 'Filter ('id > subquery#8) : +- 'SubqueryAlias subquery#8 : +- 'GlobalLimit 1 : +- 'LocalLimit 1 : +- 'Project [unresolvedalias('id, None)] : +- 'UnresolvedRelation `range`, None +- 'UnresolvedRelation `range`, None == Analyzed Logical Plan == (id / scalarsubquery()): double Project [(cast(id#0L as double) / cast(subquery#9 as double)) AS (id / scalarsubquery())#11] : +- SubqueryAlias subquery#9 : +- Aggregate [(sum(id#0L),mode=Complete,isDistinct=false) AS sum(id)#10L] : +- SubqueryAlias range : +- Range 0, 100, 1, 4, [id#0L] +- Filter (id#0L > subquery#8) : +- SubqueryAlias subquery#8 : +- GlobalLimit 1 : +- LocalLimit 1 : +- Project [id#0L] : +- SubqueryAlias range : +- Range 0, 100, 1, 4, [id#0L] +- SubqueryAlias range +- Range 0, 100, 1, 4, [id#0L] == Optimized Logical Plan == Project [(cast(id#0L as double) / cast(subquery#9 as double)) AS (id / scalarsubquery())#11] : +- SubqueryAlias subquery#9 : +- Aggregate [(sum(id#0L),mode=Complete,isDistinct=false) AS sum(id)#10L] : +- Range 0, 100, 1, 4, [id#0L] +- Filter (id#0L > subquery#8) : +- SubqueryAlias subquery#8 : +- GlobalLimit 1 : +- LocalLimit 1 : +- Project [id#0L] : +- Range 0, 100, 1, 4, [id#0L] +- Range 0, 100, 1, 4, [id#0L] == Physical Plan == WholeStageCodegen : +- Project [(cast(id#0L as double) / cast(subquery#9 as double)) AS (id / scalarsubquery())#11] : : +- Subquery subquery#9 : : +- 
WholeStageCodegen : : : +- TungstenAggregate(key=[], functions=[(sum(id#0L),mode=Final,isDistinct=false)], output=[sum(id)#10L]) : : : +- INPUT : : +- Exchange SinglePartition, None : : +- WholeStageCodegen : : : +- TungstenAggregate(key=[], functions=[(sum(id#0L),mode=Partial,isDistinct=false)], output=[sum#14L]) : : : +- Range 0, 1, 4, 100, [id#0L] : +- Filter (id#0L > subquery#8) : : +- Subquery subquery#8 : : +- CollectLimit 1 : : +- WholeStageCodegen : : : +- Project [id#0L] : : : +- Range 0, 1, 4, 100, [id#0L] : +- Range 0, 1, 4, 100, [id#0L] ``` The web UI looks like: ![subquery](https://cloud.githubusercontent.com/assets/40902/13377963/932bcbae-dda7-11e5-82f7-03c9be85d77c.png) This PR also changes the tree structure of WholeStageCodegen to make it consistent with the other operators. Before this change, both WholeStageCodegen and InputAdapter held references to the same plans, and either could be updated without notifying the other, causing problems. This was discovered by #11403. ## How was this patch tested? Existing tests, plus manual tests with the example query, checking the explain output and the web UI. Author: Davies Liu <davies@databricks.com> Closes #11417 from davies/viz_subquery.
Diffstat (limited to 'sql/catalyst')
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala10
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala49
2 files changed, 56 insertions, 3 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
index 3ff37fffbd..0e0453b517 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
@@ -229,8 +229,12 @@ abstract class QueryPlan[PlanType <: TreeNode[PlanType]] extends TreeNode[PlanTy
override def simpleString: String = statePrefix + super.simpleString
- override def treeChildren: Seq[PlanType] = {
- val subqueries = expressions.flatMap(_.collect {case e: SubqueryExpression => e})
- children ++ subqueries.map(e => e.plan.asInstanceOf[PlanType])
+ /**
+ * All the subqueries of current plan.
+ */
+ def subqueries: Seq[PlanType] = {
+ expressions.flatMap(_.collect {case e: SubqueryExpression => e.plan.asInstanceOf[PlanType]})
}
+
+ override def innerChildren: Seq[PlanType] = subqueries
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
index 2d0bf6b375..6b7997e903 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
@@ -447,10 +447,53 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product {
/**
* All the nodes that will be used to generate tree string.
+ *
+ * For example:
+ *
+ * WholeStageCodegen
+ * +-- SortMergeJoin
+ * |-- InputAdapter
+ * | +-- Sort
+ * +-- InputAdapter
+ * +-- Sort
+ *
+ * The treeChildren of WholeStageCodegen will be Seq(Sort, Sort), which generates a tree
+ * string like this:
+ *
+ * WholeStageCodegen
+ * : +- SortMergeJoin
+ * : :- INPUT
+ * : :- INPUT
+ * :- Sort
+ * :- Sort
*/
protected def treeChildren: Seq[BaseType] = children
/**
+ * All the nodes that are parts of this node.
+ *
+ * For example:
+ *
+ * WholeStageCodegen
+ * +-- SortMergeJoin
+ * |-- InputAdapter
+ * | +-- Sort
+ * +-- InputAdapter
+ * +-- Sort
+ *
+ * The innerChildren of WholeStageCodegen will be Seq(SortMergeJoin), which generates a
+ * tree string like this:
+ *
+ * WholeStageCodegen
+ * : +- SortMergeJoin
+ * : :- INPUT
+ * : :- INPUT
+ * :- Sort
+ * :- Sort
+ */
+ protected def innerChildren: Seq[BaseType] = Nil
+
+ /**
* Appends the string represent of this node and its children to the given StringBuilder.
*
* The `i`-th element in `lastChildren` indicates whether the ancestor of the current node at
@@ -472,6 +515,12 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product {
builder.append(simpleString)
builder.append("\n")
+ if (innerChildren.nonEmpty) {
+ innerChildren.init.foreach(_.generateTreeString(
+ depth + 2, lastChildren :+ false :+ false, builder))
+ innerChildren.last.generateTreeString(depth + 2, lastChildren :+ false :+ true, builder)
+ }
+
if (treeChildren.nonEmpty) {
treeChildren.init.foreach(_.generateTreeString(depth + 1, lastChildren :+ false, builder))
treeChildren.last.generateTreeString(depth + 1, lastChildren :+ true, builder)