aboutsummaryrefslogtreecommitdiff
path: root/sql/core
diff options
context:
space:
mode:
authorZongheng Yang <zongheng.y@gmail.com>2014-06-09 16:47:44 -0700
committerMichael Armbrust <michael@databricks.com>2014-06-09 16:47:44 -0700
commita9ec033c8cf489898cc47e2043bd9e86b7df1ff8 (patch)
tree5b73a963028a019c1b89f15f61326097fcf87d4b /sql/core
parentc6e041d171e3d9882ab15e2bd7a7217dc19647f6 (diff)
downloadspark-a9ec033c8cf489898cc47e2043bd9e86b7df1ff8.tar.gz
spark-a9ec033c8cf489898cc47e2043bd9e86b7df1ff8.tar.bz2
spark-a9ec033c8cf489898cc47e2043bd9e86b7df1ff8.zip
[SPARK-1704][SQL] Fully support EXPLAIN commands as SchemaRDD.
This PR attempts to resolve [SPARK-1704](https://issues.apache.org/jira/browse/SPARK-1704) by introducing a physical plan for EXPLAIN commands, which just prints out the debug string (containing various SparkSQL's plans) of the corresponding QueryExecution for the actual query. Author: Zongheng Yang <zongheng.y@gmail.com> Closes #1003 from concretevitamin/explain-cmd and squashes the following commits: 5b7911f [Zongheng Yang] Add a regression test. 1bfa379 [Zongheng Yang] Modify output(). 719ada9 [Zongheng Yang] Override otherCopyArgs for ExplainCommandPhysical. 4318fd7 [Zongheng Yang] Make all output one Row. 439c6ab [Zongheng Yang] Minor cleanups. 408f574 [Zongheng Yang] SPARK-1704: Add CommandStrategy and ExplainCommandPhysical.
Diffstat (limited to 'sql/core')
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala6
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala11
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala32
3 files changed, 49 insertions, 0 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
index 5626f0da22..fde4c485b5 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -191,6 +191,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
val sparkContext = self.sparkContext
val strategies: Seq[Strategy] =
+ CommandStrategy(self) ::
TakeOrdered ::
PartialAggregation ::
LeftSemiJoin ::
@@ -256,6 +257,11 @@ class SQLContext(@transient val sparkContext: SparkContext)
Batch("Prepare Expressions", Once, new BindReferences[SparkPlan]) :: Nil
}
+ // TODO: or should we make QueryExecution protected[sql]?
+ protected[sql] def mkQueryExecution(plan: LogicalPlan) = new QueryExecution {
+ val logical = plan
+ }
+
/**
* The primary workflow for executing relational queries using Spark. Designed to allow easy
* access to the intermediate phases of query execution for developers.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
index 6463f47510..295c265b16 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
@@ -233,4 +233,15 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
case _ => Nil
}
}
+
+ // TODO: this should be merged with SPARK-1508's SetCommandStrategy
+ case class CommandStrategy(context: SQLContext) extends Strategy {
+ def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match {
+ case logical.ExplainCommand(child) =>
+ val qe = context.mkQueryExecution(child)
+ Seq(execution.ExplainCommandPhysical(qe.executedPlan, plan.output)(context))
+ case _ => Nil
+ }
+ }
+
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala
new file mode 100644
index 0000000000..5371d2f479
--- /dev/null
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution
+
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.{SQLContext, Row}
+import org.apache.spark.sql.catalyst.expressions.{GenericRow, Attribute}
+
+case class ExplainCommandPhysical(child: SparkPlan, output: Seq[Attribute])
+ (@transient context: SQLContext) extends UnaryNode {
+ def execute(): RDD[Row] = {
+ val planString = new GenericRow(Array[Any](child.toString))
+ context.sparkContext.parallelize(Seq(planString))
+ }
+
+ override def otherCopyArgs = context :: Nil
+}