diff options
author | wangzhenhua <wangzhenhua@huawei.com> | 2017-02-24 10:24:59 -0800 |
---|---|---|
committer | Wenchen Fan <wenchen@databricks.com> | 2017-02-24 10:24:59 -0800 |
commit | 69d0da6373979ce5b2bcd52933b5a7660d893e88 (patch) | |
tree | 0604364d8facb22837c95564d6df4d05736477c7 /sql/hive | |
parent | 05954f32e9bde56dc1f9a72028900d705185f6d7 (diff) | |
download | spark-69d0da6373979ce5b2bcd52933b5a7660d893e88.tar.gz spark-69d0da6373979ce5b2bcd52933b5a7660d893e88.tar.bz2 spark-69d0da6373979ce5b2bcd52933b5a7660d893e88.zip |
[SPARK-17078][SQL] Show stats when explain
## What changes were proposed in this pull request?
Currently, the estimated statistics of a logical plan can only be inspected by debugging. We need to provide an easier and more efficient way for developers and users to view them.
In this PR, we add an `EXPLAIN COST` command that shows the statistics of each node in the optimized logical plan.
E.g.
```
spark-sql> EXPLAIN COST select count(1) from store_returns;
...
== Optimized Logical Plan ==
Aggregate [count(1) AS count(1)#24L], Statistics(sizeInBytes=16.0 B, rowCount=1, isBroadcastable=false)
+- Project, Statistics(sizeInBytes=4.3 GB, rowCount=5.76E+8, isBroadcastable=false)
+- Relation[sr_returned_date_sk#3,sr_return_time_sk#4,sr_item_sk#5,sr_customer_sk#6,sr_cdemo_sk#7,sr_hdemo_sk#8,sr_addr_sk#9,sr_store_sk#10,sr_reason_sk#11,sr_ticket_number#12,sr_return_quantity#13,sr_return_amt#14,sr_return_tax#15,sr_return_amt_inc_tax#16,sr_fee#17,sr_return_ship_cost#18,sr_refunded_cash#19,sr_reversed_charge#20,sr_store_credit#21,sr_net_loss#22] parquet, Statistics(sizeInBytes=28.6 GB, rowCount=5.76E+8, isBroadcastable=false)
...
```
## How was this patch tested?
Added test cases (e.g. "show cost in explain command" in `HiveExplainSuite`).
Author: wangzhenhua <wangzhenhua@huawei.com>
Author: Zhenhua Wang <wzh_zju@163.com>
Closes #16594 from wzhfy/showStats.
Diffstat (limited to 'sql/hive')
-rw-r--r-- | sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala | 13 |
1 files changed, 13 insertions, 0 deletions
```diff
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala
index f9751e3d5f..cfca1d7983 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala
@@ -27,6 +27,19 @@ import org.apache.spark.sql.test.SQLTestUtils
  */
 class HiveExplainSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
 
+  test("show cost in explain command") {
+    // Only has sizeInBytes before ANALYZE command
+    checkKeywordsExist(sql("EXPLAIN COST SELECT * FROM src "), "sizeInBytes")
+    checkKeywordsNotExist(sql("EXPLAIN COST SELECT * FROM src "), "rowCount")
+
+    // Has both sizeInBytes and rowCount after ANALYZE command
+    sql("ANALYZE TABLE src COMPUTE STATISTICS")
+    checkKeywordsExist(sql("EXPLAIN COST SELECT * FROM src "), "sizeInBytes", "rowCount")
+
+    // No cost information
+    checkKeywordsNotExist(sql("EXPLAIN SELECT * FROM src "), "sizeInBytes", "rowCount")
+  }
+
   test("explain extended command") {
     checkKeywordsExist(sql(" explain select * from src where key=123 "),
                    "== Physical Plan ==")
```