aboutsummaryrefslogtreecommitdiff
path: root/core
diff options
context:
space:
mode:
authorSean Zhong <seanzhong@databricks.com>2016-06-01 09:58:01 -0700
committerWenchen Fan <wenchen@databricks.com>2016-06-01 09:58:01 -0700
commitd5012c274036463c47a751cfe9977ade3a68b668 (patch)
treef710a65b27a56a0d30141481bb9b16af3a4f28ad /core
parent1f43562daf9454428796317203d9dcc9030a46eb (diff)
downloadspark-d5012c274036463c47a751cfe9977ade3a68b668.tar.gz
spark-d5012c274036463c47a751cfe9977ade3a68b668.tar.bz2
spark-d5012c274036463c47a751cfe9977ade3a68b668.zip
[SPARK-15495][SQL] Improve the explain output for Aggregation operator
## What changes were proposed in this pull request? This PR improves the explain output of the Aggregate operator. SQL: ``` Seq((1,2,3)).toDF("a", "b", "c").createTempView("df1") spark.sql("cache table df1") spark.sql("select count(a), count(c), b from df1 group by b").explain() ``` **Before change:** ``` *TungstenAggregate(key=[b#8], functions=[count(1),count(1)], output=[count(a)#79L,count(c)#80L,b#8]) +- Exchange hashpartitioning(b#8, 200), None +- *TungstenAggregate(key=[b#8], functions=[partial_count(1),partial_count(1)], output=[b#8,count#98L,count#99L]) +- InMemoryTableScan [b#8], InMemoryRelation [a#7,b#8,c#9], true, 10000, StorageLevel(disk=true, memory=true, offheap=false, deserialized=true, replication=1), LocalTableScan [a#7,b#8,c#9], [[1,2,3]], Some(df1) ``` **After change:** ``` *Aggregate(key=[b#8], functions=[count(1),count(1)], output=[count(a)#79L,count(c)#80L,b#8]) +- Exchange hashpartitioning(b#8, 200), None +- *Aggregate(key=[b#8], functions=[partial_count(1),partial_count(1)], output=[b#8,count#98L,count#99L]) +- InMemoryTableScan [b#8], InMemoryRelation [a#7,b#8,c#9], true, 10000, StorageLevel(disk, memory, deserialized, 1 replicas), LocalTableScan [a#7,b#8,c#9], [[1,2,3]], Some(df1) ``` ## How was this patch tested? Manual test and existing UT. Author: Sean Zhong <seanzhong@databricks.com> Closes #13363 from clockfly/verbose3.
Diffstat (limited to 'core')
-rw-r--r--core/src/main/scala/org/apache/spark/storage/StorageLevel.scala | 10
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/core/src/main/scala/org/apache/spark/storage/StorageLevel.scala b/core/src/main/scala/org/apache/spark/storage/StorageLevel.scala
index 216ec07934..fad0404beb 100644
--- a/core/src/main/scala/org/apache/spark/storage/StorageLevel.scala
+++ b/core/src/main/scala/org/apache/spark/storage/StorageLevel.scala
@@ -120,8 +120,14 @@ class StorageLevel private(
private def readResolve(): Object = StorageLevel.getCachedStorageLevel(this)
override def toString: String = {
- s"StorageLevel(disk=$useDisk, memory=$useMemory, offheap=$useOffHeap, " +
- s"deserialized=$deserialized, replication=$replication)"
+ val disk = if (useDisk) "disk" else ""
+ val memory = if (useMemory) "memory" else ""
+ val heap = if (useOffHeap) "offheap" else ""
+ val deserialize = if (deserialized) "deserialized" else ""
+
+ val output =
+ Seq(disk, memory, heap, deserialize, s"$replication replicas").filter(_.nonEmpty)
+ s"StorageLevel(${output.mkString(", ")})"
}
override def hashCode(): Int = toInt * 41 + replication