aboutsummaryrefslogtreecommitdiff
path: root/sql/catalyst
diff options
context:
space:
mode:
authorReynold Xin <rxin@databricks.com>2015-08-07 13:41:45 -0700
committerReynold Xin <rxin@databricks.com>2015-08-07 13:41:45 -0700
commit05d04e10a8ea030bea840c3c5ba93ecac479a039 (patch)
tree3fe88450240b49d4ecd3826d1ca33e3bef0c4dba /sql/catalyst
parentaeddeafc03d77a5149d2c8f9489b0ca83e6b3e03 (diff)
downloadspark-05d04e10a8ea030bea840c3c5ba93ecac479a039.tar.gz
spark-05d04e10a8ea030bea840c3c5ba93ecac479a039.tar.bz2
spark-05d04e10a8ea030bea840c3c5ba93ecac479a039.zip
[SPARK-9733][SQL] Improve physical plan explain for data sources
All data sources show up as "PhysicalRDD" in physical plan explain. It'd be better if we can show the name of the data source. Without this patch: ``` == Physical Plan == NewAggregate with UnsafeHybridAggregationIterator ArrayBuffer(date#0, cat#1) ArrayBuffer((sum(CAST((CAST(count#2, IntegerType) + 1), LongType))2,mode=Final,isDistinct=false)) Exchange hashpartitioning(date#0,cat#1) NewAggregate with UnsafeHybridAggregationIterator ArrayBuffer(date#0, cat#1) ArrayBuffer((sum(CAST((CAST(count#2, IntegerType) + 1), LongType))2,mode=Partial,isDistinct=false)) PhysicalRDD [date#0,cat#1,count#2], MapPartitionsRDD[3] at ``` With this patch: ``` == Physical Plan == TungstenAggregate(key=[date#0,cat#1], value=[(sum(CAST((CAST(count#2, IntegerType) + 1), LongType)),mode=Final,isDistinct=false)] Exchange hashpartitioning(date#0,cat#1) TungstenAggregate(key=[date#0,cat#1], value=[(sum(CAST((CAST(count#2, IntegerType) + 1), LongType)),mode=Partial,isDistinct=false)] ConvertToUnsafe Scan ParquetRelation[file:/scratch/rxin/spark/sales4][date#0,cat#1,count#2] ``` Author: Reynold Xin <rxin@databricks.com> Closes #8024 from rxin/SPARK-9733 and squashes the following commits: 811b90e [Reynold Xin] Fixed Python test case. 52cab77 [Reynold Xin] Cast. eea9ccc [Reynold Xin] Fix test case. fcecb22 [Reynold Xin] [SPARK-9733][SQL] Improve explain message for data source scan node.
Diffstat (limited to 'sql/catalyst')
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala4
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/interfaces.scala2
2 files changed, 3 insertions, 3 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
index 39f99700c8..946c5a9c04 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
@@ -107,6 +107,8 @@ object Cast {
case class Cast(child: Expression, dataType: DataType)
extends UnaryExpression with CodegenFallback {
+ override def toString: String = s"cast($child as ${dataType.simpleString})"
+
override def checkInputDataTypes(): TypeCheckResult = {
if (Cast.canCast(child.dataType, dataType)) {
TypeCheckResult.TypeCheckSuccess
@@ -118,8 +120,6 @@ case class Cast(child: Expression, dataType: DataType)
override def nullable: Boolean = Cast.forceNullable(child.dataType, dataType) || child.nullable
- override def toString: String = s"CAST($child, $dataType)"
-
// [[func]] assumes the input is no longer null because eval already does the null check.
@inline private[this] def buildCast[T](a: Any, func: T => Any): Any = func(a.asInstanceOf[T])
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/interfaces.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/interfaces.scala
index 4abfdfe87d..576d8c7a3a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/interfaces.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/interfaces.scala
@@ -93,7 +93,7 @@ private[sql] case class AggregateExpression2(
AttributeSet(childReferences)
}
- override def toString: String = s"(${aggregateFunction}2,mode=$mode,isDistinct=$isDistinct)"
+ override def toString: String = s"(${aggregateFunction},mode=$mode,isDistinct=$isDistinct)"
}
abstract class AggregateFunction2