diff options
author | Takuya UESHIN <ueshin@happy-camper.st> | 2014-05-31 11:30:03 -0700 |
---|---|---|
committer | Michael Armbrust <michael@databricks.com> | 2014-05-31 11:30:16 -0700 |
commit | f9fdf32c09f089ccd7a24642294c3ae02e978d3f (patch) | |
tree | 782b6e5017ccfa598453c20396a25fda848cd1b7 /sql | |
parent | 91a64eba90032729873675c6f9c0e53b76b5cf56 (diff) | |
download | spark-f9fdf32c09f089ccd7a24642294c3ae02e978d3f.tar.gz spark-f9fdf32c09f089ccd7a24642294c3ae02e978d3f.tar.bz2 spark-f9fdf32c09f089ccd7a24642294c3ae02e978d3f.zip |
[SPARK-1947] [SQL] Child of SumDistinct or Average should be widened to prevent overflows the same as Sum.
The child of `SumDistinct` or `Average` should be widened to prevent overflows, in the same way as the child of `Sum`.
Author: Takuya UESHIN <ueshin@happy-camper.st>
Closes #902 from ueshin/issues/SPARK-1947 and squashes the following commits:
99c3dcb [Takuya UESHIN] Insert Cast for SumDistinct and Average.
(cherry picked from commit 3ce81494c512bc97979a743ea77ef913315f7fb6)
Signed-off-by: Michael Armbrust <michael@databricks.com>
Diffstat (limited to 'sql')
-rw-r--r-- | sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala | 14 |
1 files changed, 13 insertions, 1 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala index 4557d77160..326feea6fe 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala @@ -264,10 +264,22 @@ trait HiveTypeCoercion { // Skip nodes who's children have not been resolved yet. case e if !e.childrenResolved => e - // Promote SUM to largest types to prevent overflows. + // Promote SUM, SUM DISTINCT and AVERAGE to largest types to prevent overflows. case s @ Sum(e @ DecimalType()) => s // Decimal is already the biggest. case Sum(e @ IntegralType()) if e.dataType != LongType => Sum(Cast(e, LongType)) case Sum(e @ FractionalType()) if e.dataType != DoubleType => Sum(Cast(e, DoubleType)) + + case s @ SumDistinct(e @ DecimalType()) => s // Decimal is already the biggest. + case SumDistinct(e @ IntegralType()) if e.dataType != LongType => + SumDistinct(Cast(e, LongType)) + case SumDistinct(e @ FractionalType()) if e.dataType != DoubleType => + SumDistinct(Cast(e, DoubleType)) + + case s @ Average(e @ DecimalType()) => s // Decimal is already the biggest. + case Average(e @ IntegralType()) if e.dataType != LongType => + Average(Cast(e, LongType)) + case Average(e @ FractionalType()) if e.dataType != DoubleType => + Average(Cast(e, DoubleType)) } } } |