diff options
author | Daoyuan Wang <daoyuan.wang@intel.com> | 2015-05-06 10:30:42 -0700 |
---|---|---|
committer | Yin Huai <yhuai@databricks.com> | 2015-05-06 10:30:42 -0700 |
commit | c3eb441f5487c9b6476e1d6e2a2d852dcc43b986 (patch) | |
tree | e552f539608a3f11bdb735f0e4adc57ff79cb437 /sql/catalyst | |
parent | 150f671c286c57deaf37ab1d8f837d68b5be82a0 (diff) | |
download | spark-c3eb441f5487c9b6476e1d6e2a2d852dcc43b986.tar.gz spark-c3eb441f5487c9b6476e1d6e2a2d852dcc43b986.tar.bz2 spark-c3eb441f5487c9b6476e1d6e2a2d852dcc43b986.zip |
[SPARK-6201] [SQL] promote string and do widen types for IN
huangjs
Acutally spark sql will first go through analysis period, in which we do widen types and promote strings, and then optimization, where constant IN will be converted into INSET.
So it turn out that we only need to fix this for IN.
Author: Daoyuan Wang <daoyuan.wang@intel.com>
Closes #4945 from adrian-wang/inset and squashes the following commits:
71e05cc [Daoyuan Wang] minor fix
581fa1c [Daoyuan Wang] mysql way
f3f7baf [Daoyuan Wang] address comments
5eed4bc [Daoyuan Wang] promote string and do widen types for IN
Diffstat (limited to 'sql/catalyst')
-rw-r--r-- | sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala | 11 | ||||
-rw-r--r-- | sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala | 4 |
2 files changed, 13 insertions, 2 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala index 831fb4fe95..96e2aee4de 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala @@ -69,6 +69,7 @@ trait HiveTypeCoercion { val typeCoercionRules = PropagateTypes :: ConvertNaNs :: + InConversion :: WidenTypes :: PromoteStrings :: DecimalPrecision :: @@ -287,6 +288,16 @@ trait HiveTypeCoercion { } } + /** + * Convert all expressions in in() list to the left operator type + */ + object InConversion extends Rule[LogicalPlan] { + def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions { + case i @ In(a, b) if b.exists(_.dataType != a.dataType) => + i.makeCopy(Array(a, b.map(Cast(_, a.dataType)))) + } + } + // scalastyle:off /** * Calculates and propagates precision for fixed-precision decimals. Hive has a number of diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 709f7d672d..e4a60f53d6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -310,8 +310,8 @@ object OptimizeIn extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan transform { case q: LogicalPlan => q transformExpressionsDown { case In(v, list) if !list.exists(!_.isInstanceOf[Literal]) => - val hSet = list.map(e => e.eval(null)) - InSet(v, HashSet() ++ hSet) + val hSet = list.map(e => e.eval(null)) + InSet(v, HashSet() ++ hSet) } } } |