diff options
author | Michael Armbrust <michael@databricks.com> | 2014-10-24 18:36:35 -0700 |
---|---|---|
committer | Josh Rosen <joshrosen@databricks.com> | 2014-10-24 18:36:35 -0700 |
commit | 3a845d3c048eebb0bddb3937128746fde3e8e4d8 (patch) | |
tree | 192dbba101a08ff02de979efba297587f30fd721 /sql/catalyst | |
parent | 898b22ab1fe90e8a3935b19566465046f2256fa6 (diff) | |
download | spark-3a845d3c048eebb0bddb3937128746fde3e8e4d8.tar.gz spark-3a845d3c048eebb0bddb3937128746fde3e8e4d8.tar.bz2 spark-3a845d3c048eebb0bddb3937128746fde3e8e4d8.zip |
[SQL] Update Hive test harness for Hive 12 and 13
As part of the upgrade I also copy the newest version of the query tests, and whitelist a bunch of new ones that are now passing.
Author: Michael Armbrust <michael@databricks.com>
Closes #2936 from marmbrus/fix13tests and squashes the following commits:
d9cbdab [Michael Armbrust] Remove user specific tests
65801cd [Michael Armbrust] style and rat
8f6b09a [Michael Armbrust] Update test harness to work with both Hive 12 and 13.
f044843 [Michael Armbrust] Update Hive query tests and golden files to 0.13
Diffstat (limited to 'sql/catalyst')
2 files changed, 38 insertions, 0 deletions
```diff
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala
index 7c480de107..2b69c02b28 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala
@@ -52,6 +52,8 @@ object HiveTypeCoercion {
  */
 trait HiveTypeCoercion {
 
+  import HiveTypeCoercion._
+
   val typeCoercionRules =
     PropagateTypes ::
     ConvertNaNs ::
@@ -340,6 +342,13 @@ trait HiveTypeCoercion {
       // Skip nodes who's children have not been resolved yet.
       case e if !e.childrenResolved => e
 
+      case a @ CreateArray(children) if !a.resolved =>
+        val commonType = a.childTypes.reduce(
+          (a,b) =>
+            findTightestCommonType(a,b).getOrElse(StringType))
+        CreateArray(
+          children.map(c => if (c.dataType == commonType) c else Cast(c, commonType)))
+
       // Promote SUM, SUM DISTINCT and AVERAGE to largest types to prevent overflows.
       case s @ Sum(e @ DecimalType()) => s // Decimal is already the biggest.
       case Sum(e @ IntegralType()) if e.dataType != LongType => Sum(Cast(e, LongType))
@@ -356,6 +365,10 @@ trait HiveTypeCoercion {
         Average(Cast(e, LongType))
       case Average(e @ FractionalType()) if e.dataType != DoubleType =>
         Average(Cast(e, DoubleType))
+
+      // Hive lets you do aggregation of timestamps... for some reason
+      case Sum(e @ TimestampType()) => Sum(Cast(e, DoubleType))
+      case Average(e @ TimestampType()) => Average(Cast(e, DoubleType))
     }
   }
 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypes.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypes.scala
index dafd745ec9..19421e5667 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypes.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypes.scala
@@ -101,3 +101,28 @@ case class GetField(child: Expression, fieldName: String) extends UnaryExpressio
 
   override def toString = s"$child.$fieldName"
 }
+
+/**
+ * Returns an Array containing the evaluation of all children expressions.
+ */
+case class CreateArray(children: Seq[Expression]) extends Expression {
+  override type EvaluatedType = Any
+
+  lazy val childTypes = children.map(_.dataType).distinct
+
+  override lazy val resolved =
+    childrenResolved && childTypes.size <= 1
+
+  override def dataType: DataType = {
+    assert(resolved, s"Invalid dataType of mixed ArrayType ${childTypes.mkString(",")}")
+    ArrayType(childTypes.headOption.getOrElse(NullType))
+  }
+
+  override def nullable: Boolean = false
+
+  override def eval(input: Row): Any = {
+    children.map(_.eval(input))
+  }
+
+  override def toString = s"Array(${children.mkString(",")})"
+}
```