From 4bdeeb7d25453b9b50c7dc23a5c7f588754f0e52 Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Fri, 14 Nov 2014 14:21:16 -0800 Subject: [SPARK-4245][SQL] Fix containsNull of the result ArrayType of CreateArray expression. The `containsNull` of the result `ArrayType` of `CreateArray` should be `true` only if the children is empty or there exists nullable child. Author: Takuya UESHIN Closes #3110 from ueshin/issues/SPARK-4245 and squashes the following commits: 6f64746 [Takuya UESHIN] Move equalsIgnoreNullability method into DataType. 5a90e02 [Takuya UESHIN] Refine InsertIntoHiveType and add some comments. cbecba8 [Takuya UESHIN] Fix a test title. 884ec37 [Takuya UESHIN] Merge branch 'master' into issues/SPARK-4245 3c5274b [Takuya UESHIN] Add tests to insert data of types ArrayType / MapType / StructType with nullability is false into Hive table. 41a94a9 [Takuya UESHIN] Replace InsertIntoTable with InsertIntoHiveTable if data types ignoring nullability are same. 43e6ef5 [Takuya UESHIN] Fix containsNull for empty array. 778e997 [Takuya UESHIN] Fix containsNull of the result ArrayType of CreateArray expression. (cherry picked from commit bbd8f5bee81d5788c356977c173dd1edc42c77a3) Signed-off-by: Michael Armbrust --- .../sql/catalyst/expressions/complexTypes.scala | 4 +++- .../apache/spark/sql/catalyst/types/dataTypes.scala | 21 +++++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) (limited to 'sql/catalyst') diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypes.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypes.scala index 19421e5667..917b346086 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypes.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypes.scala @@ -115,7 +115,9 @@ case class CreateArray(children: Seq[Expression]) extends Expression { override def dataType: DataType = { assert(resolved, s"Invalid dataType of mixed ArrayType ${childTypes.mkString(",")}") - ArrayType(childTypes.headOption.getOrElse(NullType)) + ArrayType( + childTypes.headOption.getOrElse(NullType), + containsNull = children.exists(_.nullable)) } override def nullable: Boolean = false diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/dataTypes.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/dataTypes.scala index 5dd19dd12d..ff1dc03069 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/dataTypes.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/dataTypes.scala @@ -171,6 +171,27 @@ object DataType { case _ => } } + + /** + * Compares two types, ignoring nullability of ArrayType, MapType, StructType. + */ + def equalsIgnoreNullability(left: DataType, right: DataType): Boolean = { + (left, right) match { + case (ArrayType(leftElementType, _), ArrayType(rightElementType, _)) => + equalsIgnoreNullability(leftElementType, rightElementType) + case (MapType(leftKeyType, leftValueType, _), MapType(rightKeyType, rightValueType, _)) => + equalsIgnoreNullability(leftKeyType, rightKeyType) && + equalsIgnoreNullability(leftValueType, rightValueType) + case (StructType(leftFields), StructType(rightFields)) => + leftFields.size == rightFields.size && + leftFields.zip(rightFields) + .forall{ + case (left, right) => + left.name == right.name && equalsIgnoreNullability(left.dataType, right.dataType) + } + case (left, right) => left == right + } + } } abstract class DataType { -- cgit v1.2.3