about | summary | refs | log | tree | commit | diff
path: root/sql/core/src/main
diff options
context:
space:
mode:
author: Daoyuan Wang <daoyuan.wang@intel.com> 2014-09-10 10:48:33 -0700
committer: Michael Armbrust <michael@databricks.com> 2014-09-10 10:48:36 -0700
commit: f0c87dc86ae65a39cd19370d8d960b4a60854517 (patch)
tree: bd58fa8328c4ff316175191e3450ca6aab64be65 /sql/core/src/main
parent: a0283300c4af5e64a1dc06193245daa1e746b5f4 (diff)
download: spark-f0c87dc86ae65a39cd19370d8d960b4a60854517.tar.gz
spark-f0c87dc86ae65a39cd19370d8d960b4a60854517.tar.bz2
spark-f0c87dc86ae65a39cd19370d8d960b4a60854517.zip
[SPARK-3363][SQL] Type Coercion should promote null to all other types.
Type coercion should allow every type to hold a null value.

Author: Daoyuan Wang <daoyuan.wang@intel.com>
Author: Michael Armbrust <michael@databricks.com>

Closes #2246 from adrian-wang/spark3363-0 and squashes the following commits:

c6241de [Daoyuan Wang] minor code clean
595b417 [Daoyuan Wang] Merge pull request #2 from marmbrus/pr/2246
832e640 [Michael Armbrust] reduce code duplication
ef6f986 [Daoyuan Wang] make double boolean miss in jsonRDD compatibleType
c619f0a [Daoyuan Wang] Type Coercion should support every type to have null value
Diffstat (limited to 'sql/core/src/main')
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala 51
1 files changed, 22 insertions, 29 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala
index 1c0b03c684..70062eae3b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala
@@ -125,38 +125,31 @@ private[sql] object JsonRDD extends Logging {
* Returns the most general data type for two given data types.
*/
private[json] def compatibleType(t1: DataType, t2: DataType): DataType = {
- // Try and find a promotion rule that contains both types in question.
- val applicableConversion = HiveTypeCoercion.allPromotions.find(p => p.contains(t1) && p
- .contains(t2))
-
- // If found return the widest common type, otherwise None
- val returnType = applicableConversion.map(_.filter(t => t == t1 || t == t2).last)
-
- if (returnType.isDefined) {
- returnType.get
- } else {
- // t1 or t2 is a StructType, ArrayType, or an unexpected type.
- (t1, t2) match {
- case (other: DataType, NullType) => other
- case (NullType, other: DataType) => other
- case (StructType(fields1), StructType(fields2)) => {
- val newFields = (fields1 ++ fields2).groupBy(field => field.name).map {
- case (name, fieldTypes) => {
- val dataType = fieldTypes.map(field => field.dataType).reduce(
- (type1: DataType, type2: DataType) => compatibleType(type1, type2))
- StructField(name, dataType, true)
+ HiveTypeCoercion.findTightestCommonType(t1, t2) match {
+ case Some(commonType) => commonType
+ case None =>
+ // t1 or t2 is a StructType, ArrayType, or an unexpected type.
+ (t1, t2) match {
+ case (other: DataType, NullType) => other
+ case (NullType, other: DataType) => other
+ case (StructType(fields1), StructType(fields2)) => {
+ val newFields = (fields1 ++ fields2).groupBy(field => field.name).map {
+ case (name, fieldTypes) => {
+ val dataType = fieldTypes.map(field => field.dataType).reduce(
+ (type1: DataType, type2: DataType) => compatibleType(type1, type2))
+ StructField(name, dataType, true)
+ }
}
+ StructType(newFields.toSeq.sortBy {
+ case StructField(name, _, _) => name
+ })
}
- StructType(newFields.toSeq.sortBy {
- case StructField(name, _, _) => name
- })
+ case (ArrayType(elementType1, containsNull1), ArrayType(elementType2, containsNull2)) =>
+ ArrayType(compatibleType(elementType1, elementType2), containsNull1 || containsNull2)
+ // TODO: We should use JsonObjectStringType to mark that values of field will be
+ // strings and every string is a Json object.
+ case (_, _) => StringType
}
- case (ArrayType(elementType1, containsNull1), ArrayType(elementType2, containsNull2)) =>
- ArrayType(compatibleType(elementType1, elementType2), containsNull1 || containsNull2)
- // TODO: We should use JsonObjectStringType to mark that values of field will be
- // strings and every string is a Json object.
- case (_, _) => StringType
- }
}
}