diff options
author | tianyi <tianyi.asiainfo@gmail.com> | 2015-02-12 22:18:39 -0800 |
---|---|---|
committer | Cheng Lian <lian@databricks.com> | 2015-02-12 22:19:01 -0800 |
commit | b9f332ab680f671a368a8411679bb4c52d495486 (patch) | |
tree | be7b4b4b03dd0ca91f959bb7b21e3bd9d8bb8936 | |
parent | edbac178d186f6936408b211385a5fea9e4f4603 (diff) | |
download | spark-b9f332ab680f671a368a8411679bb4c52d495486.tar.gz spark-b9f332ab680f671a368a8411679bb4c52d495486.tar.bz2 spark-b9f332ab680f671a368a8411679bb4c52d495486.zip |
[SPARK-3365][SQL]Wrong schema generated for List type
This PR fix the issue SPARK-3365.
The reason is Spark generated wrong schema for the type `List` in `ScalaReflection.scala`
for example:
the generated schema for type `Seq[String]` is:
```
{"name":"x","type":{"type":"array","elementType":"string","containsNull":true},"nullable":true,"metadata":{}}`
```
the generated schema for type `List[String]` is:
```
{"name":"x","type":{"type":"struct","fields":[]},"nullable":true,"metadata":{}}`
```
Author: tianyi <tianyi.asiainfo@gmail.com>
Closes #4581 from tianyi/SPARK-3365 and squashes the following commits:
a097e86 [tianyi] change the order of resolution in ScalaReflection.scala
(cherry picked from commit 1c8633f3fe9d814c83384e339b958740c250c00c)
Signed-off-by: Cheng Lian <lian@databricks.com>
-rw-r--r-- | sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala | 30 | ||||
-rw-r--r-- | sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ScalaReflectionSuite.scala | 5 |
2 files changed, 20 insertions, 15 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala index 5d9c331ca5..11fd443733 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala @@ -122,6 +122,21 @@ trait ScalaReflection { case t if t <:< typeOf[Option[_]] => val TypeRef(_, _, Seq(optType)) = t Schema(schemaFor(optType).dataType, nullable = true) + // Need to decide if we actually need a special type here. + case t if t <:< typeOf[Array[Byte]] => Schema(BinaryType, nullable = true) + case t if t <:< typeOf[Array[_]] => + val TypeRef(_, _, Seq(elementType)) = t + val Schema(dataType, nullable) = schemaFor(elementType) + Schema(ArrayType(dataType, containsNull = nullable), nullable = true) + case t if t <:< typeOf[Seq[_]] => + val TypeRef(_, _, Seq(elementType)) = t + val Schema(dataType, nullable) = schemaFor(elementType) + Schema(ArrayType(dataType, containsNull = nullable), nullable = true) + case t if t <:< typeOf[Map[_, _]] => + val TypeRef(_, _, Seq(keyType, valueType)) = t + val Schema(valueDataType, valueNullable) = schemaFor(valueType) + Schema(MapType(schemaFor(keyType).dataType, + valueDataType, valueContainsNull = valueNullable), nullable = true) case t if t <:< typeOf[Product] => val formalTypeArgs = t.typeSymbol.asClass.typeParams val TypeRef(_, _, actualTypeArgs) = t @@ -144,21 +159,6 @@ trait ScalaReflection { schemaFor(p.typeSignature.substituteTypes(formalTypeArgs, actualTypeArgs)) StructField(p.name.toString, dataType, nullable) }), nullable = true) - // Need to decide if we actually need a special type here. - case t if t <:< typeOf[Array[Byte]] => Schema(BinaryType, nullable = true) - case t if t <:< typeOf[Array[_]] => - val TypeRef(_, _, Seq(elementType)) = t - val Schema(dataType, nullable) = schemaFor(elementType) - Schema(ArrayType(dataType, containsNull = nullable), nullable = true) - case t if t <:< typeOf[Seq[_]] => - val TypeRef(_, _, Seq(elementType)) = t - val Schema(dataType, nullable) = schemaFor(elementType) - Schema(ArrayType(dataType, containsNull = nullable), nullable = true) - case t if t <:< typeOf[Map[_, _]] => - val TypeRef(_, _, Seq(keyType, valueType)) = t - val Schema(valueDataType, valueNullable) = schemaFor(valueType) - Schema(MapType(schemaFor(keyType).dataType, - valueDataType, valueContainsNull = valueNullable), nullable = true) case t if t <:< typeOf[String] => Schema(StringType, nullable = true) case t if t <:< typeOf[Timestamp] => Schema(TimestampType, nullable = true) case t if t <:< typeOf[java.sql.Date] => Schema(DateType, nullable = true) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ScalaReflectionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ScalaReflectionSuite.scala index d0f547d187..eee00e3f7e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ScalaReflectionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ScalaReflectionSuite.scala @@ -61,6 +61,7 @@ case class OptionalData( case class ComplexData( arrayField: Seq[Int], arrayField1: Array[Int], + arrayField2: List[Int], arrayFieldContainsNull: Seq[java.lang.Integer], mapField: Map[Int, Long], mapFieldValueContainsNull: Map[Int, java.lang.Long], @@ -138,6 +139,10 @@ class ScalaReflectionSuite extends FunSuite { ArrayType(IntegerType, containsNull = false), nullable = true), StructField( + "arrayField2", + ArrayType(IntegerType, containsNull = false), + nullable = true), + StructField( "arrayFieldContainsNull", ArrayType(IntegerType, containsNull = true), nullable = true), |