diff options
author | Weizhong Lin <linweizhong@huawei.com> | 2015-07-08 22:18:39 -0700 |
---|---|---|
committer | Cheng Lian <lian@databricks.com> | 2015-07-08 22:19:19 -0700 |
commit | 851e247caad0977cfd4998254d9602624e06539f (patch) | |
tree | 9a6610548257c7853f2b881fa710f28fa74f8375 /sql/core | |
parent | c056484c0741e2a03d4a916538e1b9e3e65e71c3 (diff) | |
download | spark-851e247caad0977cfd4998254d9602624e06539f.tar.gz spark-851e247caad0977cfd4998254d9602624e06539f.tar.bz2 spark-851e247caad0977cfd4998254d9602624e06539f.zip |
[SPARK-8928] [SQL] Makes CatalystSchemaConverter sticking to 1.4.x- when handling Parquet LISTs in compatible mode
This PR is based on #7209 authored by Sephiroth-Lin.
Author: Weizhong Lin <linweizhong@huawei.com>
Closes #7314 from liancheng/spark-8928 and squashes the following commits:
75267fe [Cheng Lian] Makes CatalystSchemaConverter sticking to 1.4.x- when handling LISTs in compatible mode
Diffstat (limited to 'sql/core')
-rw-r--r-- | sql/core/src/main/scala/org/apache/spark/sql/parquet/CatalystSchemaConverter.scala | 6 | ||||
-rw-r--r-- | sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala | 10 |
2 files changed, 9 insertions, 7 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/CatalystSchemaConverter.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/CatalystSchemaConverter.scala index de3a72d814..1ea6926af6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/CatalystSchemaConverter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/CatalystSchemaConverter.scala @@ -461,7 +461,8 @@ private[parquet] class CatalystSchemaConverter( field.name, Types .buildGroup(REPEATED) - .addField(convertField(StructField("element", elementType, nullable))) + // "array_element" is the name chosen by parquet-hive (1.7.0 and prior version) + .addField(convertField(StructField("array_element", elementType, nullable))) .named(CatalystConverter.ARRAY_CONTAINS_NULL_BAG_SCHEMA_NAME)) // Spark 1.4.x and prior versions convert ArrayType with non-nullable elements into a 2-level @@ -474,7 +475,8 @@ private[parquet] class CatalystSchemaConverter( ConversionPatterns.listType( repetition, field.name, - convertField(StructField("element", elementType, nullable), REPEATED)) + // "array" is the name chosen by parquet-avro (1.7.0 and prior version) + convertField(StructField("array", elementType, nullable), REPEATED)) // Spark 1.4.x and prior versions convert MapType into a 3-level group annotated by // MAP_KEY_VALUE. This is covered by `convertGroupField(field: GroupType): DataType`. diff --git a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala index 35d3c33f99..fa62939267 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala @@ -174,7 +174,7 @@ class ParquetSchemaInferenceSuite extends ParquetSchemaTest { """ |message root { | optional group _1 (LIST) { - | repeated int32 element; + | repeated int32 array; | } |} """.stripMargin) @@ -198,7 +198,7 @@ class ParquetSchemaInferenceSuite extends ParquetSchemaTest { |message root { | optional group _1 (LIST) { | repeated group bag { - | optional int32 element; + | optional int32 array_element; | } | } |} @@ -267,7 +267,7 @@ class ParquetSchemaInferenceSuite extends ParquetSchemaTest { | optional binary _1 (UTF8); | optional group _2 (LIST) { | repeated group bag { - | optional group element { + | optional group array_element { | required int32 _1; | required double _2; | } @@ -616,7 +616,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest { """message root { | optional group f1 (LIST) { | repeated group bag { - | optional int32 element; + | optional int32 array_element; | } | } |} @@ -648,7 +648,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest { nullable = true))), """message root { | optional group f1 (LIST) { - | repeated int32 element; + | repeated int32 array; | } |} """.stripMargin) |