about | summary | refs | log | tree | commit | diff
path: root/sql
diff options
context:
space:
mode:
Diffstat (limited to 'sql')
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/CatalystSchemaConverter.scala | 13
-rw-r--r-- sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala | 6
2 files changed, 10 insertions, 9 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/CatalystSchemaConverter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/CatalystSchemaConverter.scala
index a21ab1dbb2..2d237da81c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/CatalystSchemaConverter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/CatalystSchemaConverter.scala
@@ -426,13 +426,14 @@ private[parquet] class CatalystSchemaConverter(
// ArrayType and MapType (for Spark versions <= 1.4.x)
// ===================================================
- // Spark 1.4.x and prior versions convert ArrayType with nullable elements into a 3-level
- // LIST structure. This behavior mimics parquet-hive (1.6.0rc3). Note that this case is
- // covered by the backwards-compatibility rules implemented in `isElementType()`.
+ // Spark 1.4.x and prior versions convert `ArrayType` with nullable elements into a 3-level
+ // `LIST` structure. This behavior is somewhat a hybrid of parquet-hive and parquet-avro
+ // (1.6.0rc3): the 3-level structure is similar to parquet-hive while the 3rd level element
+ // field name "array" is borrowed from parquet-avro.
case ArrayType(elementType, nullable @ true) if !followParquetFormatSpec =>
// <list-repetition> group <name> (LIST) {
// repeated group bag {
- // repeated <element-type> element;
+ // optional <element-type> array;
// }
// }
ConversionPatterns.listType(
@@ -441,8 +442,8 @@ private[parquet] class CatalystSchemaConverter(
Types
.buildGroup(REPEATED)
// "array" is borrowed from parquet-avro; parquet-hive (1.7.0 and prior versions) uses "array_element"
- .addField(convertField(StructField("array_element", elementType, nullable)))
- .named(CatalystConverter.ARRAY_CONTAINS_NULL_BAG_SCHEMA_NAME))
+ .addField(convertField(StructField("array", elementType, nullable)))
+ .named("bag"))
// Spark 1.4.x and prior versions convert ArrayType with non-nullable elements into a 2-level
// LIST structure. This behavior mimics parquet-avro (1.6.0rc3). Note that this case is
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala
index 28c59a4abd..5331d7c035 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala
@@ -197,7 +197,7 @@ class ParquetSchemaInferenceSuite extends ParquetSchemaTest {
|message root {
| optional group _1 (LIST) {
| repeated group bag {
- | optional int32 array_element;
+ | optional int32 array;
| }
| }
|}
@@ -266,7 +266,7 @@ class ParquetSchemaInferenceSuite extends ParquetSchemaTest {
| optional binary _1 (UTF8);
| optional group _2 (LIST) {
| repeated group bag {
- | optional group array_element {
+ | optional group array {
| required int32 _1;
| required double _2;
| }
@@ -645,7 +645,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest {
"""message root {
| optional group f1 (LIST) {
| repeated group bag {
- | optional int32 array_element;
+ | optional int32 array;
| }
| }
|}