[SPARK-8928] [SQL] Makes CatalystSchemaConverter stick to 1.4.x- behavior when handling Parquet LISTs in compatible mode

This PR is based on #7209, authored by Sephiroth-Lin. Author: Weizhong Lin <linweizhong@huawei.com>. Closes #7304 from liancheng/spark-8928 and squashes the following commits: 75267fe [Cheng Lian] Makes CatalystSchemaConverter stick to 1.4.x- behavior when handling LISTs in compatible mode
author: Cheng Lian <lian@databricks.com> 2015-07-08 22:09:12 -0700
committer: Cheng Lian <lian@databricks.com> 2015-07-08 22:09:14 -0700
commit: 3dab0da42940a46f0c4aa4853bdb5c64c4cb2613 (patch)
tree: 9a6610548257c7853f2b881fa710f28fa74f8375
parent: a240bf3b44b15d0da5182d6ebec281dbdc5439e8 (diff)
download: spark-3dab0da42940a46f0c4aa4853bdb5c64c4cb2613.tar.gz
spark-3dab0da42940a46f0c4aa4853bdb5c64c4cb2613.tar.bz2
spark-3dab0da42940a46f0c4aa4853bdb5c64c4cb2613.zip
2 files changed, 9 insertions, 7 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/CatalystSchemaConverter.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/CatalystSchemaConverter.scala
index de3a72d814..1ea6926af6 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/CatalystSchemaConverter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/CatalystSchemaConverter.scala
@@ -461,7 +461,8 @@ private[parquet] class CatalystSchemaConverter(
           field.name,
           Types
             .buildGroup(REPEATED)
-            .addField(convertField(StructField("element", elementType, nullable)))
+            // "array_element" is the name chosen by parquet-hive (1.7.0 and prior version)
+            .addField(convertField(StructField("array_element", elementType, nullable)))
             .named(CatalystConverter.ARRAY_CONTAINS_NULL_BAG_SCHEMA_NAME))
 
       // Spark 1.4.x and prior versions convert ArrayType with non-nullable elements into a 2-level
@@ -474,7 +475,8 @@ private[parquet] class CatalystSchemaConverter(
         ConversionPatterns.listType(
           repetition,
           field.name,
-          convertField(StructField("element", elementType, nullable), REPEATED))
+          // "array" is the name chosen by parquet-avro (1.7.0 and prior version)
+          convertField(StructField("array", elementType, nullable), REPEATED))
 
       // Spark 1.4.x and prior versions convert MapType into a 3-level group annotated by
       // MAP_KEY_VALUE.  This is covered by `convertGroupField(field: GroupType): DataType`.
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala
index 35d3c33f99..fa62939267 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala
@@ -174,7 +174,7 @@ class ParquetSchemaInferenceSuite extends ParquetSchemaTest {
     """
       |message root {
       |  optional group _1 (LIST) {
-      |    repeated int32 element;
+      |    repeated int32 array;
       |  }
       |}
     """.stripMargin)
@@ -198,7 +198,7 @@ class ParquetSchemaInferenceSuite extends ParquetSchemaTest {
       |message root {
       |  optional group _1 (LIST) {
       |    repeated group bag {
-      |      optional int32 element;
+      |      optional int32 array_element;
       |    }
       |  }
       |}
@@ -267,7 +267,7 @@ class ParquetSchemaInferenceSuite extends ParquetSchemaTest {
       |        optional binary _1 (UTF8);
       |        optional group _2 (LIST) {
       |          repeated group bag {
-      |            optional group element {
+      |            optional group array_element {
       |              required int32 _1;
       |              required double _2;
       |            }
@@ -616,7 +616,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest {
     """message root {
       |  optional group f1 (LIST) {
       |    repeated group bag {
-      |      optional int32 element;
+      |      optional int32 array_element;
       |    }
       |  }
       |}
@@ -648,7 +648,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest {
         nullable = true))),
     """message root {
       |  optional group f1 (LIST) {
-      |    repeated int32 element;
+      |    repeated int32 array;
       |  }
       |}
     """.stripMargin)
author	Cheng Lian <lian@databricks.com>	2015-07-08 22:09:12 -0700
committer	Cheng Lian <lian@databricks.com>	2015-07-08 22:09:14 -0700
commit	3dab0da42940a46f0c4aa4853bdb5c64c4cb2613 (patch)
tree	9a6610548257c7853f2b881fa710f28fa74f8375
parent	a240bf3b44b15d0da5182d6ebec281dbdc5439e8 (diff)
download	spark-3dab0da42940a46f0c4aa4853bdb5c64c4cb2613.tar.gz spark-3dab0da42940a46f0c4aa4853bdb5c64c4cb2613.tar.bz2 spark-3dab0da42940a46f0c4aa4853bdb5c64c4cb2613.zip