about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala | 5
-rw-r--r-- sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala | 18
2 files changed, 19 insertions, 4 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala
index 410600b052..3516cfe680 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala
@@ -758,12 +758,13 @@ private[sql] object ParquetRelation2 extends Logging {
|${parquetSchema.prettyJson}
""".stripMargin
- assert(metastoreSchema.size == parquetSchema.size, schemaConflictMessage)
+ assert(metastoreSchema.size <= parquetSchema.size, schemaConflictMessage)
val ordinalMap = metastoreSchema.zipWithIndex.map {
case (field, index) => field.name.toLowerCase -> index
}.toMap
- val reorderedParquetSchema = parquetSchema.sortBy(f => ordinalMap(f.name.toLowerCase))
+ val reorderedParquetSchema = parquetSchema.sortBy(f =>
+ ordinalMap.getOrElse(f.name.toLowerCase, metastoreSchema.size + 1))
StructType(metastoreSchema.zip(reorderedParquetSchema).map {
// Uses Parquet field names but retains Metastore data types.
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala
index 321832cd43..8462f9bb2d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala
@@ -212,8 +212,11 @@ class ParquetSchemaSuite extends FunSuite with ParquetTest {
StructField("UPPERCase", IntegerType, nullable = true))))
}
- // Conflicting field count
- assert(intercept[Throwable] {
+ // Metastore schema is a subset of the Parquet schema
+ assertResult(
+ StructType(Seq(
+ StructField("UPPERCase", DoubleType, nullable = false)))) {
+
ParquetRelation2.mergeMetastoreParquetSchema(
StructType(Seq(
StructField("uppercase", DoubleType, nullable = false))),
@@ -221,6 +224,17 @@ class ParquetSchemaSuite extends FunSuite with ParquetTest {
StructType(Seq(
StructField("lowerCase", BinaryType),
StructField("UPPERCase", IntegerType, nullable = true))))
+ }
+
+ // Conflicting field count
+ assert(intercept[Throwable] {
+ ParquetRelation2.mergeMetastoreParquetSchema(
+ StructType(Seq(
+ StructField("uppercase", DoubleType, nullable = false),
+ StructField("lowerCase", BinaryType))),
+
+ StructType(Seq(
+ StructField("UPPERCase", IntegerType, nullable = true))))
}.getMessage.contains("detected conflicting schemas"))
// Conflicting field names