diff options
author | Liang-Chi Hsieh <viirya@gmail.com> | 2017-02-03 11:58:42 +0100 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2017-02-03 11:58:42 +0100 |
commit | bf493686eb17006727b3ec81849b22f3df68fdef (patch) | |
tree | c37839542b311ada2426636dcca4c402eaf49999 /sql/catalyst | |
parent | c86a57f4d1a39ab9602733a09d8fec13506cc6d4 (diff) | |
download | spark-bf493686eb17006727b3ec81849b22f3df68fdef.tar.gz spark-bf493686eb17006727b3ec81849b22f3df68fdef.tar.bz2 spark-bf493686eb17006727b3ec81849b22f3df68fdef.zip |
[SPARK-19411][SQL] Remove the metadata used to mark optional columns in merged Parquet schema for filter predicate pushdown
## What changes were proposed in this pull request?
A metadata key was previously introduced to mark the optional columns in a merged Parquet schema for filter predicate pushdown. Now that we have upgraded to Parquet 1.8.2, which includes the fix for the pushdown of optional columns, this metadata is no longer needed.
## How was this patch tested?
Jenkins tests.
Please review http://spark.apache.org/contributing.html before opening a pull request.
Author: Liang-Chi Hsieh <viirya@gmail.com>
Closes #16756 from viirya/remove-optional-metadata.
Diffstat (limited to 'sql/catalyst')
3 files changed, 4 insertions, 64 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 9e6dbf3344..4913dccf4b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -575,9 +575,9 @@ class Analyzer( // |- view2 (defaultDatabase = db2) // |- view3 (defaultDatabase = db3) // |- view4 (defaultDatabase = db4) - // In this case, the view `view1` is a nested view, it directly references `table2`、`view2` + // In this case, the view `view1` is a nested view, it directly references `table2`, `view2` // and `view4`, the view `view2` references `view3`. On resolving the table, we look up the - // relations `table2`、`view2`、`view4` using the default database `db1`, and look up the + // relations `table2`, `view2`, `view4` using the default database `db1`, and look up the // relation `view3` using the default database `db2`. // // Note this is compatible with the views defined by older versions of Spark(before 2.2), which diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala index ca0000a465..8d8b5b86d5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala @@ -402,13 +402,6 @@ case class StructType(fields: Array[StructField]) extends DataType with Seq[Stru @InterfaceStability.Stable object StructType extends AbstractDataType { - /** - * A key used in field metadata to indicate that the field comes from the result of merging - * two different StructTypes that do not always contain the field. That is to say, the field - * might be missing (optional) from one of the StructTypes. 
- */ - private[sql] val metadataKeyForOptionalField = "_OPTIONAL_" - override private[sql] def defaultConcreteType: DataType = new StructType override private[sql] def acceptsType(other: DataType): Boolean = { @@ -463,8 +456,6 @@ object StructType extends AbstractDataType { case (StructType(leftFields), StructType(rightFields)) => val newFields = ArrayBuffer.empty[StructField] - // This metadata will record the fields that only exist in one of two StructTypes - val optionalMeta = new MetadataBuilder() val rightMapped = fieldsMap(rightFields) leftFields.foreach { @@ -476,8 +467,7 @@ object StructType extends AbstractDataType { nullable = leftNullable || rightNullable) } .orElse { - optionalMeta.putBoolean(metadataKeyForOptionalField, value = true) - Some(leftField.copy(metadata = optionalMeta.build())) + Some(leftField) } .foreach(newFields += _) } @@ -486,8 +476,7 @@ object StructType extends AbstractDataType { rightFields .filterNot(f => leftMapped.get(f.name).nonEmpty) .foreach { f => - optionalMeta.putBoolean(metadataKeyForOptionalField, value = true) - newFields += f.copy(metadata = optionalMeta.build()) + newFields += f } StructType(newFields) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala index 12d2c00dc9..61e1ec7c7a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala @@ -132,55 +132,6 @@ class DataTypeSuite extends SparkFunSuite { assert(mapped === expected) } - test("merge where right is empty") { - val left = StructType( - StructField("a", LongType) :: - StructField("b", FloatType) :: Nil) - - val right = StructType(List()) - val merged = left.merge(right) - - assert(DataType.equalsIgnoreCompatibleNullability(merged, left)) - assert(merged("a").metadata.getBoolean(StructType.metadataKeyForOptionalField)) - 
assert(merged("b").metadata.getBoolean(StructType.metadataKeyForOptionalField)) - } - - test("merge where left is empty") { - - val left = StructType(List()) - - val right = StructType( - StructField("a", LongType) :: - StructField("b", FloatType) :: Nil) - - val merged = left.merge(right) - - assert(DataType.equalsIgnoreCompatibleNullability(merged, right)) - assert(merged("a").metadata.getBoolean(StructType.metadataKeyForOptionalField)) - assert(merged("b").metadata.getBoolean(StructType.metadataKeyForOptionalField)) - } - - test("merge where both are non-empty") { - val left = StructType( - StructField("a", LongType) :: - StructField("b", FloatType) :: Nil) - - val right = StructType( - StructField("c", LongType) :: Nil) - - val expected = StructType( - StructField("a", LongType) :: - StructField("b", FloatType) :: - StructField("c", LongType) :: Nil) - - val merged = left.merge(right) - - assert(DataType.equalsIgnoreCompatibleNullability(merged, expected)) - assert(merged("a").metadata.getBoolean(StructType.metadataKeyForOptionalField)) - assert(merged("b").metadata.getBoolean(StructType.metadataKeyForOptionalField)) - assert(merged("c").metadata.getBoolean(StructType.metadataKeyForOptionalField)) - } - test("merge where right contains type conflict") { val left = StructType( StructField("a", LongType) :: |