author     Liang-Chi Hsieh <viirya@gmail.com>    2017-02-03 11:58:42 +0100
committer  Reynold Xin <rxin@databricks.com>     2017-02-03 11:58:42 +0100
commit     bf493686eb17006727b3ec81849b22f3df68fdef (patch)
tree       c37839542b311ada2426636dcca4c402eaf49999 /sql/catalyst
parent     c86a57f4d1a39ab9602733a09d8fec13506cc6d4 (diff)
[SPARK-19411][SQL] Remove the metadata used to mark optional columns in merged Parquet schema for filter predicate pushdown
## What changes were proposed in this pull request?

A metadata key was previously introduced to mark the optional columns in a merged Parquet schema for filter predicate pushdown. Now that we have upgraded to Parquet 1.8.2, which includes the fix for pushdown of optional columns, this metadata is no longer needed.

## How was this patch tested?

Jenkins tests.

Please review http://spark.apache.org/contributing.html before opening a pull request.

Author: Liang-Chi Hsieh <viirya@gmail.com>

Closes #16756 from viirya/remove-optional-metadata.
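To illustrate the post-patch behavior, here is a minimal sketch modeled on the tests removed from DataTypeSuite below (note that StructType.merge is private[sql], so it is only callable from Spark's own org.apache.spark.sql packages; the field names are just examples): a field that exists on only one side of a merge is now copied through unchanged, with no "_OPTIONAL_" entry in its metadata.

    import org.apache.spark.sql.types._

    val left  = StructType(StructField("a", LongType) :: StructField("b", FloatType) :: Nil)
    val right = StructType(StructField("c", LongType) :: Nil)

    // "c" exists only on the right-hand side; after this patch it is carried
    // over as-is instead of being tagged with the optional-field metadata.
    val merged = left.merge(right)

    assert(merged.fieldNames.toSeq == Seq("a", "b", "c"))
    // None of the merged fields carries the removed "_OPTIONAL_" marker.
    assert(merged.forall(f => !f.metadata.contains("_OPTIONAL_")))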
Diffstat (limited to 'sql/catalyst')
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala  |  4
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala            | 15
-rw-r--r--  sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala         | 49
3 files changed, 4 insertions, 64 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 9e6dbf3344..4913dccf4b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -575,9 +575,9 @@ class Analyzer(
// |- view2 (defaultDatabase = db2)
// |- view3 (defaultDatabase = db3)
// |- view4 (defaultDatabase = db4)
- // In this case, the view `view1` is a nested view, it directly references `table2`、`view2`
+ // In this case, the view `view1` is a nested view, it directly references `table2`, `view2`
// and `view4`, the view `view2` references `view3`. On resolving the table, we look up the
- // relations `table2`、`view2`、`view4` using the default database `db1`, and look up the
+ // relations `table2`, `view2`, `view4` using the default database `db1`, and look up the
// relation `view3` using the default database `db2`.
//
// Note this is compatible with the views defined by older versions of Spark(before 2.2), which
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala
index ca0000a465..8d8b5b86d5 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala
@@ -402,13 +402,6 @@ case class StructType(fields: Array[StructField]) extends DataType with Seq[Stru
@InterfaceStability.Stable
object StructType extends AbstractDataType {
- /**
- * A key used in field metadata to indicate that the field comes from the result of merging
- * two different StructTypes that do not always contain the field. That is to say, the field
- * might be missing (optional) from one of the StructTypes.
- */
- private[sql] val metadataKeyForOptionalField = "_OPTIONAL_"
-
override private[sql] def defaultConcreteType: DataType = new StructType
override private[sql] def acceptsType(other: DataType): Boolean = {
@@ -463,8 +456,6 @@ object StructType extends AbstractDataType {
case (StructType(leftFields), StructType(rightFields)) =>
val newFields = ArrayBuffer.empty[StructField]
- // This metadata will record the fields that only exist in one of two StructTypes
- val optionalMeta = new MetadataBuilder()
val rightMapped = fieldsMap(rightFields)
leftFields.foreach {
@@ -476,8 +467,7 @@ object StructType extends AbstractDataType {
nullable = leftNullable || rightNullable)
}
.orElse {
- optionalMeta.putBoolean(metadataKeyForOptionalField, value = true)
- Some(leftField.copy(metadata = optionalMeta.build()))
+ Some(leftField)
}
.foreach(newFields += _)
}
@@ -486,8 +476,7 @@ object StructType extends AbstractDataType {
rightFields
.filterNot(f => leftMapped.get(f.name).nonEmpty)
.foreach { f =>
- optionalMeta.putBoolean(metadataKeyForOptionalField, value = true)
- newFields += f.copy(metadata = optionalMeta.build())
+ newFields += f
}
StructType(newFields)
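At the user-facing level, the scenario this marker existed for is a schema-merged Parquet read with a filter on a column that some files do not contain. A rough end-to-end sketch of that scenario follows (the path, column names, and the `spark` session are illustrative assumptions, not part of this patch); with Parquet 1.8.2 the predicate can be pushed down for such optional columns without the extra metadata:

    import spark.implicits._

    // Two Parquet files with different schemas under the same base path.
    Seq((1L, 1.0f)).toDF("a", "b").write.parquet("/tmp/merged_schema/part=1")
    Seq((2L, 10L)).toDF("a", "c").write.parquet("/tmp/merged_schema/part=2")

    // Schema merging yields fields a, b and c; filtering on "c", which is
    // missing from the first file, is the optional-column pushdown case.
    val df = spark.read.option("mergeSchema", "true").parquet("/tmp/merged_schema")
    df.filter($"c" > 5L).show()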
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala
index 12d2c00dc9..61e1ec7c7a 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala
@@ -132,55 +132,6 @@ class DataTypeSuite extends SparkFunSuite {
assert(mapped === expected)
}
- test("merge where right is empty") {
- val left = StructType(
- StructField("a", LongType) ::
- StructField("b", FloatType) :: Nil)
-
- val right = StructType(List())
- val merged = left.merge(right)
-
- assert(DataType.equalsIgnoreCompatibleNullability(merged, left))
- assert(merged("a").metadata.getBoolean(StructType.metadataKeyForOptionalField))
- assert(merged("b").metadata.getBoolean(StructType.metadataKeyForOptionalField))
- }
-
- test("merge where left is empty") {
-
- val left = StructType(List())
-
- val right = StructType(
- StructField("a", LongType) ::
- StructField("b", FloatType) :: Nil)
-
- val merged = left.merge(right)
-
- assert(DataType.equalsIgnoreCompatibleNullability(merged, right))
- assert(merged("a").metadata.getBoolean(StructType.metadataKeyForOptionalField))
- assert(merged("b").metadata.getBoolean(StructType.metadataKeyForOptionalField))
- }
-
- test("merge where both are non-empty") {
- val left = StructType(
- StructField("a", LongType) ::
- StructField("b", FloatType) :: Nil)
-
- val right = StructType(
- StructField("c", LongType) :: Nil)
-
- val expected = StructType(
- StructField("a", LongType) ::
- StructField("b", FloatType) ::
- StructField("c", LongType) :: Nil)
-
- val merged = left.merge(right)
-
- assert(DataType.equalsIgnoreCompatibleNullability(merged, expected))
- assert(merged("a").metadata.getBoolean(StructType.metadataKeyForOptionalField))
- assert(merged("b").metadata.getBoolean(StructType.metadataKeyForOptionalField))
- assert(merged("c").metadata.getBoolean(StructType.metadataKeyForOptionalField))
- }
-
test("merge where right contains type conflict") {
val left = StructType(
StructField("a", LongType) ::