Diffstat (limited to 'sql')
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala        | 17
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetFilterSuite.scala | 17
2 files changed, 29 insertions, 5 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala
index 3516cfe680..0d68810ec6 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala
@@ -435,11 +435,18 @@ private[sql] case class ParquetRelation2(
// Push down filters when possible. Notice that not all filters can be converted to Parquet
// filter predicate. Here we try to convert each individual predicate and only collect those
// convertible ones.
- predicates
- .flatMap(ParquetFilters.createFilter)
- .reduceOption(FilterApi.and)
- .filter(_ => sqlContext.conf.parquetFilterPushDown)
- .foreach(ParquetInputFormat.setFilterPredicate(jobConf, _))
+ if (sqlContext.conf.parquetFilterPushDown) {
+ predicates
+ // Don't push down predicates which reference partition columns
+ .filter { pred =>
+ val partitionColNames = partitionColumns.map(_.name).toSet
+ val referencedColNames = pred.references.map(_.name).toSet
+ referencedColNames.intersect(partitionColNames).isEmpty
+ }
+ .flatMap(ParquetFilters.createFilter)
+ .reduceOption(FilterApi.and)
+ .foreach(ParquetInputFormat.setFilterPredicate(jobConf, _))
+ }

if (isPartitioned) {
logInfo {
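
The guard added above drops any predicate whose referenced columns overlap with the partition columns before conversion to a Parquet filter. The following is a minimal standalone Scala sketch of that selection logic; Column, Pred, and the string-based filters are simplified stand-ins for Catalyst expressions and parquet-mr's FilterApi predicates, not Spark's actual classes.

// Simplified stand-ins for Catalyst expressions and Parquet filter predicates.
case class Column(name: String)
case class Pred(references: Seq[Column], parquetFilter: Option[String])

object PartitionPruneSketch {
  // Keep only predicates that do not reference a partition column, convert the
  // survivors (dropping non-convertible ones), and AND the results together.
  def pushableFilter(
      predicates: Seq[Pred],
      partitionColumns: Seq[Column]): Option[String] = {
    val partitionColNames = partitionColumns.map(_.name).toSet
    predicates
      .filter(p => p.references.map(_.name).toSet.intersect(partitionColNames).isEmpty)
      .flatMap(_.parquetFilter)                // drop predicates with no Parquet equivalent
      .reduceOption((l, r) => s"and($l, $r)")  // analogue of FilterApi.and
  }

  def main(args: Array[String]): Unit = {
    val preds = Seq(
      Pred(Seq(Column("part")), Some("eq(part, 1)")),  // references the partition column
      Pred(Seq(Column("a")), Some("gt(a, 2)")),
      Pred(Seq(Column("b")), None))                    // not convertible to a Parquet filter
    println(pushableFilter(preds, Seq(Column("part"))))  // prints Some(gt(a, 2))
  }
}
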
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetFilterSuite.scala
index 4d32e84fc1..6a2c2a7c40 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetFilterSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetFilterSuite.scala
@@ -321,6 +321,23 @@ class ParquetDataSourceOnFilterSuite extends ParquetFilterSuiteBase with BeforeA
override protected def afterAll(): Unit = {
sqlContext.setConf(SQLConf.PARQUET_USE_DATA_SOURCE_API, originalConf.toString)
}
+
+ test("SPARK-6554: don't push down predicates which reference partition columns") {
+ import sqlContext.implicits._
+
+ withSQLConf(SQLConf.PARQUET_FILTER_PUSHDOWN_ENABLED -> "true") {
+ withTempPath { dir =>
+ val path = s"${dir.getCanonicalPath}/part=1"
+ (1 to 3).map(i => (i, i.toString)).toDF("a", "b").saveAsParquetFile(path)
+
+ // If the "part = 1" filter gets pushed down, this query will throw an exception since
+ // "part" is not a valid column in the actual Parquet file
+ checkAnswer(
+ sqlContext.parquetFile(path).filter("part = 1"),
+ (1 to 3).map(i => Row(i, i.toString, 1)))
+ }
+ }
+ }
}

class ParquetDataSourceOffFilterSuite extends ParquetFilterSuiteBase with BeforeAndAfterAll {
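
For context, a hedged end-to-end sketch of the scenario the new test exercises, written against the same 1.3-era API (SQLContext, saveAsParquetFile, parquetFile). The local SparkContext setup, the temp path, and the "spark.sql.parquet.filterPushdown" key (the string behind SQLConf.PARQUET_FILTER_PUSHDOWN_ENABLED) are assumptions of this sketch, not part of the patch.

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext

object PartitionFilterSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setMaster("local").setAppName("sketch"))
    val sqlContext = new SQLContext(sc)
    import sqlContext.implicits._

    // Enable Parquet filter pushdown explicitly, as the test does via withSQLConf.
    sqlContext.setConf("spark.sql.parquet.filterPushdown", "true")

    // "part" exists only in the directory name, not inside the Parquet files,
    // so it is discovered as a partition column rather than read from the data.
    val path = "/tmp/spark-6554-sketch/part=1"
    (1 to 3).map(i => (i, i.toString)).toDF("a", "b").saveAsParquetFile(path)

    // With the fix, the "part = 1" predicate is evaluated by Spark SQL after
    // partition discovery instead of being pushed into the Parquet reader,
    // which knows nothing about "part".
    sqlContext.parquetFile(path).filter("part = 1").collect().foreach(println)
  }
}
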