author     hyukjinkwon <gurwls223@gmail.com>  2015-12-15 17:02:14 -0800
committer  Michael Armbrust <michael@databricks.com>  2015-12-15 17:02:14 -0800
commit     28112657ea5919451291c21b4b8e1eb3db0ec8d4 (patch)
tree       6e25b836ea1f55d9804f35292ca5dd1653909272 /sql
parent     86ea64dd146757c8f997d05fb5bb44f6aa58515c (diff)
[SPARK-12236][SQL] JDBC filter tests all pass if filters are not really pushed down
https://issues.apache.org/jira/browse/SPARK-12236

Currently, JDBC filters are not tested properly: all the tests pass even if the filters are not pushed down, because Spark-side filtering hides the difference. In this PR, I first corrected the tests to check the pushed-down filters properly by removing Spark-side filtering. I also removed the `!=` cases, since that operator is not actually pushed down. Lastly, I moved the `stripSparkFilter()` function to `SQLTestUtils`, as it will be shared by all tests for pushed-down filters; it will also be shared with the ORC datasource, whose filters are likewise not tested properly.

Author: hyukjinkwon <gurwls223@gmail.com>

Closes #10221 from HyukjinKwon/SPARK-12236.
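As a minimal sketch of the testing approach (the table name `foobar`, the predicate, and the expected count are taken from the JDBCSuite change below; the surrounding test suite scaffolding is assumed):

    // Before: the Spark-side Filter re-applies the predicate, so the assertion holds
    // even when the JDBC source ignores the pushed-down filter.
    assert(sql("SELECT * FROM foobar WHERE THEID < 1").collect().size === 0)

    // After: stripSparkFilter drops the Spark-side Filter node from the executed plan,
    // so the rows counted are exactly what the JDBC datasource returned. If "THEID < 1"
    // were not pushed down, the source would return every row and the assertion would fail.
    assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE THEID < 1")).collect().size === 0)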
Diffstat (limited to 'sql')
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala  15
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala  10
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala  15
3 files changed, 19 insertions(+), 21 deletions(-)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
index daf41bc292..6178e37d2a 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
@@ -110,21 +110,6 @@ class ParquetFilterSuite extends QueryTest with ParquetTest with SharedSQLContex
checkBinaryFilterPredicate(predicate, filterClass, Seq(Row(expected)))(df)
}
-  /**
-   * Strip Spark-side filtering in order to check if a datasource filters rows correctly.
-   */
-  protected def stripSparkFilter(df: DataFrame): DataFrame = {
-    val schema = df.schema
-    val childRDD = df
-      .queryExecution
-      .executedPlan.asInstanceOf[org.apache.spark.sql.execution.Filter]
-      .child
-      .execute()
-      .map(row => Row.fromSeq(row.toSeq(schema)))
-
-    sqlContext.createDataFrame(childRDD, schema)
-  }
-
test("filter pushdown - boolean") {
withParquetDataFrame((true :: false :: Nil).map(b => Tuple1.apply(Option(b)))) { implicit df =>
checkFilterPredicate('_1.isNull, classOf[Eq[_]], Seq.empty[Row])
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
index 8c24aa3151..a360947152 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
@@ -176,12 +176,10 @@ class JDBCSuite extends SparkFunSuite with BeforeAndAfter with SharedSQLContext
}
test("SELECT * WHERE (simple predicates)") {
- assert(sql("SELECT * FROM foobar WHERE THEID < 1").collect().size === 0)
- assert(sql("SELECT * FROM foobar WHERE THEID != 2").collect().size === 2)
- assert(sql("SELECT * FROM foobar WHERE THEID = 1").collect().size === 1)
- assert(sql("SELECT * FROM foobar WHERE NAME = 'fred'").collect().size === 1)
- assert(sql("SELECT * FROM foobar WHERE NAME > 'fred'").collect().size === 2)
- assert(sql("SELECT * FROM foobar WHERE NAME != 'fred'").collect().size === 2)
+ assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE THEID < 1")).collect().size === 0)
+ assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE THEID = 1")).collect().size === 1)
+ assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME = 'fred'")).collect().size === 1)
+ assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME > 'fred'")).collect().size === 2)
}
test("SELECT * WHERE (quoted strings)") {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala
index 9214569f18..e87da1527c 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala
@@ -180,6 +180,21 @@ private[sql] trait SQLTestUtils
}
/**
+   * Strip Spark-side filtering in order to check if a datasource filters rows correctly.
+   */
+  protected def stripSparkFilter(df: DataFrame): DataFrame = {
+    val schema = df.schema
+    val childRDD = df
+      .queryExecution
+      .executedPlan.asInstanceOf[org.apache.spark.sql.execution.Filter]
+      .child
+      .execute()
+      .map(row => Row.fromSeq(row.toSeq(schema)))
+
+    sqlContext.createDataFrame(childRDD, schema)
+  }
+
+  /**
* Turn a logical plan into a [[DataFrame]]. This should be removed once we have an easier
* way to construct [[DataFrame]] directly out of local data without relying on implicits.
*/