aboutsummaryrefslogtreecommitdiff
path: root/sql/hive/src/test/scala
diff options
context:
space:
mode:
authorhyukjinkwon <gurwls223@gmail.com>2016-07-05 13:59:13 +0800
committerCheng Lian <lian@databricks.com>2016-07-05 13:59:13 +0800
commit7742d9f1584150befeb2f3d76cdbd4ea1f37c914 (patch)
tree3ea3ef5f2a04b1492e4ad38ca65168a64d2dfb50 /sql/hive/src/test/scala
parent8f6cf00c697689174bcf522091e396b4fa5ef66d (diff)
downloadspark-7742d9f1584150befeb2f3d76cdbd4ea1f37c914.tar.gz
spark-7742d9f1584150befeb2f3d76cdbd4ea1f37c914.tar.bz2
spark-7742d9f1584150befeb2f3d76cdbd4ea1f37c914.zip
[SPARK-15198][SQL] Support for pushing down filters for boolean types in ORC data source
## What changes were proposed in this pull request? It seems ORC supports all the types in ([`PredicateLeaf.Type`](https://github.com/apache/hive/blob/e085b7e9bd059d91aaf013df0db4d71dca90ec6f/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java#L50-L56)) which includes boolean types. So, this was tested first. This PR adds the support for pushing filters down for `BooleanType` in ORC data source. This PR also removes `OrcTableScan` class and the companion object, which is not used anymore. ## How was this patch tested? Unittest in `OrcFilterSuite` and `OrcQuerySuite`. Author: hyukjinkwon <gurwls223@gmail.com> Closes #12972 from HyukjinKwon/SPARK-15198.
Diffstat (limited to 'sql/hive/src/test/scala')
-rw-r--r--sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcFilterSuite.scala25
-rw-r--r--sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala13
2 files changed, 34 insertions, 4 deletions
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcFilterSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcFilterSuite.scala
index 8c027f9935..7a30e548cd 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcFilterSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcFilterSuite.scala
@@ -208,6 +208,27 @@ class OrcFilterSuite extends QueryTest with OrcTest {
}
}
+ test("filter pushdown - boolean") {
+ withOrcDataFrame((true :: false :: Nil).map(b => Tuple1.apply(Option(b)))) { implicit df =>
+ checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL)
+
+ checkFilterPredicate('_1 === true, PredicateLeaf.Operator.EQUALS)
+ checkFilterPredicate('_1 <=> true, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
+
+ checkFilterPredicate('_1 < true, PredicateLeaf.Operator.LESS_THAN)
+ checkFilterPredicate('_1 > false, PredicateLeaf.Operator.LESS_THAN_EQUALS)
+ checkFilterPredicate('_1 <= false, PredicateLeaf.Operator.LESS_THAN_EQUALS)
+ checkFilterPredicate('_1 >= false, PredicateLeaf.Operator.LESS_THAN)
+
+ checkFilterPredicate(Literal(false) === '_1, PredicateLeaf.Operator.EQUALS)
+ checkFilterPredicate(Literal(false) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
+ checkFilterPredicate(Literal(false) > '_1, PredicateLeaf.Operator.LESS_THAN)
+ checkFilterPredicate(Literal(true) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
+ checkFilterPredicate(Literal(true) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
+ checkFilterPredicate(Literal(true) <= '_1, PredicateLeaf.Operator.LESS_THAN)
+ }
+ }
+
test("filter pushdown - combinations with logical operators") {
withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i)))) { implicit df =>
// Because `ExpressionTree` is not accessible at Hive 1.2.x, this should be checked
@@ -264,10 +285,6 @@ class OrcFilterSuite extends QueryTest with OrcTest {
withOrcDataFrame((1 to 4).map(i => Tuple1(i.b))) { implicit df =>
checkNoFilterPredicate('_1 <=> 1.b)
}
- // BooleanType
- withOrcDataFrame((true :: false :: Nil).map(b => Tuple1.apply(Option(b)))) { implicit df =>
- checkNoFilterPredicate('_1 === true)
- }
// TimestampType
val stringTimestamp = "2015-08-20 15:57:00"
withOrcDataFrame(Seq(Tuple1(Timestamp.valueOf(stringTimestamp)))) { implicit df =>
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala
index 4a86987e29..af8115cf9d 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala
@@ -464,6 +464,19 @@ class OrcQuerySuite extends QueryTest with BeforeAndAfterAll with OrcTest {
}
}
+ test("SPARK-15198 Support for pushing down filters for boolean types") {
+ withSQLConf(SQLConf.ORC_FILTER_PUSHDOWN_ENABLED.key -> "true") {
+ val data = (0 until 10).map(_ => (true, false))
+ withOrcFile(data) { file =>
+ val df = spark.read.orc(file).where("_2 == true")
+ val actual = stripSparkFilter(df).count()
+
+ // ORC filter should be applied and the total count should be 0.
+ assert(actual === 0)
+ }
+ }
+ }
+
test("column nullability and comment - write and then read") {
val schema = (new StructType)
.add("cl1", IntegerType, nullable = false, comment = "test")