aboutsummaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorDilip Biswal <dbiswal@us.ibm.com>2015-10-31 12:55:33 -0700
committerYin Huai <yhuai@databricks.com>2015-10-31 12:55:33 -0700
commitfc27dfbf0f8d3f96c70e27d88f7d0316c97ddb1e (patch)
treedbf32346fb4ea6e06135ec64cf9aedf822243313 /sql
parentac4118db2dda802b936bb7a18a08844846c71285 (diff)
downloadspark-fc27dfbf0f8d3f96c70e27d88f7d0316c97ddb1e.tar.gz
spark-fc27dfbf0f8d3f96c70e27d88f7d0316c97ddb1e.tar.bz2
spark-fc27dfbf0f8d3f96c70e27d88f7d0316c97ddb1e.zip
[SPARK-11024][SQL] Optimize NULL in <inlist-expressions> by folding it to Literal(null)
Add a rule in the optimizer to convert NULL [NOT] IN (expr1, ..., exprN) to Literal(null). This is a follow-up defect to SPARK-8654. cloud-fan, can you please take a look? Author: Dilip Biswal <dbiswal@us.ibm.com> Closes #9348 from dilipbiswal/spark_11024.
Diffstat (limited to 'sql')
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala5
-rw-r--r--sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala51
2 files changed, 55 insertions, 1 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index d37f43888f..338c5193cb 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -417,6 +417,11 @@ object NullPropagation extends Rule[LogicalPlan] {
case left :: Literal(null, _) :: Nil => Literal.create(null, e.dataType)
case _ => e
}
+
+ // If the value expression is NULL then transform the In expression to
+ // Literal(null)
+ case In(Literal(null, _), list) => Literal.create(null, BooleanType)
+
}
}
}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala
index 6f7b5b9572..48cab01ac1 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala
@@ -35,7 +35,8 @@ class OptimizeInSuite extends PlanTest {
val batches =
Batch("AnalysisNodes", Once,
EliminateSubQueries) ::
- Batch("ConstantFolding", Once,
+ Batch("ConstantFolding", FixedPoint(10),
+ NullPropagation,
ConstantFolding,
BooleanSimplification,
OptimizeIn) :: Nil
@@ -82,4 +83,52 @@ class OptimizeInSuite extends PlanTest {
comparePlans(optimized, correctAnswer)
}
+
+ test("OptimizedIn test: NULL IN (expr1, ..., exprN) gets transformed to Filter(null)") {
+ val originalQuery = // Filter whose predicate is In(<null literal of NullType>, <two integer literals>).
+ testRelation
+ .where(In(Literal.create(null, NullType), Seq(Literal(1), Literal(2))))
+ .analyze
+
+ val optimized = Optimize.execute(originalQuery.analyze) // Apply Optimize (defined outside this block) to the plan.
+ val correctAnswer = // Expected plan: the same filter with a null literal of BooleanType as its condition.
+ testRelation
+ .where(Literal.create(null, BooleanType))
+ .analyze
+
+ comparePlans(optimized, correctAnswer) // Compare the optimized plan against the expected one.
+ }
+
+ test("OptimizedIn test: Inset optimization disabled as " +
+ "list expression contains attribute)") { // NOTE(review): name mentions InSet and has an unbalanced ')', but the assertion below expects a null literal - confirm the name is intended.
+ val originalQuery = // Filter on In(<null literal of StringType>, list mixing a literal and the unresolved attribute "b").
+ testRelation
+ .where(In(Literal.create(null, StringType), Seq(Literal(1), UnresolvedAttribute("b"))))
+ .analyze
+
+ val optimized = Optimize.execute(originalQuery.analyze) // Apply Optimize (defined outside this block) to the plan.
+ val correctAnswer = // Expected plan: the filter condition becomes a null literal of BooleanType even though the list holds an attribute.
+ testRelation
+ .where(Literal.create(null, BooleanType))
+ .analyze
+
+ comparePlans(optimized, correctAnswer) // Compare the optimized plan against the expected one.
+ }
+
+ test("OptimizedIn test: Inset optimization disabled as " +
+ "list expression contains attribute - select)") { // NOTE(review): name mentions InSet and has an unbalanced ')', but the assertion below expects a null literal - confirm the name is intended.
+ val originalQuery = // Projection (not a filter) of In(<null literal of StringType>, ...) aliased as "a".
+ testRelation
+ .select(In(Literal.create(null, StringType),
+ Seq(Literal(1), UnresolvedAttribute("b"))).as("a")).analyze
+
+ val optimized = Optimize.execute(originalQuery.analyze) // Apply Optimize (defined outside this block) to the plan.
+ val correctAnswer = // Expected plan: the projected expression becomes a null literal of BooleanType under the same alias "a".
+ testRelation
+ .select(Literal.create(null, BooleanType).as("a"))
+ .analyze
+
+ comparePlans(optimized, correctAnswer) // Compare the optimized plan against the expected one.
+ }
+
}