diff options
author | Dilip Biswal <dbiswal@us.ibm.com> | 2015-10-31 12:55:33 -0700 |
---|---|---|
committer | Yin Huai <yhuai@databricks.com> | 2015-10-31 12:55:33 -0700 |
commit | fc27dfbf0f8d3f96c70e27d88f7d0316c97ddb1e (patch) | |
tree | dbf32346fb4ea6e06135ec64cf9aedf822243313 /sql | |
parent | ac4118db2dda802b936bb7a18a08844846c71285 (diff) | |
download | spark-fc27dfbf0f8d3f96c70e27d88f7d0316c97ddb1e.tar.gz spark-fc27dfbf0f8d3f96c70e27d88f7d0316c97ddb1e.tar.bz2 spark-fc27dfbf0f8d3f96c70e27d88f7d0316c97ddb1e.zip |
[SPARK-11024][SQL] Optimize NULL in <inlist-expressions> by folding it to Literal(null)
Add a rule to the optimizer that converts NULL [NOT] IN (expr1, ..., exprN) to
Literal(null).
This is a follow-up defect to SPARK-8654.
cloud-fan, can you please take a look?
Author: Dilip Biswal <dbiswal@us.ibm.com>
Closes #9348 from dilipbiswal/spark_11024.
Diffstat (limited to 'sql')
-rw-r--r-- | sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala | 5 | ||||
-rw-r--r-- | sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala | 51 |
2 files changed, 55 insertions, 1 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index d37f43888f..338c5193cb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -417,6 +417,11 @@ object NullPropagation extends Rule[LogicalPlan] { case left :: Literal(null, _) :: Nil => Literal.create(null, e.dataType) case _ => e } + + // If the value expression is NULL then transform the In expression to + // Literal(null) + case In(Literal(null, _), list) => Literal.create(null, BooleanType) + } } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala index 6f7b5b9572..48cab01ac1 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala @@ -35,7 +35,8 @@ class OptimizeInSuite extends PlanTest { val batches = Batch("AnalysisNodes", Once, EliminateSubQueries) :: - Batch("ConstantFolding", Once, + Batch("ConstantFolding", FixedPoint(10), + NullPropagation, ConstantFolding, BooleanSimplification, OptimizeIn) :: Nil @@ -82,4 +83,52 @@ class OptimizeInSuite extends PlanTest { comparePlans(optimized, correctAnswer) } + + test("OptimizedIn test: NULL IN (expr1, ..., exprN) gets transformed to Filter(null)") { + val originalQuery = + testRelation + .where(In(Literal.create(null, NullType), Seq(Literal(1), Literal(2)))) + .analyze + + val optimized = Optimize.execute(originalQuery.analyze) + val correctAnswer = + testRelation + .where(Literal.create(null, BooleanType)) + .analyze + + comparePlans(optimized, correctAnswer) + } + + test("OptimizedIn test: Inset optimization 
disabled as " + + "list expression contains attribute)") { + val originalQuery = + testRelation + .where(In(Literal.create(null, StringType), Seq(Literal(1), UnresolvedAttribute("b")))) + .analyze + + val optimized = Optimize.execute(originalQuery.analyze) + val correctAnswer = + testRelation + .where(Literal.create(null, BooleanType)) + .analyze + + comparePlans(optimized, correctAnswer) + } + + test("OptimizedIn test: Inset optimization disabled as " + + "list expression contains attribute - select)") { + val originalQuery = + testRelation + .select(In(Literal.create(null, StringType), + Seq(Literal(1), UnresolvedAttribute("b"))).as("a")).analyze + + val optimized = Optimize.execute(originalQuery.analyze) + val correctAnswer = + testRelation + .select(Literal.create(null, BooleanType).as("a")) + .analyze + + comparePlans(optimized, correctAnswer) + } + } |