From e2fb9fd365466da888ab8b3a2a0836049a65f8c8 Mon Sep 17 00:00:00 2001 From: jiangxingbo Date: Fri, 25 Nov 2016 12:44:34 -0800 Subject: [SPARK-18436][SQL] isin causing SQL syntax error with JDBC ## What changes were proposed in this pull request? The expression `in(empty seq)` is invalid in some data sources. Since `in(empty seq)` is always false, we should convert `in(empty seq)` to a false literal in the optimizer. The SQL `SELECT * FROM t WHERE a IN ()` throws a `ParseException`, which is consistent with Hive, so that behavior does not need to change. ## How was this patch tested? Added a new test case in `OptimizeInSuite`. Author: jiangxingbo Closes #15977 from jiangxb1987/isin-empty. --- .../sql/catalyst/expressions/PredicateSuite.scala | 24 ++++++++++++---------- .../sql/execution/datasources/jdbc/JDBCRDD.scala | 2 ++ .../org/apache/spark/sql/jdbc/JDBCSuite.scala | 2 ++ 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala index f9f6799e6e..6fc3de178f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala @@ -35,7 +35,7 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper { test(s"3VL $name") { truthTable.foreach { case (l, r, answer) => - val expr = op(Literal.create(l, BooleanType), Literal.create(r, BooleanType)) + val expr = op(NonFoldableLiteral(l, BooleanType), NonFoldableLiteral(r, BooleanType)) checkEvaluation(expr, answer) } } @@ -72,7 +72,7 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper { (false, true) :: (null, null) :: Nil notTrueTable.foreach { case (v, answer) => - checkEvaluation(Not(Literal.create(v, BooleanType)), answer) + checkEvaluation(Not(NonFoldableLiteral(v, BooleanType)), 
answer) } checkConsistencyBetweenInterpretedAndCodegen(Not, BooleanType) } @@ -120,12 +120,14 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper { (null, null, null) :: Nil) test("IN") { - checkEvaluation(In(Literal.create(null, IntegerType), Seq(Literal(1), Literal(2))), null) - checkEvaluation(In(Literal.create(null, IntegerType), Seq(Literal.create(null, IntegerType))), - null) - checkEvaluation(In(Literal(1), Seq(Literal.create(null, IntegerType))), null) - checkEvaluation(In(Literal(1), Seq(Literal(1), Literal.create(null, IntegerType))), true) - checkEvaluation(In(Literal(2), Seq(Literal(1), Literal.create(null, IntegerType))), null) + checkEvaluation(In(NonFoldableLiteral(null, IntegerType), Seq(Literal(1), Literal(2))), null) + checkEvaluation(In(NonFoldableLiteral(null, IntegerType), + Seq(NonFoldableLiteral(null, IntegerType))), null) + checkEvaluation(In(NonFoldableLiteral(null, IntegerType), Seq.empty), null) + checkEvaluation(In(Literal(1), Seq.empty), false) + checkEvaluation(In(Literal(1), Seq(NonFoldableLiteral(null, IntegerType))), null) + checkEvaluation(In(Literal(1), Seq(Literal(1), NonFoldableLiteral(null, IntegerType))), true) + checkEvaluation(In(Literal(2), Seq(Literal(1), NonFoldableLiteral(null, IntegerType))), null) checkEvaluation(In(Literal(1), Seq(Literal(1), Literal(2))), true) checkEvaluation(In(Literal(2), Seq(Literal(1), Literal(2))), true) checkEvaluation(In(Literal(3), Seq(Literal(1), Literal(2))), false) @@ -133,7 +135,7 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper { And(In(Literal(1), Seq(Literal(1), Literal(2))), In(Literal(2), Seq(Literal(1), Literal(2)))), true) - val ns = Literal.create(null, StringType) + val ns = NonFoldableLiteral(null, StringType) checkEvaluation(In(ns, Seq(Literal("1"), Literal("2"))), null) checkEvaluation(In(ns, Seq(ns)), null) checkEvaluation(In(Literal("a"), Seq(ns)), null) @@ -153,7 +155,7 @@ class PredicateSuite extends SparkFunSuite with 
ExpressionEvalHelper { case _ => value } } - val input = inputData.map(Literal.create(_, t)) + val input = inputData.map(NonFoldableLiteral(_, t)) val expected = if (inputData(0) == null) { null } else if (inputData.slice(1, 10).contains(inputData(0))) { @@ -277,7 +279,7 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper { test("BinaryComparison: null test") { // Use -1 (default value for codegen) which can trigger some weird bugs, e.g. SPARK-14757 val normalInt = Literal(-1) - val nullInt = Literal.create(null, IntegerType) + val nullInt = NonFoldableLiteral(null, IntegerType) def nullTest(op: (Expression, Expression) => Expression): Unit = { checkEvaluation(op(normalInt, nullInt), null) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala index c0fabc81e4..a1e5dfdbf7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala @@ -120,6 +120,8 @@ object JDBCRDD extends Logging { case StringStartsWith(attr, value) => s"${attr} LIKE '${value}%'" case StringEndsWith(attr, value) => s"${attr} LIKE '%${value}'" case StringContains(attr, value) => s"${attr} LIKE '%${value}%'" + case In(attr, value) if value.isEmpty => + s"CASE WHEN ${attr} IS NULL THEN NULL ELSE FALSE END" case In(attr, value) => s"$attr IN (${compileValue(value)})" case Not(f) => compileFilter(f).map(p => s"(NOT ($p))").getOrElse(null) case Or(f1, f2) => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala index 71cf5e6a22..f921939ada 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala @@ -619,6 +619,8 @@ class JDBCSuite extends 
SparkFunSuite assert(doCompileFilter(GreaterThan("col0", 3)) === "col0 > 3") assert(doCompileFilter(GreaterThanOrEqual("col0", 3)) === "col0 >= 3") assert(doCompileFilter(In("col1", Array("jkl"))) === "col1 IN ('jkl')") + assert(doCompileFilter(In("col1", Array.empty)) === + "CASE WHEN col1 IS NULL THEN NULL ELSE FALSE END") assert(doCompileFilter(Not(In("col1", Array("mno", "pqr")))) === "(NOT (col1 IN ('mno', 'pqr')))") assert(doCompileFilter(IsNull("col1")) === "col1 IS NULL") -- cgit v1.2.3