diff options
Diffstat (limited to 'sql/hive/src')
10 files changed, 167 insertions, 0 deletions
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala index b745d8ffd8..844673f66d 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala @@ -811,6 +811,8 @@ private[hive] object HiveQl { val IN = "(?i)IN".r val DIV = "(?i)DIV".r val BETWEEN = "(?i)BETWEEN".r + val WHEN = "(?i)WHEN".r + val CASE = "(?i)CASE".r protected def nodeToExpr(node: Node): Expression = node match { /* Attribute References */ @@ -917,6 +919,21 @@ private[hive] object HiveQl { case Token(OR(), left :: right:: Nil) => Or(nodeToExpr(left), nodeToExpr(right)) case Token(NOT(), child :: Nil) => Not(nodeToExpr(child)) + /* Case statements */ + case Token("TOK_FUNCTION", Token(WHEN(), Nil) :: branches) => + CaseWhen(branches.map(nodeToExpr)) + case Token("TOK_FUNCTION", Token(CASE(), Nil) :: branches) => + val transformed = branches.drop(1).sliding(2, 2).map { + case Seq(condVal, value) => + // FIXME (SPARK-2155): the key will get evaluated multiple times in CaseWhen's eval(). + // Hence effectful / non-deterministic key expressions are *not* supported at the moment. + // We should consider adding new Expressions to get around this. 
+ Seq(Equals(nodeToExpr(branches(0)), nodeToExpr(condVal)), + nodeToExpr(value)) + case Seq(elseVal) => Seq(nodeToExpr(elseVal)) + }.toSeq.reduce(_ ++ _) + CaseWhen(transformed) + /* Complex datatype manipulation */ case Token("[", child :: ordinal :: Nil) => GetItem(nodeToExpr(child), nodeToExpr(ordinal)) diff --git a/sql/hive/src/test/resources/golden/case statements WITHOUT key #1-0-36750f0f6727c287c471309689ff7563 b/sql/hive/src/test/resources/golden/case statements WITHOUT key #1-0-36750f0f6727c287c471309689ff7563 new file mode 100644 index 0000000000..816fe57d16 --- /dev/null +++ b/sql/hive/src/test/resources/golden/case statements WITHOUT key #1-0-36750f0f6727c287c471309689ff7563 @@ -0,0 +1,14 @@ +NULL +3 +3 +3 +NULL +NULL +3 +3 +3 +3 +NULL +3 +3 +3 diff --git a/sql/hive/src/test/resources/golden/case statements WITHOUT key #2-0-e3a2b981ebff7e273537dd6c43ece0c0 b/sql/hive/src/test/resources/golden/case statements WITHOUT key #2-0-e3a2b981ebff7e273537dd6c43ece0c0 new file mode 100644 index 0000000000..4cca081e6e --- /dev/null +++ b/sql/hive/src/test/resources/golden/case statements WITHOUT key #2-0-e3a2b981ebff7e273537dd6c43ece0c0 @@ -0,0 +1,14 @@ +4 +3 +3 +3 +4 +4 +3 +3 +3 +3 +4 +3 +3 +3 diff --git a/sql/hive/src/test/resources/golden/case statements WITHOUT key #3-0-be5efc0574a97ec465e2686f4a724bd5 b/sql/hive/src/test/resources/golden/case statements WITHOUT key #3-0-be5efc0574a97ec465e2686f4a724bd5 new file mode 100644 index 0000000000..8d0416a8f8 --- /dev/null +++ b/sql/hive/src/test/resources/golden/case statements WITHOUT key #3-0-be5efc0574a97ec465e2686f4a724bd5 @@ -0,0 +1,14 @@ +2 +3 +3 +3 +2 +2 +3 +3 +3 +3 +NULL +3 +3 +3 diff --git a/sql/hive/src/test/resources/golden/case statements WITHOUT key #4-0-631f824a91b7230657bea7a05e393a1e b/sql/hive/src/test/resources/golden/case statements WITHOUT key #4-0-631f824a91b7230657bea7a05e393a1e new file mode 100644 index 0000000000..6ed452bcd8 --- /dev/null +++ b/sql/hive/src/test/resources/golden/case 
statements WITHOUT key #4-0-631f824a91b7230657bea7a05e393a1e @@ -0,0 +1,14 @@ +2 +3 +3 +3 +2 +2 +3 +3 +3 +3 +0 +3 +3 +3 diff --git a/sql/hive/src/test/resources/golden/case statements with key #1-0-616830b2011da0990e87a188fb609299 b/sql/hive/src/test/resources/golden/case statements with key #1-0-616830b2011da0990e87a188fb609299 new file mode 100644 index 0000000000..3f5a2fbbe9 --- /dev/null +++ b/sql/hive/src/test/resources/golden/case statements with key #1-0-616830b2011da0990e87a188fb609299 @@ -0,0 +1,14 @@ +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL diff --git a/sql/hive/src/test/resources/golden/case statements with key #2-0-6c5b5a997949f9e5ab9676b60e95657b b/sql/hive/src/test/resources/golden/case statements with key #2-0-6c5b5a997949f9e5ab9676b60e95657b new file mode 100644 index 0000000000..e1ca6e76d1 --- /dev/null +++ b/sql/hive/src/test/resources/golden/case statements with key #2-0-6c5b5a997949f9e5ab9676b60e95657b @@ -0,0 +1,14 @@ +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +3 +0 +0 +0 diff --git a/sql/hive/src/test/resources/golden/case statements with key #3-0-a241862582c47d9e98be95339d35c7c4 b/sql/hive/src/test/resources/golden/case statements with key #3-0-a241862582c47d9e98be95339d35c7c4 new file mode 100644 index 0000000000..896207fdbc --- /dev/null +++ b/sql/hive/src/test/resources/golden/case statements with key #3-0-a241862582c47d9e98be95339d35c7c4 @@ -0,0 +1,14 @@ +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +3 +NULL +NULL +NULL diff --git a/sql/hive/src/test/resources/golden/case statements with key #4-0-ea87ca38ead8858d2337792dcd430226 b/sql/hive/src/test/resources/golden/case statements with key #4-0-ea87ca38ead8858d2337792dcd430226 new file mode 100644 index 0000000000..e1ca6e76d1 --- /dev/null +++ b/sql/hive/src/test/resources/golden/case statements with key #4-0-ea87ca38ead8858d2337792dcd430226 @@ -0,0 +1,14 @@ +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +3 +0 +0 +0 diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala index 04652587f9..fe698f0fc5 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala @@ -164,6 +164,44 @@ class HiveQuerySuite extends HiveComparisonTest { hql("SELECT * FROM src").toString } + createQueryTest("case statements with key #1", + "SELECT (CASE 1 WHEN 2 THEN 3 END) FROM src where key < 15") + + createQueryTest("case statements with key #2", + "SELECT (CASE key WHEN 2 THEN 3 ELSE 0 END) FROM src WHERE key < 15") + + createQueryTest("case statements with key #3", + "SELECT (CASE key WHEN 2 THEN 3 WHEN NULL THEN 4 END) FROM src WHERE key < 15") + + createQueryTest("case statements with key #4", + "SELECT (CASE key WHEN 2 THEN 3 WHEN NULL THEN 4 ELSE 0 END) FROM src WHERE key < 15") + + createQueryTest("case statements WITHOUT key #1", + "SELECT (CASE WHEN key > 2 THEN 3 END) FROM src WHERE key < 15") + + createQueryTest("case statements WITHOUT key #2", + "SELECT (CASE WHEN key > 2 THEN 3 ELSE 4 END) FROM src WHERE key < 15") + + createQueryTest("case statements WITHOUT key #3", + "SELECT (CASE WHEN key > 2 THEN 3 WHEN 2 > key THEN 2 END) FROM src WHERE key < 15") + + createQueryTest("case statements WITHOUT key #4", + "SELECT (CASE WHEN key > 2 THEN 3 WHEN 2 > key THEN 2 ELSE 0 END) FROM src WHERE key < 15") + + test("implement identity function using case statement") { + val actual = hql("SELECT (CASE key WHEN key THEN key END) FROM src").collect().toSet + val expected = hql("SELECT key FROM src").collect().toSet + assert(actual === expected) + } + + // TODO: adopt this test when Spark SQL has the functionality / framework to report errors. + // See https://github.com/apache/spark/pull/1055#issuecomment-45820167 for a discussion. 
+ ignore("non-boolean conditions in a CaseWhen are illegal") { + intercept[Exception] { + hql("SELECT (CASE WHEN key > 2 THEN 3 WHEN 1 THEN 2 ELSE 0 END) FROM src").collect() + } + } + private val explainCommandClassName = classOf[execution.ExplainCommand].getSimpleName.stripSuffix("$") |