aboutsummaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
author: Davies Liu <davies@databricks.com> 2016-01-06 23:46:12 -0800
committer: Reynold Xin <rxin@databricks.com> 2016-01-06 23:46:12 -0800
commit: fd1dcfaf2608c2cc3a439ed3ca044ae655982306 (patch)
tree: 01c77444dc5ac7cccc80447f7103dd1801e8a361 /sql
parent: 6a1c864ab6ee3e869a16ffdbaf6fead21c7aac6d (diff)
download: spark-fd1dcfaf2608c2cc3a439ed3ca044ae655982306.tar.gz
spark-fd1dcfaf2608c2cc3a439ed3ca044ae655982306.tar.bz2
spark-fd1dcfaf2608c2cc3a439ed3ca044ae655982306.zip
[SPARK-12542][SQL] support except/intersect in HiveQl
Parse the SQL query with except/intersect in the FROM clause for HiveQL. Author: Davies Liu <davies@databricks.com> Closes #10622 from davies/intersect.
Diffstat (limited to 'sql')
-rw-r--r-- sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlLexer.g | 1
-rw-r--r-- sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlParser.g | 12
-rw-r--r-- sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystQl.scala | 7
-rw-r--r-- sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystQlSuite.scala | 32
-rw-r--r-- sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala | 18
5 files changed, 65 insertions, 5 deletions
diff --git a/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlLexer.g b/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlLexer.g
index e01e7101d0..44a63fbef2 100644
--- a/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlLexer.g
+++ b/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlLexer.g
@@ -103,6 +103,7 @@ KW_CLUSTER: 'CLUSTER';
KW_DISTRIBUTE: 'DISTRIBUTE';
KW_SORT: 'SORT';
KW_UNION: 'UNION';
+KW_EXCEPT: 'EXCEPT';
KW_LOAD: 'LOAD';
KW_EXPORT: 'EXPORT';
KW_IMPORT: 'IMPORT';
diff --git a/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlParser.g b/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlParser.g
index 4afce3090f..cf8a56566d 100644
--- a/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlParser.g
+++ b/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlParser.g
@@ -88,6 +88,8 @@ TOK_DISTRIBUTEBY;
TOK_SORTBY;
TOK_UNIONALL;
TOK_UNIONDISTINCT;
+TOK_EXCEPT;
+TOK_INTERSECT;
TOK_JOIN;
TOK_LEFTOUTERJOIN;
TOK_RIGHTOUTERJOIN;
@@ -2122,6 +2124,8 @@ setOperator
@after { popMsg(state); }
: KW_UNION KW_ALL -> ^(TOK_UNIONALL)
| KW_UNION KW_DISTINCT? -> ^(TOK_UNIONDISTINCT)
+ | KW_EXCEPT -> ^(TOK_EXCEPT)
+ | KW_INTERSECT -> ^(TOK_INTERSECT)
;
queryStatementExpression[boolean topLevel]
@@ -2242,7 +2246,7 @@ setOpSelectStatement[CommonTree t, boolean topLevel]
^(TOK_QUERY
^(TOK_FROM
^(TOK_SUBQUERY
- ^(TOK_UNIONALL {$setOpSelectStatement.tree} $b)
+ ^($u {$setOpSelectStatement.tree} $b)
{adaptor.create(Identifier, generateUnionAlias())}
)
)
@@ -2252,12 +2256,12 @@ setOpSelectStatement[CommonTree t, boolean topLevel]
)
)
-> {$setOpSelectStatement.tree != null && $u.tree.getType()!=SparkSqlParser.TOK_UNIONDISTINCT}?
- ^(TOK_UNIONALL {$setOpSelectStatement.tree} $b)
+ ^($u {$setOpSelectStatement.tree} $b)
-> {$setOpSelectStatement.tree == null && $u.tree.getType()==SparkSqlParser.TOK_UNIONDISTINCT}?
^(TOK_QUERY
^(TOK_FROM
^(TOK_SUBQUERY
- ^(TOK_UNIONALL {$t} $b)
+ ^($u {$t} $b)
{adaptor.create(Identifier, generateUnionAlias())}
)
)
@@ -2266,7 +2270,7 @@ setOpSelectStatement[CommonTree t, boolean topLevel]
^(TOK_SELECTDI ^(TOK_SELEXPR TOK_ALLCOLREF))
)
)
- -> ^(TOK_UNIONALL {$t} $b)
+ -> ^($u {$t} $b)
)+
o=orderByClause?
c=clusterByClause?
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystQl.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystQl.scala
index 42bdf25b61..1eda4a9a97 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystQl.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystQl.scala
@@ -399,9 +399,14 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C
// return With plan if there is CTE
cteRelations.map(With(query, _)).getOrElse(query)
- // HIVE-9039 renamed TOK_UNION => TOK_UNIONALL while adding TOK_UNIONDISTINCT
case Token("TOK_UNIONALL", left :: right :: Nil) =>
Union(nodeToPlan(left), nodeToPlan(right))
+ case Token("TOK_UNIONDISTINCT", left :: right :: Nil) =>
+ Distinct(Union(nodeToPlan(left), nodeToPlan(right)))
+ case Token("TOK_EXCEPT", left :: right :: Nil) =>
+ Except(nodeToPlan(left), nodeToPlan(right))
+ case Token("TOK_INTERSECT", left :: right :: Nil) =>
+ Intersect(nodeToPlan(left), nodeToPlan(right))
case _ =>
noParseRule("Plan", node)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystQlSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystQlSuite.scala
new file mode 100644
index 0000000000..0fee97fb07
--- /dev/null
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystQlSuite.scala
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst
+
+import org.apache.spark.sql.catalyst.plans.PlanTest
+
+class CatalystQlSuite extends PlanTest {
+
+ test("parse union/except/intersect") {
+ val paresr = new CatalystQl()
+ paresr.createPlan("select * from t1 union all select * from t2")
+ paresr.createPlan("select * from t1 union distinct select * from t2")
+ paresr.createPlan("select * from t1 union select * from t2")
+ paresr.createPlan("select * from t1 except select * from t2")
+ paresr.createPlan("select * from t1 intersect select * from t2")
+ }
+}
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index 98e22c2e2c..fa99289b41 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -787,6 +787,24 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter {
assert(sql("select key from src having key > 490").collect().size < 100)
}
+ test("union/except/intersect") {
+ assertResult(Array(Row(1), Row(1))) {
+ sql("select 1 as a union all select 1 as a").collect()
+ }
+ assertResult(Array(Row(1))) {
+ sql("select 1 as a union distinct select 1 as a").collect()
+ }
+ assertResult(Array(Row(1))) {
+ sql("select 1 as a union select 1 as a").collect()
+ }
+ assertResult(Array()) {
+ sql("select 1 as a except select 1 as a").collect()
+ }
+ assertResult(Array(Row(1))) {
+ sql("select 1 as a intersect select 1 as a").collect()
+ }
+ }
+
test("SPARK-5383 alias for udfs with multi output columns") {
assert(
sql("select stack(2, key, value, key, value) as (a, b) from src limit 5")