about summary refs log tree commit diff
path: root/sql/hive
diff options
context:
space:
mode:
Diffstat (limited to 'sql/hive')
-rw-r--r-- sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala | 30
-rw-r--r-- sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala | 29
2 files changed, 53 insertions, 6 deletions
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
index ec653efcc8..c69e3dba6b 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
@@ -204,6 +204,9 @@ private[hive] object HiveQl {
class ParseException(sql: String, cause: Throwable)
extends Exception(s"Failed to parse: $sql", cause)
+ /**
+  * Signals a semantic error in a query, for example HAVING used without GROUP BY.
+  * The exception message is `msg` prefixed with "Error in semantic analysis: ".
+  */
+ class SemanticException(msg: String)
+ extends Exception(s"Error in semantic analysis: $msg")
+
/**
* Returns the AST for the given SQL string.
*/
@@ -480,6 +483,7 @@ private[hive] object HiveQl {
whereClause ::
groupByClause ::
orderByClause ::
+ havingClause ::
sortByClause ::
clusterByClause ::
distributeByClause ::
@@ -494,6 +498,7 @@ private[hive] object HiveQl {
"TOK_WHERE",
"TOK_GROUPBY",
"TOK_ORDERBY",
+ "TOK_HAVING",
"TOK_SORTBY",
"TOK_CLUSTERBY",
"TOK_DISTRIBUTEBY",
@@ -576,21 +581,34 @@ private[hive] object HiveQl {
val withDistinct =
if (selectDistinctClause.isDefined) Distinct(withProject) else withProject
+ // Apply the HAVING predicate (if any) on top of the projected, possibly DISTINCT, plan.
+ // HAVING is rejected unless a GROUP BY clause is also present.
+ val withHaving = havingClause.map { having =>
+   if (groupByClause.isEmpty) {
+     throw new SemanticException("HAVING specified without GROUP BY")
+   }
+
+   // The HAVING node is expected to carry exactly one child: the predicate expression.
+   // Fail with a descriptive error rather than an opaque MatchError for any other shape.
+   val havingExpr = having.getChildren.toSeq match {
+     case Seq(hexpr) => nodeToExpr(hexpr)
+     case other =>
+       throw new SemanticException(
+         s"HAVING expects exactly one expression but found ${other.size}")
+   }
+
+   // Cast to boolean so that a non-boolean HAVING expression can still be used as a filter.
+   Filter(Cast(havingExpr, BooleanType), withDistinct)
+ }.getOrElse(withDistinct)
+
val withSort =
(orderByClause, sortByClause, distributeByClause, clusterByClause) match {
case (Some(totalOrdering), None, None, None) =>
- Sort(totalOrdering.getChildren.map(nodeToSortOrder), withDistinct)
+ Sort(totalOrdering.getChildren.map(nodeToSortOrder), withHaving)
case (None, Some(perPartitionOrdering), None, None) =>
- SortPartitions(perPartitionOrdering.getChildren.map(nodeToSortOrder), withDistinct)
+ SortPartitions(perPartitionOrdering.getChildren.map(nodeToSortOrder), withHaving)
case (None, None, Some(partitionExprs), None) =>
- Repartition(partitionExprs.getChildren.map(nodeToExpr), withDistinct)
+ Repartition(partitionExprs.getChildren.map(nodeToExpr), withHaving)
case (None, Some(perPartitionOrdering), Some(partitionExprs), None) =>
SortPartitions(perPartitionOrdering.getChildren.map(nodeToSortOrder),
- Repartition(partitionExprs.getChildren.map(nodeToExpr), withDistinct))
+ Repartition(partitionExprs.getChildren.map(nodeToExpr), withHaving))
case (None, None, None, Some(clusterExprs)) =>
SortPartitions(clusterExprs.getChildren.map(nodeToExpr).map(SortOrder(_, Ascending)),
- Repartition(clusterExprs.getChildren.map(nodeToExpr), withDistinct))
- case (None, None, None, None) => withDistinct
+ Repartition(clusterExprs.getChildren.map(nodeToExpr), withHaving))
+ case (None, None, None, None) => withHaving
case _ => sys.error("Unsupported set of ordering / distribution clauses.")
}
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index 9f5cf282f7..80185098bf 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -224,6 +224,32 @@ class HiveQuerySuite extends HiveComparisonTest {
TestHive.reset()
}
+ test("SPARK-2180: HAVING support in GROUP BY clauses (positive)") {
+   // Four rows in two groups ("foo" and "bar"); `key` is the row's position in the list.
+   val fixture = List(("foo", 2), ("bar", 1), ("foo", 4), ("bar", 3))
+     .zipWithIndex.map { case ((value, attr), key) => HavingRow(key, value, attr) }
+
+   TestHive.sparkContext.parallelize(fixture).registerAsTable("having_test")
+
+   try {
+     // Only "foo" has max(attr) > 3, so exactly one group should pass the HAVING filter.
+     val results =
+       hql("SELECT value, max(attr) AS attr FROM having_test GROUP BY value HAVING attr > 3")
+         .collect()
+         .map(row => (row.getString(0), row.getInt(1)))
+
+     assert(results === Array(("foo", 4)))
+   } finally {
+     // Reset even when the query or the assertion fails, so cleanup is never skipped.
+     TestHive.reset()
+   }
+ }
+
+ test("SPARK-2180: HAVING without GROUP BY raises exception") {
+   // A HAVING clause with no accompanying GROUP BY must be rejected with an exception.
+   val havingWithoutGroupBy = "SELECT value, attr FROM having_test HAVING attr > 3"
+   intercept[Exception] { hql(havingWithoutGroupBy) }
+ }
+
+ test("SPARK-2180: HAVING with non-boolean clause raises no exceptions") {
+   // `c` is a count rather than a boolean. The test passes as long as running the query
+   // does not throw, so the collected rows are deliberately not inspected.
+   hql("select key, count(*) c from src group by key having c").collect()
+ }
+
test("Query Hive native command execution result") {
val tableName = "test_native_commands"
@@ -441,3 +467,6 @@ class HiveQuerySuite extends HiveComparisonTest {
// since they modify /clear stuff.
}
+
+// Row type for the "having_test" table that the SPARK-2180 HAVING test in HiveQuerySuite registers.
+case class HavingRow(key: Int, value: String, attr: Int)