[SPARK-12578][SQL] Distinct should not be silently ignored when used in an aggregate function with OVER clause

JIRA: https://issues.apache.org/jira/browse/SPARK-12578 This is a slight update to the Hive parser: the DISTINCT keyword is now kept when it is used in an aggregate function with an OVER clause, so that CheckAnalysis will detect it and throw an exception later instead of it being silently ignored. Author: Liang-Chi Hsieh <viirya@gmail.com> Closes #10557 from viirya/keep-distinct-hivesql.
author: Liang-Chi Hsieh <viirya@gmail.com> 2016-01-06 00:40:14 -0800
committer: Reynold Xin <rxin@databricks.com> 2016-01-06 00:40:14 -0800
commit: b2467b381096804b862990d9ecda554f67e07ee1 (patch)
tree: 53f71062e36ea54523260e95aad05a2ff1a657c8 /sql/hive
parent: d1fea41363c175a67b97cb7b3fe89f9043708739 (diff)
download: spark-b2467b381096804b862990d9ecda554f67e07ee1.tar.gz
spark-b2467b381096804b862990d9ecda554f67e07ee1.tar.bz2
spark-b2467b381096804b862990d9ecda554f67e07ee1.zip
2 files changed, 22 insertions, 1 deletion
diff --git a/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/IdentifiersParser.g b/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/IdentifiersParser.g
index 5c3d7ef866..9f1e168374 100644
--- a/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/IdentifiersParser.g
+++ b/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/IdentifiersParser.g
@@ -195,7 +195,7 @@ function
     RPAREN (KW_OVER ws=window_specification)?
            -> {$star != null}? ^(TOK_FUNCTIONSTAR functionName $ws?)
            -> {$dist == null}? ^(TOK_FUNCTION functionName (selectExpression+)? $ws?)
-                            -> ^(TOK_FUNCTIONDI functionName (selectExpression+)?)
+                            -> ^(TOK_FUNCTIONDI functionName (selectExpression+)? $ws?)
     ;
 
 functionName
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
index bf65325d54..593fac2c32 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -915,6 +915,27 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
       ).map(i => Row(i._1, i._2, i._3, i._4)))
   }
 
+  test("window function: distinct should not be silently ignored") {
+    val data = Seq(
+      WindowData(1, "a", 5),
+      WindowData(2, "a", 6),
+      WindowData(3, "b", 7),
+      WindowData(4, "b", 8),
+      WindowData(5, "c", 9),
+      WindowData(6, "c", 10)
+    )
+    sparkContext.parallelize(data).toDF().registerTempTable("windowData")
+
+    val e = intercept[AnalysisException] {
+      sql(
+        """
+          |select month, area, product, sum(distinct product + 1) over (partition by 1 order by 2)
+          |from windowData
+        """.stripMargin)
+    }
+    assert(e.getMessage.contains("Distinct window functions are not supported"))
+  }
+
   test("window function: expressions in arguments of a window functions") {
     val data = Seq(
       WindowData(1, "a", 5),
author	Liang-Chi Hsieh <viirya@gmail.com>	2016-01-06 00:40:14 -0800
committer	Reynold Xin <rxin@databricks.com>	2016-01-06 00:40:14 -0800
commit	b2467b381096804b862990d9ecda554f67e07ee1 (patch)
tree	53f71062e36ea54523260e95aad05a2ff1a657c8 /sql/hive
parent	d1fea41363c175a67b97cb7b3fe89f9043708739 (diff)
download	spark-b2467b381096804b862990d9ecda554f67e07ee1.tar.gz spark-b2467b381096804b862990d9ecda554f67e07ee1.tar.bz2 spark-b2467b381096804b862990d9ecda554f67e07ee1.zip