aboutsummaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
author: ravipesala <ravindra.pesala@huawei.com> 2014-12-01 13:26:44 -0800
committer: Michael Armbrust <michael@databricks.com> 2014-12-01 13:28:04 -0800
commit: 6a9ff19dc06745144d5b311d4f87073c81d53a8f (patch)
tree: f6bcc0ddb1f51320ad1c3aeebb74aa2b1642249a /sql
parent: b57365a1ec89e31470f424ff37d5ebc7c90a39d8 (diff)
download: spark-6a9ff19dc06745144d5b311d4f87073c81d53a8f.tar.gz
spark-6a9ff19dc06745144d5b311d4f87073c81d53a8f.tar.bz2
spark-6a9ff19dc06745144d5b311d4f87073c81d53a8f.zip
[SPARK-4650][SQL] Supporting multi column support in countDistinct function like count(distinct c1,c2..) in Spark SQL
Supporting multiple columns in the countDistinct function, like count(distinct c1,c2..), in Spark SQL.

Author: ravipesala <ravindra.pesala@huawei.com>
Author: Michael Armbrust <michael@databricks.com>

Closes #3511 from ravipesala/countdistinct and squashes the following commits:

cc4dbb1 [ravipesala] style
070e12a [ravipesala] Supporting multi column support in count(distinct c1,c2..) in Spark SQL
Diffstat (limited to 'sql')
-rwxr-xr-x sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala 3
-rw-r--r-- sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala 7
2 files changed, 9 insertions, 1 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala
index a9ff10f2d5..a2bcd73b60 100755
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala
@@ -277,7 +277,8 @@ class SqlParser extends AbstractSparkSQLParser {
| SUM ~> "(" ~> DISTINCT ~> expression <~ ")" ^^ { case exp => SumDistinct(exp) }
| COUNT ~ "(" ~> "*" <~ ")" ^^ { case _ => Count(Literal(1)) }
| COUNT ~ "(" ~> expression <~ ")" ^^ { case exp => Count(exp) }
- | COUNT ~> "(" ~> DISTINCT ~> expression <~ ")" ^^ { case exp => CountDistinct(exp :: Nil) }
+ | COUNT ~> "(" ~> DISTINCT ~> repsep(expression, ",") <~ ")" ^^
+ { case exps => CountDistinct(exps) }
| APPROXIMATE ~ COUNT ~ "(" ~ DISTINCT ~> expression <~ ")" ^^
{ case exp => ApproxCountDistinct(exp) }
| APPROXIMATE ~> "(" ~> floatLit ~ ")" ~ COUNT ~ "(" ~ DISTINCT ~ expression <~ ")" ^^
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 84ee3051eb..f83e647014 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -992,4 +992,11 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll {
"nulldata2 on nulldata1.value <=> nulldata2.value"),
(1 to 2).map(i => Seq(i)))
}
+
+ test("Multi-column COUNT(DISTINCT ...)") {
+ val data = TestData(1,"val_1") :: TestData(2,"val_2") :: Nil
+ val rdd = sparkContext.parallelize((0 to 1).map(i => data(i)))
+ rdd.registerTempTable("distinctData")
+ checkAnswer(sql("SELECT COUNT(DISTINCT key,value) FROM distinctData"), 2)
+ }
}