aboutsummaryrefslogtreecommitdiff
path: root/sql/core/src
diff options
context:
space:
mode:
authorHerman van Hovell <hvanhovell@questtec.nl>2015-11-08 11:06:10 -0800
committerYin Huai <yhuai@databricks.com>2015-11-08 11:06:10 -0800
commit30c8ba71a76788cbc6916bc1ba6bc8522925fc2b (patch)
tree851dbdcce7d78bbf6fd4c948dd4407642fea63cc /sql/core/src
parent5c4e6d7ec9157c02494a382dfb49e7fbde3be222 (diff)
downloadspark-30c8ba71a76788cbc6916bc1ba6bc8522925fc2b.tar.gz
spark-30c8ba71a76788cbc6916bc1ba6bc8522925fc2b.tar.bz2
spark-30c8ba71a76788cbc6916bc1ba6bc8522925fc2b.zip
[SPARK-11451][SQL] Support single distinct count on multiple columns.
This PR adds support for multiple columns in a single count distinct aggregate to the new aggregation path. cc yhuai Author: Herman van Hovell <hvanhovell@questtec.nl> Closes #9409 from hvanhovell/SPARK-11451.
Diffstat (limited to 'sql/core/src')
-rw-r--r--sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala25
1 files changed, 25 insertions, 0 deletions
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
index 2e679e7bc4..eb1ee266c5 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
@@ -162,6 +162,31 @@ class DataFrameAggregateSuite extends QueryTest with SharedSQLContext {
)
}
+ test("multiple column distinct count") { // countDistinct over 2 and 3 columns, both ungrouped and grouped by key1
+ val df1 = Seq(
+ ("a", "b", "c"),
+ ("a", "b", "c"),
+ ("a", "b", "d"),
+ ("x", "y", "z"),
+ ("x", "q", null.asInstanceOf[String])) // the cast types the null as String, keeping every tuple (String, String, String)
+ .toDF("key1", "key2", "key3")
+ // Two columns: the distinct (key1, key2) pairs in the data are (a, b), (x, y) and (x, q), hence 3.
+ checkAnswer(
+ df1.agg(countDistinct('key1, 'key2)),
+ Row(3)
+ )
+ // Three columns: the data holds four distinct triples, but 3 is expected; the asserted result leaves out (x, q, null), whose key3 is null.
+ checkAnswer(
+ df1.agg(countDistinct('key1, 'key2, 'key3)),
+ Row(3)
+ )
+ // Grouped by key1: "a" has (b, c) and (b, d); for "x" the expectation counts (y, z) but not (q, null).
+ checkAnswer(
+ df1.groupBy('key1).agg(countDistinct('key2, 'key3)),
+ Seq(Row("a", 2), Row("x", 1))
+ )
+ }
+
test("zero count") {
val emptyTableData = Seq.empty[(Int, Int)].toDF("a", "b")
checkAnswer(