diff options
author | Davies Liu <davies@databricks.com> | 2015-11-04 21:30:21 -0800 |
---|---|---|
committer | Davies Liu <davies.liu@gmail.com> | 2015-11-04 21:30:21 -0800 |
commit | 81498dd5c86ca51d2fb351c8ef52cbb28e6844f4 (patch) | |
tree | 609fd1a3df2aeb64592dee8930ccbcf6efc0ec2e /sql/hive | |
parent | d0b56339625727744e2c30fc2167bc6a457d37f7 (diff) | |
download | spark-81498dd5c86ca51d2fb351c8ef52cbb28e6844f4.tar.gz spark-81498dd5c86ca51d2fb351c8ef52cbb28e6844f4.tar.bz2 spark-81498dd5c86ca51d2fb351c8ef52cbb28e6844f4.zip |
[SPARK-11425] [SPARK-11486] Improve hybrid aggregation
After aggregation, the dataset could be smaller than the inputs, so it is better to do hash-based aggregation for all inputs first, and then use sort-based aggregation to merge the results.
Author: Davies Liu <davies@databricks.com>
Closes #9383 from davies/fix_switch.
Diffstat (limited to 'sql/hive')
-rw-r--r-- | sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala | 12 |
1 file changed, 9 insertions, 3 deletions
```diff
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala
index 74061db0f2..ea80060e37 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala
@@ -22,13 +22,12 @@ import scala.collection.JavaConverters._
 import org.apache.spark.SparkException
 import org.apache.spark.sql._
 import org.apache.spark.sql.catalyst.expressions.UnsafeRow
-import org.apache.spark.sql.execution.aggregate
 import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction}
 import org.apache.spark.sql.functions._
-import org.apache.spark.sql.test.SQLTestUtils
-import org.apache.spark.sql.types._
 import org.apache.spark.sql.hive.aggregate.{MyDoubleAvg, MyDoubleSum}
 import org.apache.spark.sql.hive.test.TestHiveSingleton
+import org.apache.spark.sql.test.SQLTestUtils
+import org.apache.spark.sql.types._
 
 class ScalaAggregateFunction(schema: StructType) extends UserDefinedAggregateFunction {
 
@@ -702,6 +701,13 @@ abstract class AggregationQuerySuite extends QueryTest with SQLTestUtils with Te
     }
   }
 
+  test("no aggregation function (SPARK-11486)") {
+    val df = sqlContext.range(20).selectExpr("id", "repeat(id, 1) as s")
+      .groupBy("s").count()
+      .groupBy().count()
+    checkAnswer(df, Row(20) :: Nil)
+  }
+
   test("udaf with all data types") {
     val struct =
       StructType(
```