aboutsummaryrefslogtreecommitdiff
path: root/sql/hive
diff options
context:
space:
mode:
authorDavies Liu <davies@databricks.com>2015-11-04 21:30:21 -0800
committerDavies Liu <davies.liu@gmail.com>2015-11-04 21:30:21 -0800
commit81498dd5c86ca51d2fb351c8ef52cbb28e6844f4 (patch)
tree609fd1a3df2aeb64592dee8930ccbcf6efc0ec2e /sql/hive
parentd0b56339625727744e2c30fc2167bc6a457d37f7 (diff)
downloadspark-81498dd5c86ca51d2fb351c8ef52cbb28e6844f4.tar.gz
spark-81498dd5c86ca51d2fb351c8ef52cbb28e6844f4.tar.bz2
spark-81498dd5c86ca51d2fb351c8ef52cbb28e6844f4.zip
[SPARK-11425] [SPARK-11486] Improve hybrid aggregation
After aggregation, the dataset can be smaller than the input, so it is better to run hash-based aggregation over all inputs and then use sort-based aggregation to merge the results. Author: Davies Liu <davies@databricks.com> Closes #9383 from davies/fix_switch.
Diffstat (limited to 'sql/hive')
-rw-r--r--sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala12
1 files changed, 9 insertions, 3 deletions
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala
index 74061db0f2..ea80060e37 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala
@@ -22,13 +22,12 @@ import scala.collection.JavaConverters._
import org.apache.spark.SparkException
import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.expressions.UnsafeRow
-import org.apache.spark.sql.execution.aggregate
import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction}
import org.apache.spark.sql.functions._
-import org.apache.spark.sql.test.SQLTestUtils
-import org.apache.spark.sql.types._
import org.apache.spark.sql.hive.aggregate.{MyDoubleAvg, MyDoubleSum}
import org.apache.spark.sql.hive.test.TestHiveSingleton
+import org.apache.spark.sql.test.SQLTestUtils
+import org.apache.spark.sql.types._
class ScalaAggregateFunction(schema: StructType) extends UserDefinedAggregateFunction {
@@ -702,6 +701,13 @@ abstract class AggregationQuerySuite extends QueryTest with SQLTestUtils with Te
}
}
+ test("no aggregation function (SPARK-11486)") {
+ val df = sqlContext.range(20).selectExpr("id", "repeat(id, 1) as s")
+ .groupBy("s").count()
+ .groupBy().count()
+ checkAnswer(df, Row(20) :: Nil)
+ }
+
test("udaf with all data types") {
val struct =
StructType(