aboutsummaryrefslogtreecommitdiff
path: root/sql/hive/src/test
diff options
context:
space:
mode:
authorroot <root@iZbp1gsnrlfzjxh82cz80vZ.(none)>2016-11-08 12:09:32 +0100
committerHerman van Hovell <hvanhovell@databricks.com>2016-11-08 12:09:32 +0100
commitc291bd2745a8a2e4ba91d8697879eb8da10287e2 (patch)
tree5fd2f31509376493cdbd26a188f986961c880836 /sql/hive/src/test
parent47731e1865fa1e3a8881a1f4420017bdc026e455 (diff)
downloadspark-c291bd2745a8a2e4ba91d8697879eb8da10287e2.tar.gz
spark-c291bd2745a8a2e4ba91d8697879eb8da10287e2.tar.bz2
spark-c291bd2745a8a2e4ba91d8697879eb8da10287e2.zip
[SPARK-18137][SQL] Fix RewriteDistinctAggregates UnresolvedException when a UDAF has a foldable TypeCheck
## What changes were proposed in this pull request? In the RewriteDistinctAggregates rewrite function, after the UDAF's children are mapped to AttributeReferences, if the UDAF (such as ApproximatePercentile) has a foldable TypeCheck for its input, the check fails because an AttributeReference is not foldable. The UDAF is then left unresolved, and calling nullify on the unresolved object throws an exception. In this PR, only unfoldable children are mapped to AttributeReferences, which avoids tripping the UDAF's foldable TypeCheck; likewise, only unfoldable children are expanded, since there is no need to expand a static (foldable) value. **Before sql result** > select percentile_approx(key,0.99999),count(distinct key),sum(distinct key) from src limit 1 > org.apache.spark.sql.catalyst.analysis.UnresolvedException: Invalid call to dataType on unresolved object, tree: 'percentile_approx(CAST(src.`key` AS DOUBLE), CAST(0.99999BD AS DOUBLE), 10000) > at org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute.dataType(unresolved.scala:92) > at org.apache.spark.sql.catalyst.optimizer.RewriteDistinctAggregates$.org$apache$spark$sql$catalyst$optimizer$RewriteDistinctAggregates$$nullify(RewriteDistinctAggregates.scala:261) **After sql result** > select percentile_approx(key,0.99999),count(distinct key),sum(distinct key) from src limit 1 > [498.0,309,79136] ## How was this patch tested? Added a test case in HiveUDFSuite. Author: root <root@iZbp1gsnrlfzjxh82cz80vZ.(none)> Closes #15668 from windpiger/RewriteDistinctUDAFUnresolveExcep.
Diffstat (limited to 'sql/hive/src/test')
-rw-r--r--sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala35
1 files changed, 35 insertions, 0 deletions
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala
index f690035c84..48adc833f4 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala
@@ -150,6 +150,41 @@ class HiveUDFSuite extends QueryTest with TestHiveSingleton with SQLTestUtils {
}
test("Generic UDAF aggregates") {
+
+ checkAnswer(sql(
+ """
+ |SELECT percentile_approx(2, 0.99999),
+ | sum(distinct 1),
+ | count(distinct 1,2,3,4) FROM src LIMIT 1
+ """.stripMargin), sql("SELECT 2, 1, 1 FROM src LIMIT 1").collect().toSeq)
+
+ checkAnswer(sql(
+ """
+ |SELECT ceiling(percentile_approx(distinct key, 0.99999)),
+ | count(distinct key),
+ | sum(distinct key),
+ | count(distinct 1),
+ | sum(distinct 1),
+ | sum(1) FROM src LIMIT 1
+ """.stripMargin),
+ sql(
+ """
+ |SELECT max(key),
+ | count(distinct key),
+ | sum(distinct key),
+ | 1, 1, sum(1) FROM src LIMIT 1
+ """.stripMargin).collect().toSeq)
+
+ checkAnswer(sql(
+ """
+ |SELECT ceiling(percentile_approx(distinct key, 0.9 + 0.09999)),
+ | count(distinct key), sum(distinct key),
+ | count(distinct 1), sum(distinct 1),
+ | sum(1) FROM src LIMIT 1
+ """.stripMargin),
+ sql("SELECT max(key), count(distinct key), sum(distinct key), 1, 1, sum(1) FROM src LIMIT 1")
+ .collect().toSeq)
+
checkAnswer(sql("SELECT ceiling(percentile_approx(key, 0.99999D)) FROM src LIMIT 1"),
sql("SELECT max(key) FROM src LIMIT 1").collect().toSeq)