author    Dongjoon Hyun <dongjoon@apache.org>    2016-06-29 16:08:10 -0700
committer Reynold Xin <rxin@databricks.com>      2016-06-29 16:08:10 -0700
commit    2eaabfa4142d4050be2b45fd277ff5c7fa430581 (patch)
tree      9fa2b9a111b7003a5a07303e5f04f249bd84cd4e /sql/hive/src/main
parent    23c58653f900bfb71ef2b3186a95ad2562c33969 (diff)
[SPARK-16228][SQL] HiveSessionCatalog should return `double`-param functions for decimal param lookups
## What changes were proposed in this pull request?

This PR adds a fallback lookup that casts `DecimalType` arguments to `DoubleType` when resolving external functions declared with `double`-type parameters.

**Reported Error Scenarios**

```scala
scala> sql("select percentile(value, 0.5) from values 1,2,3 T(value)")
org.apache.spark.sql.AnalysisException: ... No matching method for class org.apache.hadoop.hive.ql.udf.UDAFPercentile with (int, decimal(38,18)). Possible choices: _FUNC_(bigint, array<double>)  _FUNC_(bigint, double)  ; line 1 pos 7

scala> sql("select percentile_approx(value, 0.5) from values 1.0,2.0,3.0 T(value)")
org.apache.spark.sql.AnalysisException: ... Only a float/double or float/double array argument is accepted as parameter 2, but decimal(38,18) was passed instead.; line 1 pos 7
```

## How was this patch tested?

Passes the Jenkins tests (including a new test case).

Author: Dongjoon Hyun <dongjoon@apache.org>

Closes #13930 from dongjoon-hyun/SPARK-16228.
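The patch below implements this as a try/catch around the existing lookup. As a minimal, self-contained sketch of that retry idiom, consider the following; the names `resolve` and `widen` are hypothetical stand-ins for `lookupFunction0` and the `Cast(child, DoubleType)` wrapping, and the real patch operates on Catalyst `Expression` trees rather than type-name strings:

```scala
import scala.util.control.NonFatal

object DecimalFallbackSketch {
  // Stand-in for the Hive function registry: only "double" signatures match.
  def resolve(argTypes: Seq[String]): String =
    if (argTypes.contains("decimal")) {
      throw new IllegalArgumentException(s"No matching method for (${argTypes.mkString(", ")})")
    } else {
      s"matched (${argTypes.mkString(", ")})"
    }

  // Stand-in for wrapping a DecimalType child in Cast(child, DoubleType).
  def widen(argType: String): String =
    if (argType == "decimal") "double" else argType

  def lookupWithFallback(argTypes: Seq[String]): String =
    try {
      resolve(argTypes)
    } catch {
      case NonFatal(_) =>
        // Fall back: retry once with decimal arguments widened to double.
        resolve(argTypes.map(widen))
    }

  def main(args: Array[String]): Unit = {
    // First attempt fails on (int, decimal); the retry matches (int, double).
    println(lookupWithFallback(Seq("int", "decimal")))
  }
}
```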
Diffstat (limited to 'sql/hive/src/main')
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala | 16
1 file changed, 15 insertions(+), 1 deletion(-)
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
index 8a47dcf908..2589b9d4a0 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
@@ -30,12 +30,13 @@ import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
 import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
 import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder
 import org.apache.spark.sql.catalyst.catalog.{FunctionResourceLoader, SessionCatalog}
-import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionInfo}
+import org.apache.spark.sql.catalyst.expressions.{Cast, Expression, ExpressionInfo}
 import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias}
 import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.sql.hive.HiveShim.HiveFunctionWrapper
 import org.apache.spark.sql.hive.client.HiveClient
 import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.types.{DecimalType, DoubleType}
 import org.apache.spark.util.Utils
@@ -163,6 +164,19 @@ private[sql] class HiveSessionCatalog(
   }
 
   override def lookupFunction(name: FunctionIdentifier, children: Seq[Expression]): Expression = {
+    try {
+      lookupFunction0(name, children)
+    } catch {
+      case NonFatal(_) =>
+        // SPARK-16228 ExternalCatalog may recognize `double`-type only.
+        val newChildren = children.map { child =>
+          if (child.dataType.isInstanceOf[DecimalType]) Cast(child, DoubleType) else child
+        }
+        lookupFunction0(name, newChildren)
+    }
+  }
+
+  private def lookupFunction0(name: FunctionIdentifier, children: Seq[Expression]): Expression = {
     // TODO: Once lookupFunction accepts a FunctionIdentifier, we should refactor this method to
     // if (super.functionExists(name)) {
     //   super.lookupFunction(name, children)
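With the fallback in place, the queries from the bug report should now resolve against the `double`-parameter signatures of the Hive UDAFs. A hedged verification example (session output is not reproduced here; the expected value follows from the math, not from output captured in this patch):

```scala
// The decimal literal 0.5 no longer aborts resolution: on the retry it is
// wrapped in Cast(_, DoubleType), so UDAFPercentile's _FUNC_(bigint, double)
// overload matches. The median of 1, 2, 3 should come back as 2.0.
sql("select percentile(value, 0.5) from values 1,2,3 T(value)").show()

// Likewise, percentile_approx now accepts the widened double as parameter 2.
sql("select percentile_approx(value, 0.5) from values 1.0,2.0,3.0 T(value)").show()
```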