diff options
author | Daoyuan Wang <daoyuan.wang@intel.com> | 2014-09-23 11:47:53 -0700 |
---|---|---|
committer | Michael Armbrust <michael@databricks.com> | 2014-09-23 11:47:53 -0700 |
commit | 116016b481cecbd8ad6e9717d92f977a164a6653 (patch) | |
tree | d92f42e70f910cfbad3041b24568a86f93a3d523 | |
parent | 66bc0f2d675d06cdd48638f124a1ff32be2bf456 (diff) | |
download | spark-116016b481cecbd8ad6e9717d92f977a164a6653.tar.gz spark-116016b481cecbd8ad6e9717d92f977a164a6653.tar.bz2 spark-116016b481cecbd8ad6e9717d92f977a164a6653.zip |
[SPARK-3582][SQL] not limit argument type for hive simple udf
Since we have moved to `ConventionHelper`, it is quite easy to avoid call `javaClassToDataType` in hive simple udf. This will solve SPARK-3582.
Author: Daoyuan Wang <daoyuan.wang@intel.com>
Closes #2506 from adrian-wang/spark3582 and squashes the following commits:
450c28e [Daoyuan Wang] not limit argument type for hive simple udf
-rw-r--r-- | sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala | 4 | ||||
-rw-r--r-- | sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala | 22 |
2 files changed, 4 insertions, 22 deletions
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala index 943bbaa8ce..fa889ec104 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala @@ -137,7 +137,7 @@ private[hive] trait HiveInspectors { /** Converts native catalyst types to the types expected by Hive */ def wrap(a: Any): AnyRef = a match { - case s: String => new hadoopIo.Text(s) // TODO why should be Text? + case s: String => s: java.lang.String case i: Int => i: java.lang.Integer case b: Boolean => b: java.lang.Boolean case f: Float => f: java.lang.Float @@ -145,7 +145,7 @@ private[hive] trait HiveInspectors { case l: Long => l: java.lang.Long case l: Short => l: java.lang.Short case l: Byte => l: java.lang.Byte - case b: BigDecimal => b.bigDecimal + case b: BigDecimal => new HiveDecimal(b.underlying()) case b: Array[Byte] => b case t: java.sql.Timestamp => t case s: Seq[_] => seqAsJavaList(s.map(wrap)) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala index 19ff3b66ad..68944ed4ef 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala @@ -51,19 +51,7 @@ private[hive] abstract class HiveFunctionRegistry val functionClassName = functionInfo.getFunctionClass.getName if (classOf[UDF].isAssignableFrom(functionInfo.getFunctionClass)) { - val function = functionInfo.getFunctionClass.newInstance().asInstanceOf[UDF] - val method = function.getResolver.getEvalMethod(children.map(_.dataType.toTypeInfo)) - - val expectedDataTypes = method.getParameterTypes.map(javaClassToDataType) - - HiveSimpleUdf( - functionClassName, - children.zip(expectedDataTypes).map { - case (e, NullType) => e - case (e, t) if (e.dataType == t) => e - case (e, t) => Cast(e, t) - } - ) + HiveSimpleUdf(functionClassName, children) } else if (classOf[GenericUDF].isAssignableFrom(functionInfo.getFunctionClass)) { HiveGenericUdf(functionClassName, children) } else if ( @@ -117,15 +105,9 @@ private[hive] case class HiveSimpleUdf(functionClassName: String, children: Seq[ @transient lazy val dataType = javaClassToDataType(method.getReturnType) - def catalystToHive(value: Any): Object = value match { - // TODO need more types here? or can we use wrap() - case bd: BigDecimal => new HiveDecimal(bd.underlying()) - case d => d.asInstanceOf[Object] - } - // TODO: Finish input output types. override def eval(input: Row): Any = { - val evaluatedChildren = children.map(c => catalystToHive(c.eval(input))) + val evaluatedChildren = children.map(c => wrap(c.eval(input))) unwrap(FunctionRegistry.invoke(method, function, conversionHelper .convertIfNecessary(evaluatedChildren: _*): _*)) |