aboutsummaryrefslogtreecommitdiff
path: root/sql/catalyst
diff options
context:
space:
mode:
authorReynold Xin <rxin@databricks.com>2015-12-29 16:58:23 -0800
committerReynold Xin <rxin@databricks.com>2015-12-29 16:58:23 -0800
commit270a659584b6c1c304a9f9a331c56287672e00b0 (patch)
treef201c1f77f6c272f16eca4539edf2660284104c9 /sql/catalyst
parentbe86268eb54e3fa0a9ce7a07359b3e67731ed8b5 (diff)
downloadspark-270a659584b6c1c304a9f9a331c56287672e00b0.tar.gz
spark-270a659584b6c1c304a9f9a331c56287672e00b0.tar.bz2
spark-270a659584b6c1c304a9f9a331c56287672e00b0.zip
[SPARK-12549][SQL] Take Option[Seq[DataType]] in UDF input type specification.
In Spark we allow UDFs to declare their expected input types in order to apply type coercion. The expected input type parameter takes a Seq[DataType] and uses Nil when no type coercion is applied. It makes more sense to take Option[Seq[DataType]] instead, so we can differentiate a no-arg function from a function with no expected input types specified. Author: Reynold Xin <rxin@databricks.com> Closes #10504 from rxin/SPARK-12549.
Diffstat (limited to 'sql/catalyst')
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala12
1 files changed, 8 insertions, 4 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala
index 85faa19bbf..64d397bf84 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala
@@ -30,7 +30,10 @@ import org.apache.spark.sql.types.DataType
* null. Use boxed type or [[Option]] if you wanna do the null-handling yourself.
* @param dataType Return type of function.
* @param children The input expressions of this UDF.
- * @param inputTypes The expected input types of this UDF.
+ * @param inputTypes The expected input types of this UDF, used to perform type coercion. If we do
+ * not want to perform coercion, simply use "Nil". Note that it would've been
+ * better to use Option of Seq[DataType] so we can use "None" as the case for no
+ * type coercion. However, that would require more refactoring of the codebase.
*/
case class ScalaUDF(
function: AnyRef,
@@ -43,7 +46,7 @@ case class ScalaUDF(
override def toString: String = s"UDF(${children.mkString(",")})"
- // scalastyle:off
+ // scalastyle:off line.size.limit
/** This method has been generated by this script
@@ -969,7 +972,7 @@ case class ScalaUDF(
}
}
- // scalastyle:on
+ // scalastyle:on line.size.limit
// Generate code used to convert the arguments to Scala types for user-defined functions
private[this] def genCodeForConverter(ctx: CodeGenContext, index: Int): String = {
@@ -1010,7 +1013,7 @@ case class ScalaUDF(
// This must be called before children expressions' codegen
// because ctx.references is used in genCodeForConverter
- val converterTerms = (0 until children.size).map(genCodeForConverter(ctx, _))
+ val converterTerms = children.indices.map(genCodeForConverter(ctx, _))
// Initialize user-defined function
val funcClassName = s"scala.Function${children.size}"
@@ -1054,5 +1057,6 @@ case class ScalaUDF(
}
private[this] val converter = CatalystTypeConverters.createToCatalystConverter(dataType)
+
override def eval(input: InternalRow): Any = converter(f(input))
}