aboutsummaryrefslogtreecommitdiff
path: root/sql/catalyst
diff options
context:
space:
mode:
authorMichael Armbrust <michael@databricks.com>2014-08-02 16:33:48 -0700
committerMichael Armbrust <michael@databricks.com>2014-08-02 16:33:48 -0700
commit158ad0bba9382fd494b4789b5628a9cec00cfa19 (patch)
tree761acc1c8694a167043fc8f45bfa49447d6c1f2d /sql/catalyst
parent4c477117bb1ffef463776c86f925d35036f96b7a (diff)
downloadspark-158ad0bba9382fd494b4789b5628a9cec00cfa19.tar.gz
spark-158ad0bba9382fd494b4789b5628a9cec00cfa19.tar.bz2
spark-158ad0bba9382fd494b4789b5628a9cec00cfa19.zip
[SPARK-2097][SQL] UDF Support
This patch adds the ability to register lambda functions written in Python, Java or Scala as UDFs for use in SQL or HiveQL. Scala: ```scala registerFunction("strLenScala", (_: String).length) sql("SELECT strLenScala('test')") ``` Python: ```python sqlCtx.registerFunction("strLenPython", lambda x: len(x), IntegerType()) sqlCtx.sql("SELECT strLenPython('test')") ``` Java: ```java sqlContext.registerFunction("stringLengthJava", new UDF1<String, Integer>() { Override public Integer call(String str) throws Exception { return str.length(); } }, DataType.IntegerType); sqlContext.sql("SELECT stringLengthJava('test')"); ``` Author: Michael Armbrust <michael@databricks.com> Closes #1063 from marmbrus/udfs and squashes the following commits: 9eda0fe [Michael Armbrust] newline 747c05e [Michael Armbrust] Add some scala UDF tests. d92727d [Michael Armbrust] Merge remote-tracking branch 'apache/master' into udfs 005d684 [Michael Armbrust] Fix naming and formatting. d14dac8 [Michael Armbrust] Fix last line of autogened java files. 8135c48 [Michael Armbrust] Move UDF unit tests to pyspark. 40b0ffd [Michael Armbrust] Merge remote-tracking branch 'apache/master' into udfs 6a36890 [Michael Armbrust] Switch logging so that SQLContext can be serializable. 7a83101 [Michael Armbrust] Drop toString 795fd15 [Michael Armbrust] Try to avoid capturing SQLContext. e54fb45 [Michael Armbrust] Docs and tests. 437cbe3 [Michael Armbrust] Update use of dataTypes, fix some python tests, address review comments. 01517d6 [Michael Armbrust] Merge remote-tracking branch 'origin/master' into udfs 8e6c932 [Michael Armbrust] WIP 3f96a52 [Michael Armbrust] Merge remote-tracking branch 'origin/master' into udfs 6237c8d [Michael Armbrust] WIP 2766f0b [Michael Armbrust] Move udfs support to SQL from hive. Add support for Java UDFs. 0f7d50c [Michael Armbrust] Draft of native Spark SQL UDFs for Scala and Python.
Diffstat (limited to 'sql/catalyst')
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala32
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUdf.scala307
2 files changed, 339 insertions, 0 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index c0255701b7..760c49fbca 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -18,17 +18,49 @@
package org.apache.spark.sql.catalyst.analysis
import org.apache.spark.sql.catalyst.expressions.Expression
+import scala.collection.mutable
/** A catalog for looking up user defined functions, used by an [[Analyzer]]. */
trait FunctionRegistry {
+ type FunctionBuilder = Seq[Expression] => Expression
+
+ def registerFunction(name: String, builder: FunctionBuilder): Unit
+
def lookupFunction(name: String, children: Seq[Expression]): Expression
}
+trait OverrideFunctionRegistry extends FunctionRegistry {
+
+ val functionBuilders = new mutable.HashMap[String, FunctionBuilder]()
+
+ def registerFunction(name: String, builder: FunctionBuilder) = {
+ functionBuilders.put(name, builder)
+ }
+
+ abstract override def lookupFunction(name: String, children: Seq[Expression]): Expression = {
+ functionBuilders.get(name).map(_(children)).getOrElse(super.lookupFunction(name,children))
+ }
+}
+
+class SimpleFunctionRegistry extends FunctionRegistry {
+ val functionBuilders = new mutable.HashMap[String, FunctionBuilder]()
+
+ def registerFunction(name: String, builder: FunctionBuilder) = {
+ functionBuilders.put(name, builder)
+ }
+
+ override def lookupFunction(name: String, children: Seq[Expression]): Expression = {
+ functionBuilders(name)(children)
+ }
+}
+
/**
* A trivial catalog that returns an error when a function is requested. Used for testing when all
* functions are already filled in and the analyser needs only to resolve attribute references.
*/
object EmptyFunctionRegistry extends FunctionRegistry {
+ def registerFunction(name: String, builder: FunctionBuilder) = ???
+
def lookupFunction(name: String, children: Seq[Expression]): Expression = {
throw new UnsupportedOperationException
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUdf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUdf.scala
index acddf5e9c7..95633dd0c9 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUdf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUdf.scala
@@ -27,6 +27,22 @@ case class ScalaUdf(function: AnyRef, dataType: DataType, children: Seq[Expressi
def references = children.flatMap(_.references).toSet
def nullable = true
+ /** This method has been generated by this script
+
+ (1 to 22).map { x =>
+ val anys = (1 to x).map(x => "Any").reduce(_ + ", " + _)
+ val evals = (0 to x - 1).map(x => s"children($x).eval(input)").reduce(_ + ",\n " + _)
+
+ s"""
+ case $x =>
+ function.asInstanceOf[($anys) => Any](
+ $evals)
+ """
+ }
+
+ */
+
+ // scalastyle:off
override def eval(input: Row): Any = {
children.size match {
case 0 => function.asInstanceOf[() => Any]()
@@ -35,6 +51,297 @@ case class ScalaUdf(function: AnyRef, dataType: DataType, children: Seq[Expressi
function.asInstanceOf[(Any, Any) => Any](
children(0).eval(input),
children(1).eval(input))
+ case 3 =>
+ function.asInstanceOf[(Any, Any, Any) => Any](
+ children(0).eval(input),
+ children(1).eval(input),
+ children(2).eval(input))
+ case 4 =>
+ function.asInstanceOf[(Any, Any, Any, Any) => Any](
+ children(0).eval(input),
+ children(1).eval(input),
+ children(2).eval(input),
+ children(3).eval(input))
+ case 5 =>
+ function.asInstanceOf[(Any, Any, Any, Any, Any) => Any](
+ children(0).eval(input),
+ children(1).eval(input),
+ children(2).eval(input),
+ children(3).eval(input),
+ children(4).eval(input))
+ case 6 =>
+ function.asInstanceOf[(Any, Any, Any, Any, Any, Any) => Any](
+ children(0).eval(input),
+ children(1).eval(input),
+ children(2).eval(input),
+ children(3).eval(input),
+ children(4).eval(input),
+ children(5).eval(input))
+ case 7 =>
+ function.asInstanceOf[(Any, Any, Any, Any, Any, Any, Any) => Any](
+ children(0).eval(input),
+ children(1).eval(input),
+ children(2).eval(input),
+ children(3).eval(input),
+ children(4).eval(input),
+ children(5).eval(input),
+ children(6).eval(input))
+ case 8 =>
+ function.asInstanceOf[(Any, Any, Any, Any, Any, Any, Any, Any) => Any](
+ children(0).eval(input),
+ children(1).eval(input),
+ children(2).eval(input),
+ children(3).eval(input),
+ children(4).eval(input),
+ children(5).eval(input),
+ children(6).eval(input),
+ children(7).eval(input))
+ case 9 =>
+ function.asInstanceOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any) => Any](
+ children(0).eval(input),
+ children(1).eval(input),
+ children(2).eval(input),
+ children(3).eval(input),
+ children(4).eval(input),
+ children(5).eval(input),
+ children(6).eval(input),
+ children(7).eval(input),
+ children(8).eval(input))
+ case 10 =>
+ function.asInstanceOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any) => Any](
+ children(0).eval(input),
+ children(1).eval(input),
+ children(2).eval(input),
+ children(3).eval(input),
+ children(4).eval(input),
+ children(5).eval(input),
+ children(6).eval(input),
+ children(7).eval(input),
+ children(8).eval(input),
+ children(9).eval(input))
+ case 11 =>
+ function.asInstanceOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any) => Any](
+ children(0).eval(input),
+ children(1).eval(input),
+ children(2).eval(input),
+ children(3).eval(input),
+ children(4).eval(input),
+ children(5).eval(input),
+ children(6).eval(input),
+ children(7).eval(input),
+ children(8).eval(input),
+ children(9).eval(input),
+ children(10).eval(input))
+ case 12 =>
+ function.asInstanceOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any) => Any](
+ children(0).eval(input),
+ children(1).eval(input),
+ children(2).eval(input),
+ children(3).eval(input),
+ children(4).eval(input),
+ children(5).eval(input),
+ children(6).eval(input),
+ children(7).eval(input),
+ children(8).eval(input),
+ children(9).eval(input),
+ children(10).eval(input),
+ children(11).eval(input))
+ case 13 =>
+ function.asInstanceOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any) => Any](
+ children(0).eval(input),
+ children(1).eval(input),
+ children(2).eval(input),
+ children(3).eval(input),
+ children(4).eval(input),
+ children(5).eval(input),
+ children(6).eval(input),
+ children(7).eval(input),
+ children(8).eval(input),
+ children(9).eval(input),
+ children(10).eval(input),
+ children(11).eval(input),
+ children(12).eval(input))
+ case 14 =>
+ function.asInstanceOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any) => Any](
+ children(0).eval(input),
+ children(1).eval(input),
+ children(2).eval(input),
+ children(3).eval(input),
+ children(4).eval(input),
+ children(5).eval(input),
+ children(6).eval(input),
+ children(7).eval(input),
+ children(8).eval(input),
+ children(9).eval(input),
+ children(10).eval(input),
+ children(11).eval(input),
+ children(12).eval(input),
+ children(13).eval(input))
+ case 15 =>
+ function.asInstanceOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any) => Any](
+ children(0).eval(input),
+ children(1).eval(input),
+ children(2).eval(input),
+ children(3).eval(input),
+ children(4).eval(input),
+ children(5).eval(input),
+ children(6).eval(input),
+ children(7).eval(input),
+ children(8).eval(input),
+ children(9).eval(input),
+ children(10).eval(input),
+ children(11).eval(input),
+ children(12).eval(input),
+ children(13).eval(input),
+ children(14).eval(input))
+ case 16 =>
+ function.asInstanceOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any) => Any](
+ children(0).eval(input),
+ children(1).eval(input),
+ children(2).eval(input),
+ children(3).eval(input),
+ children(4).eval(input),
+ children(5).eval(input),
+ children(6).eval(input),
+ children(7).eval(input),
+ children(8).eval(input),
+ children(9).eval(input),
+ children(10).eval(input),
+ children(11).eval(input),
+ children(12).eval(input),
+ children(13).eval(input),
+ children(14).eval(input),
+ children(15).eval(input))
+ case 17 =>
+ function.asInstanceOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any) => Any](
+ children(0).eval(input),
+ children(1).eval(input),
+ children(2).eval(input),
+ children(3).eval(input),
+ children(4).eval(input),
+ children(5).eval(input),
+ children(6).eval(input),
+ children(7).eval(input),
+ children(8).eval(input),
+ children(9).eval(input),
+ children(10).eval(input),
+ children(11).eval(input),
+ children(12).eval(input),
+ children(13).eval(input),
+ children(14).eval(input),
+ children(15).eval(input),
+ children(16).eval(input))
+ case 18 =>
+ function.asInstanceOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any) => Any](
+ children(0).eval(input),
+ children(1).eval(input),
+ children(2).eval(input),
+ children(3).eval(input),
+ children(4).eval(input),
+ children(5).eval(input),
+ children(6).eval(input),
+ children(7).eval(input),
+ children(8).eval(input),
+ children(9).eval(input),
+ children(10).eval(input),
+ children(11).eval(input),
+ children(12).eval(input),
+ children(13).eval(input),
+ children(14).eval(input),
+ children(15).eval(input),
+ children(16).eval(input),
+ children(17).eval(input))
+ case 19 =>
+ function.asInstanceOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any) => Any](
+ children(0).eval(input),
+ children(1).eval(input),
+ children(2).eval(input),
+ children(3).eval(input),
+ children(4).eval(input),
+ children(5).eval(input),
+ children(6).eval(input),
+ children(7).eval(input),
+ children(8).eval(input),
+ children(9).eval(input),
+ children(10).eval(input),
+ children(11).eval(input),
+ children(12).eval(input),
+ children(13).eval(input),
+ children(14).eval(input),
+ children(15).eval(input),
+ children(16).eval(input),
+ children(17).eval(input),
+ children(18).eval(input))
+ case 20 =>
+ function.asInstanceOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any) => Any](
+ children(0).eval(input),
+ children(1).eval(input),
+ children(2).eval(input),
+ children(3).eval(input),
+ children(4).eval(input),
+ children(5).eval(input),
+ children(6).eval(input),
+ children(7).eval(input),
+ children(8).eval(input),
+ children(9).eval(input),
+ children(10).eval(input),
+ children(11).eval(input),
+ children(12).eval(input),
+ children(13).eval(input),
+ children(14).eval(input),
+ children(15).eval(input),
+ children(16).eval(input),
+ children(17).eval(input),
+ children(18).eval(input),
+ children(19).eval(input))
+ case 21 =>
+ function.asInstanceOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any) => Any](
+ children(0).eval(input),
+ children(1).eval(input),
+ children(2).eval(input),
+ children(3).eval(input),
+ children(4).eval(input),
+ children(5).eval(input),
+ children(6).eval(input),
+ children(7).eval(input),
+ children(8).eval(input),
+ children(9).eval(input),
+ children(10).eval(input),
+ children(11).eval(input),
+ children(12).eval(input),
+ children(13).eval(input),
+ children(14).eval(input),
+ children(15).eval(input),
+ children(16).eval(input),
+ children(17).eval(input),
+ children(18).eval(input),
+ children(19).eval(input),
+ children(20).eval(input))
+ case 22 =>
+ function.asInstanceOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any) => Any](
+ children(0).eval(input),
+ children(1).eval(input),
+ children(2).eval(input),
+ children(3).eval(input),
+ children(4).eval(input),
+ children(5).eval(input),
+ children(6).eval(input),
+ children(7).eval(input),
+ children(8).eval(input),
+ children(9).eval(input),
+ children(10).eval(input),
+ children(11).eval(input),
+ children(12).eval(input),
+ children(13).eval(input),
+ children(14).eval(input),
+ children(15).eval(input),
+ children(16).eval(input),
+ children(17).eval(input),
+ children(18).eval(input),
+ children(19).eval(input),
+ children(20).eval(input),
+ children(21).eval(input))
}
+ // scalastyle:on
}
}