aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBenFradet <benjamin.fradet@gmail.com>2015-06-22 15:06:47 -0700
committerMichael Armbrust <michael@databricks.com>2015-06-22 15:06:47 -0700
commit50d3242d6a5530a51dacab249e3f3d49e2d50635 (patch)
treee73e44841f6a8c42fd6549f1fd0c8155c11b715e
parentb1f3a489efc6f4f9d172344c3345b9b38ae235e0 (diff)
downloadspark-50d3242d6a5530a51dacab249e3f3d49e2d50635.tar.gz
spark-50d3242d6a5530a51dacab249e3f3d49e2d50635.tar.bz2
spark-50d3242d6a5530a51dacab249e3f3d49e2d50635.zip
[SPARK-8356] [SQL] Reconcile callUDF and callUdf
Deprecates ```callUdf``` in favor of ```callUDF```. Author: BenFradet <benjamin.fradet@gmail.com> Closes #6902 from BenFradet/SPARK-8356 and squashes the following commits: ef4e9d8 [BenFradet] deprecated callUDF, use udf instead 9b1de4d [BenFradet] reinstated unit test for the deprecated callUdf cbd80a5 [BenFradet] deprecated callUdf in favor of callUDF
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/functions.scala45
-rw-r--r--sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala11
2 files changed, 55 insertions, 1 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index 7e7a099a83..8cea826ae6 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -1448,7 +1448,9 @@ object functions {
*
* @group udf_funcs
* @since 1.3.0
+ * @deprecated As of 1.5.0, since it's redundant with udf()
*/
+ @deprecated("Use udf", "1.5.0")
def callUDF(f: Function$x[$fTypes], returnType: DataType${if (args.length > 0) ", " + args else ""}): Column = {
ScalaUdf(f, returnType, Seq($argsInUdf))
}""")
@@ -1584,7 +1586,9 @@ object functions {
*
* @group udf_funcs
* @since 1.3.0
+ * @deprecated As of 1.5.0, since it's redundant with udf()
*/
+ @deprecated("Use udf", "1.5.0")
def callUDF(f: Function0[_], returnType: DataType): Column = {
ScalaUdf(f, returnType, Seq())
}
@@ -1595,7 +1599,9 @@ object functions {
*
* @group udf_funcs
* @since 1.3.0
+ * @deprecated As of 1.5.0, since it's redundant with udf()
*/
+ @deprecated("Use udf", "1.5.0")
def callUDF(f: Function1[_, _], returnType: DataType, arg1: Column): Column = {
ScalaUdf(f, returnType, Seq(arg1.expr))
}
@@ -1606,7 +1612,9 @@ object functions {
*
* @group udf_funcs
* @since 1.3.0
+ * @deprecated As of 1.5.0, since it's redundant with udf()
*/
+ @deprecated("Use udf", "1.5.0")
def callUDF(f: Function2[_, _, _], returnType: DataType, arg1: Column, arg2: Column): Column = {
ScalaUdf(f, returnType, Seq(arg1.expr, arg2.expr))
}
@@ -1617,7 +1625,9 @@ object functions {
*
* @group udf_funcs
* @since 1.3.0
+ * @deprecated As of 1.5.0, since it's redundant with udf()
*/
+ @deprecated("Use udf", "1.5.0")
def callUDF(f: Function3[_, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column): Column = {
ScalaUdf(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr))
}
@@ -1628,7 +1638,9 @@ object functions {
*
* @group udf_funcs
* @since 1.3.0
+ * @deprecated As of 1.5.0, since it's redundant with udf()
*/
+ @deprecated("Use udf", "1.5.0")
def callUDF(f: Function4[_, _, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column, arg4: Column): Column = {
ScalaUdf(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr, arg4.expr))
}
@@ -1639,7 +1651,9 @@ object functions {
*
* @group udf_funcs
* @since 1.3.0
+ * @deprecated As of 1.5.0, since it's redundant with udf()
*/
+ @deprecated("Use udf", "1.5.0")
def callUDF(f: Function5[_, _, _, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column, arg4: Column, arg5: Column): Column = {
ScalaUdf(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr, arg4.expr, arg5.expr))
}
@@ -1650,7 +1664,9 @@ object functions {
*
* @group udf_funcs
* @since 1.3.0
+ * @deprecated As of 1.5.0, since it's redundant with udf()
*/
+ @deprecated("Use udf", "1.5.0")
def callUDF(f: Function6[_, _, _, _, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column, arg4: Column, arg5: Column, arg6: Column): Column = {
ScalaUdf(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr, arg4.expr, arg5.expr, arg6.expr))
}
@@ -1661,7 +1677,9 @@ object functions {
*
* @group udf_funcs
* @since 1.3.0
+ * @deprecated As of 1.5.0, since it's redundant with udf()
*/
+ @deprecated("Use udf", "1.5.0")
def callUDF(f: Function7[_, _, _, _, _, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column, arg4: Column, arg5: Column, arg6: Column, arg7: Column): Column = {
ScalaUdf(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr, arg4.expr, arg5.expr, arg6.expr, arg7.expr))
}
@@ -1672,7 +1690,9 @@ object functions {
*
* @group udf_funcs
* @since 1.3.0
+ * @deprecated As of 1.5.0, since it's redundant with udf()
*/
+ @deprecated("Use udf", "1.5.0")
def callUDF(f: Function8[_, _, _, _, _, _, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column, arg4: Column, arg5: Column, arg6: Column, arg7: Column, arg8: Column): Column = {
ScalaUdf(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr, arg4.expr, arg5.expr, arg6.expr, arg7.expr, arg8.expr))
}
@@ -1683,7 +1703,9 @@ object functions {
*
* @group udf_funcs
* @since 1.3.0
+ * @deprecated As of 1.5.0, since it's redundant with udf()
*/
+ @deprecated("Use udf", "1.5.0")
def callUDF(f: Function9[_, _, _, _, _, _, _, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column, arg4: Column, arg5: Column, arg6: Column, arg7: Column, arg8: Column, arg9: Column): Column = {
ScalaUdf(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr, arg4.expr, arg5.expr, arg6.expr, arg7.expr, arg8.expr, arg9.expr))
}
@@ -1694,7 +1716,9 @@ object functions {
*
* @group udf_funcs
* @since 1.3.0
+ * @deprecated As of 1.5.0, since it's redundant with udf()
*/
+ @deprecated("Use udf", "1.5.0")
def callUDF(f: Function10[_, _, _, _, _, _, _, _, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column, arg4: Column, arg5: Column, arg6: Column, arg7: Column, arg8: Column, arg9: Column, arg10: Column): Column = {
ScalaUdf(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr, arg4.expr, arg5.expr, arg6.expr, arg7.expr, arg8.expr, arg9.expr, arg10.expr))
}
@@ -1710,12 +1734,33 @@ object functions {
* val df = Seq(("id1", 1), ("id2", 4), ("id3", 5)).toDF("id", "value")
* val sqlContext = df.sqlContext
* sqlContext.udf.register("simpleUdf", (v: Int) => v * v)
+ * df.select($"id", callUDF("simpleUdf", $"value"))
+ * }}}
+ *
+ * @group udf_funcs
+ * @since 1.5.0
+ */
+ def callUDF(udfName: String, cols: Column*): Column = {
+ UnresolvedFunction(udfName, cols.map(_.expr))
+ }
+
+ /**
+ * Call an user-defined function.
+ * Example:
+ * {{{
+ * import org.apache.spark.sql._
+ *
+ * val df = Seq(("id1", 1), ("id2", 4), ("id3", 5)).toDF("id", "value")
+ * val sqlContext = df.sqlContext
+ * sqlContext.udf.register("simpleUdf", (v: Int) => v * v)
* df.select($"id", callUdf("simpleUdf", $"value"))
* }}}
*
* @group udf_funcs
* @since 1.4.0
+ * @deprecated As of 1.5.0, since it was not coherent to have two functions callUdf and callUDF
*/
+ @deprecated("Use callUDF", "1.5.0")
def callUdf(udfName: String, cols: Column*): Column = {
UnresolvedFunction(udfName, cols.map(_.expr))
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
index ba1d020f22..47443a917b 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -301,7 +301,7 @@ class DataFrameSuite extends QueryTest {
)
}
- test("call udf in SQLContext") {
+ test("deprecated callUdf in SQLContext") {
val df = Seq(("id1", 1), ("id2", 4), ("id3", 5)).toDF("id", "value")
val sqlctx = df.sqlContext
sqlctx.udf.register("simpleUdf", (v: Int) => v * v)
@@ -310,6 +310,15 @@ class DataFrameSuite extends QueryTest {
Row("id1", 1) :: Row("id2", 16) :: Row("id3", 25) :: Nil)
}
+ test("callUDF in SQLContext") {
+ val df = Seq(("id1", 1), ("id2", 4), ("id3", 5)).toDF("id", "value")
+ val sqlctx = df.sqlContext
+ sqlctx.udf.register("simpleUDF", (v: Int) => v * v)
+ checkAnswer(
+ df.select($"id", callUDF("simpleUDF", $"value")),
+ Row("id1", 1) :: Row("id2", 16) :: Row("id3", 25) :: Nil)
+ }
+
test("withColumn") {
val df = testData.toDF().withColumn("newCol", col("key") + 1)
checkAnswer(