aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorzero323 <zero323@users.noreply.github.com>2017-02-14 09:46:22 -0800
committerHolden Karau <holden@us.ibm.com>2017-02-14 09:46:22 -0800
commite0eeb0f89fffb52cd4d15970bdf00c3c5d1eea88 (patch)
tree4997e6cdb8ccec0b904aa9d52521370c1beacae8
parent9c4405e8e801cbab3a5c78c9f4334775925dfcc4 (diff)
downloadspark-e0eeb0f89fffb52cd4d15970bdf00c3c5d1eea88.tar.gz
spark-e0eeb0f89fffb52cd4d15970bdf00c3c5d1eea88.tar.bz2
spark-e0eeb0f89fffb52cd4d15970bdf00c3c5d1eea88.zip
[SPARK-19162][PYTHON][SQL] UserDefinedFunction should validate that func is callable
## What changes were proposed in this pull request? UDF constructor checks if `func` argument is callable and if it is not, fails fast instead of waiting for an action. ## How was this patch tested? Unit tests. Author: zero323 <zero323@users.noreply.github.com> Closes #16535 from zero323/SPARK-19162.
-rw-r--r--python/pyspark/sql/functions.py5
-rw-r--r--python/pyspark/sql/tests.py7
2 files changed, 12 insertions, 0 deletions
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 5213a3c358..4f4ae10892 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -1864,6 +1864,11 @@ class UserDefinedFunction(object):
.. versionadded:: 1.3
"""
def __init__(self, func, returnType, name=None):
+ if not callable(func):
+ raise TypeError(
+ "Not a function or callable (__call__ is not defined): "
+ "{0}".format(type(func)))
+
self.func = func
self.returnType = (
returnType if isinstance(returnType, DataType)
diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py
index d9d03337ff..73721674f6 100644
--- a/python/pyspark/sql/tests.py
+++ b/python/pyspark/sql/tests.py
@@ -504,6 +504,13 @@ class SQLTests(ReusedPySparkTestCase):
self.assertTupleEqual(expected, actual)
+ def test_udf_shouldnt_accept_noncallable_object(self):
+ from pyspark.sql.functions import UserDefinedFunction
+ from pyspark.sql.types import StringType
+
+ non_callable = None
+ self.assertRaises(TypeError, UserDefinedFunction, non_callable, StringType())
+
def test_basic_functions(self):
rdd = self.sc.parallelize(['{"foo":"bar"}', '{"foo":"baz"}'])
df = self.spark.read.json(rdd)