aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark
diff options
context:
space:
mode:
authorMichael Armbrust <michael@databricks.com>2014-10-17 14:12:07 -0700
committerPatrick Wendell <pwendell@gmail.com>2014-10-17 14:12:07 -0700
commitadcb7d3350032dda69a43de724c8bdff5fef2c67 (patch)
treef65f1f1fe89831ad2c92bbe82056cd70ca62a02c /python/pyspark
parent803e7f087797bae643754f8db88848a17282ca6e (diff)
downloadspark-adcb7d3350032dda69a43de724c8bdff5fef2c67.tar.gz
spark-adcb7d3350032dda69a43de724c8bdff5fef2c67.tar.bz2
spark-adcb7d3350032dda69a43de724c8bdff5fef2c67.zip
[SPARK-3855][SQL] Preserve the result attribute of python UDFs through transformations
In the current implementation it was possible for the reference to change after analysis.

Author: Michael Armbrust <michael@databricks.com>

Closes #2717 from marmbrus/pythonUdfResults and squashes the following commits:

da14879 [Michael Armbrust] Fix test
6343bcb [Michael Armbrust] add test
9533286 [Michael Armbrust] Correctly preserve the result attribute of python UDFs through transformations
Diffstat (limited to 'python/pyspark')
-rw-r--r--python/pyspark/tests.py6
1 files changed, 6 insertions, 0 deletions
diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py
index ceab57464f..f5ccf31abb 100644
--- a/python/pyspark/tests.py
+++ b/python/pyspark/tests.py
@@ -683,6 +683,12 @@ class SQLTests(ReusedPySparkTestCase):
[row] = self.sqlCtx.sql("SELECT twoArgs('test', 1)").collect()
self.assertEqual(row[0], 5)
+ def test_udf2(self):
+ self.sqlCtx.registerFunction("strlen", lambda string: len(string))
+ self.sqlCtx.inferSchema(self.sc.parallelize([Row(a="test")])).registerTempTable("test")
+ [res] = self.sqlCtx.sql("SELECT strlen(a) FROM test WHERE strlen(a) > 1").collect()
+ self.assertEqual(u"4", res[0])
+
def test_broadcast_in_udf(self):
bar = {"a": "aa", "b": "bb", "c": "abc"}
foo = self.sc.broadcast(bar)