From adcb7d3350032dda69a43de724c8bdff5fef2c67 Mon Sep 17 00:00:00 2001
From: Michael Armbrust <michael@databricks.com>
Date: Fri, 17 Oct 2014 14:12:07 -0700
Subject: [SPARK-3855][SQL] Preserve the result attribute of python UDFs through
 transformations

In the current implementation it was possible for the reference to change after analysis.

Author: Michael Armbrust <michael@databricks.com>

Closes #2717 from marmbrus/pythonUdfResults and squashes the following commits:

da14879 [Michael Armbrust] Fix test
6343bcb [Michael Armbrust] add test
9533286 [Michael Armbrust] Correctly preserve the result attribute of python UDFs through transformations
---
 python/pyspark/tests.py | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'python/pyspark')

diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py
index ceab57464f..f5ccf31abb 100644
--- a/python/pyspark/tests.py
+++ b/python/pyspark/tests.py
@@ -683,6 +683,12 @@ class SQLTests(ReusedPySparkTestCase):
         [row] = self.sqlCtx.sql("SELECT twoArgs('test', 1)").collect()
         self.assertEqual(row[0], 5)
 
+    def test_udf2(self):
+        self.sqlCtx.registerFunction("strlen", lambda string: len(string))
+        self.sqlCtx.inferSchema(self.sc.parallelize([Row(a="test")])).registerTempTable("test")
+        [res] = self.sqlCtx.sql("SELECT strlen(a) FROM test WHERE strlen(a) > 1").collect()
+        self.assertEqual(u"4", res[0])
+
     def test_broadcast_in_udf(self):
         bar = {"a": "aa", "b": "bb", "c": "abc"}
         foo = self.sc.broadcast(bar)
-- 
cgit v1.2.3