From ba46bbed5d32aec0f11f0b71c82bba8dbe19f05a Mon Sep 17 00:00:00 2001
From: Josh Rosen <joshrosen@apache.org>
Date: Sat, 26 Jul 2014 17:37:05 -0700
Subject: [SPARK-2601] [PySpark] Fix Py4J error when transforming pickleFiles
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Similar to SPARK-1034, the problem was that Py4J didn’t cope well with the fake ClassTags used in the Java API.  It doesn’t look like there’s any reason why PythonRDD needs to take a ClassTag, since it just ignores the type of the previous RDD, so I removed the type parameter and we no longer pass ClassTags from Python.

Author: Josh Rosen <joshrosen@apache.org>

Closes #1605 from JoshRosen/spark-2601 and squashes the following commits:

b68e118 [Josh Rosen] Fix Py4J error when transforming pickleFiles [SPARK-2601]
---
 python/pyspark/tests.py | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'python/pyspark/tests.py')

diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py
index a92abbf371..8ba51461d1 100644
--- a/python/pyspark/tests.py
+++ b/python/pyspark/tests.py
@@ -226,6 +226,15 @@ class TestRDDFunctions(PySparkTestCase):
         cart = rdd1.cartesian(rdd2)
         result = cart.map(lambda (x, y): x + y).collect()
 
+    def test_transforming_pickle_file(self):
+        # Regression test for SPARK-2601
+        data = self.sc.parallelize(["Hello", "World!"])
+        tempFile = tempfile.NamedTemporaryFile(delete=True)
+        tempFile.close()
+        data.saveAsPickleFile(tempFile.name)
+        pickled_file = self.sc.pickleFile(tempFile.name)
+        pickled_file.map(lambda x: x).collect()
+
     def test_cartesian_on_textfile(self):
         # Regression test for
         path = os.path.join(SPARK_HOME, "python/test_support/hello.txt")
-- 
cgit v1.2.3