[SPARK-1687] [PySpark] pickable namedtuple

Add an hook to replace original namedtuple with an pickable one, then namedtuple could be used in RDDs. PS: pyspark should be import BEFORE "from collections import namedtuple" Author: Davies Liu <davies.liu@gmail.com> Closes #1623 from davies/namedtuple and squashes the following commits: 045dad8 [Davies Liu] remove unrelated code changes 4132f32 [Davies Liu] address comment 55b1c1a [Davies Liu] fix tests 61f86eb [Davies Liu] replace all the reference of namedtuple to new hacked one 98df6c6 [Davies Liu] Merge branch 'master' of github.com:apache/spark into namedtuple f7b1bde [Davies Liu] add hack for CloudPickleSerializer 0c5c849 [Davies Liu] Merge branch 'master' of github.com:apache/spark into namedtuple 21991e6 [Davies Liu] hack namedtuple in __main__ module, make it picklable. 93b03b8 [Davies Liu] pickable namedtuple
author: Davies Liu <davies.liu@gmail.com> 2014-08-04 12:13:41 -0700
committer: Josh Rosen <joshrosen@apache.org> 2014-08-04 12:13:41 -0700
commit: 59f84a9531f7974a053fd4963ce9afd88273ea4c (patch)
tree: f2f72b2b2a041624ff4462e777f19874cd477b41 /python/pyspark/tests.py
parent: e053c55819363fab7068bb9165e3379f0c2f570c (diff)
download: spark-59f84a9531f7974a053fd4963ce9afd88273ea4c.tar.gz
spark-59f84a9531f7974a053fd4963ce9afd88273ea4c.tar.bz2
spark-59f84a9531f7974a053fd4963ce9afd88273ea4c.zip
1 files changed, 19 insertions, 0 deletions
diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py
index acc3c30371..4ac94ba729 100644
--- a/python/pyspark/tests.py
+++ b/python/pyspark/tests.py
@@ -112,6 +112,17 @@ class TestMerger(unittest.TestCase):
         m._cleanup()
 
 
+class SerializationTestCase(unittest.TestCase):
+
+    def test_namedtuple(self):
+        from collections import namedtuple
+        from cPickle import dumps, loads
+        P = namedtuple("P", "x y")
+        p1 = P(1, 3)
+        p2 = loads(dumps(p1, 2))
+        self.assertEquals(p1, p2)
+
+
 class PySparkTestCase(unittest.TestCase):
 
     def setUp(self):
@@ -298,6 +309,14 @@ class TestRDDFunctions(PySparkTestCase):
         self.assertEqual([1], rdd.map(itemgetter(1)).collect())
         self.assertEqual([(2, 3)], rdd.map(itemgetter(2, 3)).collect())
 
+    def test_namedtuple_in_rdd(self):
+        from collections import namedtuple
+        Person = namedtuple("Person", "id firstName lastName")
+        jon = Person(1, "Jon", "Doe")
+        jane = Person(2, "Jane", "Doe")
+        theDoes = self.sc.parallelize([jon, jane])
+        self.assertEquals([jon, jane], theDoes.collect())
+
 
 class TestIO(PySparkTestCase):
author	Davies Liu <davies.liu@gmail.com>	2014-08-04 12:13:41 -0700
committer	Josh Rosen <joshrosen@apache.org>	2014-08-04 12:13:41 -0700
commit	59f84a9531f7974a053fd4963ce9afd88273ea4c (patch)
tree	f2f72b2b2a041624ff4462e777f19874cd477b41 /python/pyspark/tests.py
parent	e053c55819363fab7068bb9165e3379f0c2f570c (diff)
download	spark-59f84a9531f7974a053fd4963ce9afd88273ea4c.tar.gz spark-59f84a9531f7974a053fd4963ce9afd88273ea4c.tar.bz2 spark-59f84a9531f7974a053fd4963ce9afd88273ea4c.zip