aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark/tests.py
diff options
context:
space:
mode:
author Josh Rosen <joshrosen@apache.org> 2013-11-28 23:44:56 -0800
committer Josh Rosen <joshrosen@apache.org> 2013-11-28 23:44:56 -0800
commit 3787f514d9a8e45d2c257b4696e30bc1a1935748 (patch)
tree 572553edf58b4d97b54afe1a536f30288bc1db4f /python/pyspark/tests.py
parent 743a31a7ca4421cbbd5b615b773997a06a7ab4ee (diff)
download spark-3787f514d9a8e45d2c257b4696e30bc1a1935748.tar.gz
spark-3787f514d9a8e45d2c257b4696e30bc1a1935748.tar.bz2
spark-3787f514d9a8e45d2c257b4696e30bc1a1935748.zip
Fix UnicodeEncodeError in PySpark saveAsTextFile().
Fixes SPARK-970.
Diffstat (limited to 'python/pyspark/tests.py')
-rw-r--r-- python/pyspark/tests.py 15
1 files changed, 15 insertions, 0 deletions
diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py
index 621e1cb58c..3987642bf4 100644
--- a/python/pyspark/tests.py
+++ b/python/pyspark/tests.py
@@ -19,6 +19,8 @@
Unit tests for PySpark; additional tests are implemented as doctests in
individual modules.
"""
+from fileinput import input
+from glob import glob
import os
import shutil
import sys
@@ -138,6 +140,19 @@ class TestAddFile(PySparkTestCase):
self.assertEqual("Hello World from inside a package!", UserClass().hello())
+# Test case for RDD methods; the one test here exercises saveAsTextFile()
+# with a non-ASCII unicode element.
+class TestRDDFunctions(PySparkTestCase):
+
+ def test_save_as_textfile_with_unicode(self):
+ # Regression test for SPARK-970: save an RDD whose single element is a
+ # unicode string containing a non-ASCII character (U+00A1), then read
+ # the saved output back and compare it with the original string.
+ x = u"\u00A1Hola, mundo!"
+ # self.sc is presumably the SparkContext set up by PySparkTestCase --
+ # TODO confirm against the base class, which is not shown here.
+ data = self.sc.parallelize([x])
+ # The temporary file is created and closed immediately (delete=True
+ # removes it on close); only its unique path name is used below as the
+ # destination for saveAsTextFile().
+ tempFile = NamedTemporaryFile(delete=True)
+ tempFile.close()
+ data.saveAsTextFile(tempFile.name)
+ # The saved output is read from files matching "part-0000*" underneath
+ # tempFile.name; `input` here is fileinput.input, which yields the lines
+ # of every matched file.
+ # NOTE(review): nothing in this test removes the output written at
+ # tempFile.name afterwards -- confirm whether cleanup happens elsewhere.
+ raw_contents = ''.join(input(glob(tempFile.name + "/part-0000*")))
+ # The raw bytes read back are decoded as UTF-8 before comparison.
+ self.assertEqual(x, unicode(raw_contents.strip(), "utf-8"))
+
+
class TestIO(PySparkTestCase):
def test_stdout_redirection(self):