diff options
author | Josh Rosen <joshrosen@apache.org> | 2014-01-28 19:50:26 -0800 |
---|---|---|
committer | Josh Rosen <joshrosen@apache.org> | 2014-01-28 20:20:08 -0800 |
commit | 1381fc72f7a34f690a98ab72cec8ffb61e0e564d (patch) | |
tree | 8ae129c4b291b4b5589a77b919f508c4535fbf2c /python/pyspark/worker.py | |
parent | 84670f2715392859624df290c1b52eb4ed4a9cb1 (diff) | |
download | spark-1381fc72f7a34f690a98ab72cec8ffb61e0e564d.tar.gz spark-1381fc72f7a34f690a98ab72cec8ffb61e0e564d.tar.bz2 spark-1381fc72f7a34f690a98ab72cec8ffb61e0e564d.zip |
Switch from MUTF8 to UTF8 in PySpark serializers.
This fixes SPARK-1043, a bug introduced in 0.9.0
where PySpark couldn't serialize strings > 64kB.
This fix was written by @tyro89 and @bouk in #512.
This commit squashes and rebases their pull request
in order to fix some merge conflicts.
Diffstat (limited to 'python/pyspark/worker.py')
-rw-r--r-- | python/pyspark/worker.py | 8 |
1 file changed, 4 insertions, 4 deletions
diff --git a/python/pyspark/worker.py b/python/pyspark/worker.py index d77981f61f..4be4063dcf 100644 --- a/python/pyspark/worker.py +++ b/python/pyspark/worker.py @@ -30,11 +30,11 @@ from pyspark.broadcast import Broadcast, _broadcastRegistry from pyspark.cloudpickle import CloudPickler from pyspark.files import SparkFiles from pyspark.serializers import write_with_length, write_int, read_long, \ - write_long, read_int, SpecialLengths, MUTF8Deserializer, PickleSerializer + write_long, read_int, SpecialLengths, UTF8Deserializer, PickleSerializer pickleSer = PickleSerializer() -mutf8_deserializer = MUTF8Deserializer() +utf8_deserializer = UTF8Deserializer() def report_times(outfile, boot, init, finish): @@ -51,7 +51,7 @@ def main(infile, outfile): return # fetch name of workdir - spark_files_dir = mutf8_deserializer.loads(infile) + spark_files_dir = utf8_deserializer.loads(infile) SparkFiles._root_directory = spark_files_dir SparkFiles._is_running_on_worker = True @@ -66,7 +66,7 @@ def main(infile, outfile): sys.path.append(spark_files_dir) # *.py files that were added will be copied here num_python_includes = read_int(infile) for _ in range(num_python_includes): - filename = mutf8_deserializer.loads(infile) + filename = utf8_deserializer.loads(infile) sys.path.append(os.path.join(spark_files_dir, filename)) command = pickleSer._read_with_length(infile) |