about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- python/pyspark/cloudpickle.py 6
-rw-r--r-- python/pyspark/tests.py 11
2 files changed, 12 insertions, 5 deletions
diff --git a/python/pyspark/cloudpickle.py b/python/pyspark/cloudpickle.py
index 68062483de..80e51d1a58 100644
--- a/python/pyspark/cloudpickle.py
+++ b/python/pyspark/cloudpickle.py
@@ -657,7 +657,6 @@ class CloudPickler(pickle.Pickler):
def save_file(self, obj):
"""Save a file"""
import StringIO as pystringIO #we can't use cStringIO as it lacks the name attribute
- from ..transport.adapter import SerializingAdapter
if not hasattr(obj, 'name') or not hasattr(obj, 'mode'):
raise pickle.PicklingError("Cannot pickle files that do not map to an actual file")
@@ -691,13 +690,10 @@ class CloudPickler(pickle.Pickler):
tmpfile.close()
if tst != '':
raise pickle.PicklingError("Cannot pickle file %s as it does not appear to map to a physical, real file" % name)
- elif fsize > SerializingAdapter.max_transmit_data:
- raise pickle.PicklingError("Cannot pickle file %s as it exceeds cloudconf.py's max_transmit_data of %d" %
- (name,SerializingAdapter.max_transmit_data))
else:
try:
tmpfile = file(name)
- contents = tmpfile.read(SerializingAdapter.max_transmit_data)
+ contents = tmpfile.read()
tmpfile.close()
except IOError:
raise pickle.PicklingError("Cannot pickle file %s as it cannot be read" % name)
diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py
index 2ade15b35a..9fbeb36f4f 100644
--- a/python/pyspark/tests.py
+++ b/python/pyspark/tests.py
@@ -169,6 +169,17 @@ class SerializationTestCase(unittest.TestCase):
self.assertEquals(p1, p2)
+# Regression test for SPARK-3415
+class CloudPickleTest(unittest.TestCase):
+ def test_pickling_file_handles(self):
+ from pyspark.cloudpickle import dumps
+ from StringIO import StringIO
+ from pickle import load
+ out1 = sys.stderr
+ out2 = load(StringIO(dumps(out1)))
+ self.assertEquals(out1, out2)
+
+
class PySparkTestCase(unittest.TestCase):
def setUp(self):