 docs/mllib-guide.md              |  3 +--
 docs/python-programming-guide.md |  2 +-
 python/pyspark/mllib/__init__.py |  6 +-----
 python/pyspark/serializers.py    | 11 ++++++++++-
 4 files changed, 13 insertions(+), 9 deletions(-)
diff --git a/docs/mllib-guide.md b/docs/mllib-guide.md
index 203d235bf9..a5e0cc5080 100644
--- a/docs/mllib-guide.md
+++ b/docs/mllib-guide.md
@@ -38,6 +38,5 @@ depends on native Fortran routines. You may need to install the
if it is not already present on your nodes. MLlib will throw a linking error if it cannot
detect these libraries automatically.
-To use MLlib in Python, you will need [NumPy](http://www.numpy.org) version 1.7 or newer
-and Python 2.7.
+To use MLlib in Python, you will need [NumPy](http://www.numpy.org) version 1.7 or newer.
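A quick way to confirm the requirement before running MLlib jobs (a minimal sketch; 1.7 is the threshold stated in the doc above):

    # Print the installed NumPy version; it should be 1.7 or newer.
    import numpy
    print(numpy.version.version)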
diff --git a/docs/python-programming-guide.md b/docs/python-programming-guide.md
index cbe7d820b4..c2e5327324 100644
--- a/docs/python-programming-guide.md
+++ b/docs/python-programming-guide.md
@@ -152,7 +152,7 @@ Many of the methods also contain [doctests](http://docs.python.org/2/library/doc
# Libraries
[MLlib](mllib-guide.html) is also available in PySpark. To use it, you'll need
-[NumPy](http://www.numpy.org) version 1.7 or newer, and Python 2.7. The [MLlib guide](mllib-guide.html) contains
+[NumPy](http://www.numpy.org) version 1.7 or newer. The [MLlib guide](mllib-guide.html) contains
some example applications.
# Where to Go from Here
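For context, a minimal sketch of calling MLlib from PySpark, assuming the API of this era, in which the train methods take an RDD of NumPy arrays with the label as the first element (the MLlib guide linked above has fuller examples):

    from numpy import array
    from pyspark import SparkContext
    from pyspark.mllib.classification import LogisticRegressionWithSGD

    sc = SparkContext("local", "MLlibSketch")
    # Each point is of the form [label, feature_1, ..., feature_n].
    points = sc.parallelize([array([0.0, 0.0, 1.0]),
                             array([1.0, 1.0, 0.0])])
    model = LogisticRegressionWithSGD.train(points)
    print(model.predict(array([1.0, 0.0])))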
diff --git a/python/pyspark/mllib/__init__.py b/python/pyspark/mllib/__init__.py
index b420d7a7f2..538ff26ce7 100644
--- a/python/pyspark/mllib/__init__.py
+++ b/python/pyspark/mllib/__init__.py
@@ -19,11 +19,7 @@
Python bindings for MLlib.
"""
-# MLlib currently needs Python 2.7+ and NumPy 1.7+, so complain if lower
-
-import sys
-if sys.version_info[0:2] < (2, 7):
- raise Exception("MLlib requires Python 2.7+")
+# MLlib currently needs NumPy 1.7+, so complain if lower
import numpy
if numpy.version.version < '1.7':
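One caveat worth noting: the guard above compares version strings lexicographically, so a hypothetical NumPy "1.10" would compare as less than "1.7". A minimal sketch of a numeric comparison instead (not part of this change):

    # Compare numeric (major, minor) tuples so that e.g. "1.10" sorts
    # after "1.7"; suffixes like "1.7.0rc1" are ignored past two parts.
    import numpy
    ver = tuple(int(part) for part in numpy.version.version.split('.')[:2])
    if ver < (1, 7):
        raise Exception("MLlib requires NumPy 1.7+")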
diff --git a/python/pyspark/serializers.py b/python/pyspark/serializers.py
index 4d802924df..b253807974 100644
--- a/python/pyspark/serializers.py
+++ b/python/pyspark/serializers.py
@@ -64,6 +64,7 @@ import cPickle
from itertools import chain, izip, product
import marshal
import struct
+import sys
from pyspark import cloudpickle
@@ -113,6 +114,11 @@ class FramedSerializer(Serializer):
where C{length} is a 32-bit integer and data is C{length} bytes.
"""
+ def __init__(self):
+ # On Python 2.6, we can't write bytearrays to streams, so we need to convert them
+ # to strings first. Check if the version number is that old.
+ self._only_write_strings = sys.version_info[0:2] <= (2, 6)
+
def dump_stream(self, iterator, stream):
for obj in iterator:
self._write_with_length(obj, stream)
@@ -127,7 +133,10 @@ class FramedSerializer(Serializer):
def _write_with_length(self, obj, stream):
serialized = self.dumps(obj)
write_int(len(serialized), stream)
- stream.write(serialized)
+ if self._only_write_strings:
+ stream.write(str(serialized))
+ else:
+ stream.write(serialized)
def _read_with_length(self, stream):
length = read_int(stream)
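A standalone sketch of the framing logic above: a big-endian 32-bit length prefix followed by the payload, with the Python 2.6 fallback that copies the bytearray into a str before writing. The function names mirror pyspark/serializers.py, but this is illustrative only:

    import struct
    import sys

    def write_int(value, stream):
        # The length prefix is a big-endian 32-bit integer.
        stream.write(struct.pack("!i", value))

    def write_with_length(serialized, stream):
        write_int(len(serialized), stream)
        if sys.version_info[0:2] <= (2, 6):
            # Python 2.6 file objects reject bytearray arguments, so
            # fall back to writing a str copy of the payload.
            stream.write(str(serialized))
        else:
            stream.write(serialized)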