From df360917990ad95dde3c8e016ec42507d1566355 Mon Sep 17 00:00:00 2001 From: Sandeep Date: Tue, 15 Apr 2014 00:19:43 -0700 Subject: SPARK-1426: Make MLlib work with NumPy versions older than 1.7 Currently it requires NumPy 1.7 due to using the copyto method (http://docs.scipy.org/doc/numpy/reference/generated/numpy.copyto.html) for extracting data out of an array. Replace it with a fallback: slice assignment (`arr[...] = v`), which older NumPy versions also support. Author: Sandeep Closes #391 from techaddict/1426 and squashes the following commits: d365962 [Sandeep] SPARK-1426: Make MLlib work with NumPy versions older than 1.7 Currently it requires NumPy 1.7 due to using the copyto method (http://docs.scipy.org/doc/numpy/reference/generated/numpy.copyto.html) for extracting data out of an array. Replace it with a fallback: slice assignment (`arr[...] = v`), which older NumPy versions also support. --- python/pyspark/mllib/__init__.py | 6 +++--- python/pyspark/mllib/_common.py | 11 ++++++----- 2 files changed, 9 insertions(+), 8 deletions(-) (limited to 'python/pyspark') diff --git a/python/pyspark/mllib/__init__.py b/python/pyspark/mllib/__init__.py index 538ff26ce7..4149f54931 100644 --- a/python/pyspark/mllib/__init__.py +++ b/python/pyspark/mllib/__init__.py @@ -19,8 +19,8 @@ Python bindings for MLlib. """ -# MLlib currently needs and NumPy 1.7+, so complain if lower +# MLlib currently needs and NumPy 1.4+, so complain if lower import numpy -if numpy.version.version < '1.7': - raise Exception("MLlib requires NumPy 1.7+") +if numpy.version.version < '1.4': + raise Exception("MLlib requires NumPy 1.4+") diff --git a/python/pyspark/mllib/_common.py b/python/pyspark/mllib/_common.py index 7ef251d24c..e19f5d2aaa 100644 --- a/python/pyspark/mllib/_common.py +++ b/python/pyspark/mllib/_common.py @@ -15,7 +15,7 @@ # limitations under the License. 
# -from numpy import ndarray, copyto, float64, int64, int32, ones, array_equal, array, dot, shape, complex, issubdtype +from numpy import ndarray, float64, int64, int32, ones, array_equal, array, dot, shape, complex, issubdtype from pyspark import SparkContext, RDD import numpy as np @@ -72,8 +72,8 @@ def _serialize_double_vector(v): header = ndarray(shape=[2], buffer=ba, dtype="int64") header[0] = 1 header[1] = length - copyto(ndarray(shape=[length], buffer=ba, offset=16, - dtype="float64"), v) + arr_mid = ndarray(shape=[length], buffer=ba, offset=16, dtype="float64") + arr_mid[...] = v return ba def _deserialize_double_vector(ba): @@ -112,8 +112,9 @@ def _serialize_double_matrix(m): header[0] = 2 header[1] = rows header[2] = cols - copyto(ndarray(shape=[rows, cols], buffer=ba, offset=24, - dtype="float64", order='C'), m) + arr_mid = ndarray(shape=[rows, cols], buffer=ba, offset=24, + dtype="float64", order='C') + arr_mid[...] = m return ba else: raise TypeError("_serialize_double_matrix called on a " -- cgit v1.2.3