From 8e67882b905683a1f151679214ef0b575e77c7e1 Mon Sep 17 00:00:00 2001 From: zero323 Date: Thu, 8 Oct 2015 18:34:15 -0700 Subject: [SPARK-10973] [ML] [PYTHON] __getitem__ method throws IndexError exception when we… MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit __getitem__ method throws an IndexError exception when we try to access an index after the last non-zero entry: from pyspark.mllib.linalg import Vectors sv = Vectors.sparse(5, {1: 3}) sv[0] ## 0.0 sv[1] ## 3.0 sv[2] ## Traceback (most recent call last): ## File "<stdin>", line 1, in <module> ## File "/python/pyspark/mllib/linalg/__init__.py", line 734, in __getitem__ ## row_ind = inds[insert_index] ## IndexError: index out of bounds Author: zero323 Closes #9009 from zero323/sparse_vector_index_error. --- python/pyspark/mllib/linalg/__init__.py | 3 +++ 1 file changed, 3 insertions(+) (limited to 'python/pyspark/mllib/linalg/__init__.py') diff --git a/python/pyspark/mllib/linalg/__init__.py b/python/pyspark/mllib/linalg/__init__.py index ea42127f16..d903b9030d 100644 --- a/python/pyspark/mllib/linalg/__init__.py +++ b/python/pyspark/mllib/linalg/__init__.py @@ -770,6 +770,9 @@ class SparseVector(Vector): raise ValueError("Index %d out of bounds." % index) insert_index = np.searchsorted(inds, index) + if insert_index >= inds.size: + return 0. + row_ind = inds[insert_index] if row_ind == index: return vals[insert_index] -- cgit v1.2.3