diff options
author | Sean Owen <sowen@cloudera.com> | 2015-07-30 09:19:55 -0700 |
---|---|---|
committer | Xiangrui Meng <meng@databricks.com> | 2015-07-30 09:19:55 -0700 |
commit | ed3cb1d21c73645c8f6e6ee08181f876fc192e41 (patch) | |
tree | 6554574c27a341688d1f74280845c78393f68600 | |
parent | a6e53a9c8b24326d1b6dca7a0e36ce6c643daa77 (diff) | |
download | spark-ed3cb1d21c73645c8f6e6ee08181f876fc192e41.tar.gz spark-ed3cb1d21c73645c8f6e6ee08181f876fc192e41.tar.bz2 spark-ed3cb1d21c73645c8f6e6ee08181f876fc192e41.zip |
[SPARK-9277] [MLLIB] SparseVector constructor must throw an error when the declared number of elements is less than the array length
Check that the SparseVector size is at least as large as the number of indices/values provided, and add tests for the constructor checks.
CC MechCoder jkbradley -- I am not sure whether a change also needs to happen in the Python API. I didn't see that it had any similar checks to begin with, but I don't know it well.
Author: Sean Owen <sowen@cloudera.com>
Closes #7794 from srowen/SPARK-9277 and squashes the following commits:
e8dc31e [Sean Owen] Fix scalastyle
6ffe34a [Sean Owen] Check that SparseVector size is at least as big as the number of indices/values provided. And add tests for constructor checks.
-rw-r--r-- | mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala | 2 | ||||
-rw-r--r-- | mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala | 15 |
2 files changed, 17 insertions, 0 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala
index 0cb28d78be..23c2c16d68 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala
@@ -637,6 +637,8 @@ class SparseVector(
   require(indices.length == values.length, "Sparse vectors require that the dimension of the" +
     s" indices match the dimension of the values. You provided ${indices.length} indices and " +
     s" ${values.length} values.")
+  require(indices.length <= size, s"You provided ${indices.length} indices and values, " +
+    s"which exceeds the specified vector size ${size}.")
 
   override def toString: String =
     s"($size,${indices.mkString("[", ",", "]")},${values.mkString("[", ",", "]")})"
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala
index 03be4119bd..1c37ea5123 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala
@@ -57,6 +57,21 @@ class VectorsSuite extends SparkFunSuite with Logging {
     assert(vec.values === values)
   }
 
+  test("sparse vector construction with mismatched indices/values array") {
+    intercept[IllegalArgumentException] {
+      Vectors.sparse(4, Array(1, 2, 3), Array(3.0, 5.0, 7.0, 9.0))
+    }
+    intercept[IllegalArgumentException] {
+      Vectors.sparse(4, Array(1, 2, 3), Array(3.0, 5.0))
+    }
+  }
+
+  test("sparse vector construction with too many indices vs size") {
+    intercept[IllegalArgumentException] {
+      Vectors.sparse(3, Array(1, 2, 3, 4), Array(3.0, 5.0, 7.0, 9.0))
+    }
+  }
+
   test("dense to array") {
     val vec = Vectors.dense(arr).asInstanceOf[DenseVector]
     assert(vec.toArray.eq(arr))