aboutsummaryrefslogtreecommitdiff
path: root/mllib
diff options
context:
space:
mode:
authorSean Owen <sowen@cloudera.com>2015-07-30 09:19:55 -0700
committerXiangrui Meng <meng@databricks.com>2015-07-30 09:19:55 -0700
commited3cb1d21c73645c8f6e6ee08181f876fc192e41 (patch)
tree6554574c27a341688d1f74280845c78393f68600 /mllib
parenta6e53a9c8b24326d1b6dca7a0e36ce6c643daa77 (diff)
downloadspark-ed3cb1d21c73645c8f6e6ee08181f876fc192e41.tar.gz
spark-ed3cb1d21c73645c8f6e6ee08181f876fc192e41.tar.bz2
spark-ed3cb1d21c73645c8f6e6ee08181f876fc192e41.zip
[SPARK-9277] [MLLIB] SparseVector constructor must throw an error when declared number of elements less than array length
Check that SparseVector size is at least as big as the number of indices/values provided. And add tests for constructor checks. CC MechCoder jkbradley -- I am not sure if a change needs to also happen in the Python API? I didn't see it had any similar checks to begin with, but I don't know it well. Author: Sean Owen <sowen@cloudera.com> Closes #7794 from srowen/SPARK-9277 and squashes the following commits: e8dc31e [Sean Owen] Fix scalastyle 6ffe34a [Sean Owen] Check that SparseVector size is at least as big as the number of indices/values provided. And add tests for constructor checks.
Diffstat (limited to 'mllib')
-rw-r--r--mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala2
-rw-r--r--mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala15
2 files changed, 17 insertions, 0 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala
index 0cb28d78be..23c2c16d68 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala
@@ -637,6 +637,8 @@ class SparseVector(
require(indices.length == values.length, "Sparse vectors require that the dimension of the" +
s" indices match the dimension of the values. You provided ${indices.length} indices and " +
s" ${values.length} values.")
+ require(indices.length <= size, s"You provided ${indices.length} indices and values, " +
+ s"which exceeds the specified vector size ${size}.")
override def toString: String =
s"($size,${indices.mkString("[", ",", "]")},${values.mkString("[", ",", "]")})"
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala
index 03be4119bd..1c37ea5123 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala
@@ -57,6 +57,21 @@ class VectorsSuite extends SparkFunSuite with Logging {
assert(vec.values === values)
}
+ test("sparse vector construction with mismatched indices/values array") {
+ intercept[IllegalArgumentException] {
+ Vectors.sparse(4, Array(1, 2, 3), Array(3.0, 5.0, 7.0, 9.0))
+ }
+ intercept[IllegalArgumentException] {
+ Vectors.sparse(4, Array(1, 2, 3), Array(3.0, 5.0))
+ }
+ }
+
+ test("sparse vector construction with too many indices vs size") {
+ intercept[IllegalArgumentException] {
+ Vectors.sparse(3, Array(1, 2, 3, 4), Array(3.0, 5.0, 7.0, 9.0))
+ }
+ }
+
test("dense to array") {
val vec = Vectors.dense(arr).asInstanceOf[DenseVector]
assert(vec.toArray.eq(arr))