aboutsummaryrefslogtreecommitdiff
path: root/mllib/src
diff options
context:
space:
mode:
author: Funes <tianshaocun@gmail.com> 2014-05-08 17:54:10 -0700
committer: Patrick Wendell <pwendell@gmail.com> 2014-05-08 17:54:17 -0700
commit: 9ed17ff34503854c676986f1553a103a370d66c6 (patch)
tree: c302f1a0a853e36e59bfdc788e1df456687e6acf /mllib/src
parent: 34529975e26810c35a8a1b603298ae2f700a71e3 (diff)
download: spark-9ed17ff34503854c676986f1553a103a370d66c6.tar.gz
spark-9ed17ff34503854c676986f1553a103a370d66c6.tar.bz2
spark-9ed17ff34503854c676986f1553a103a370d66c6.zip
Bug fix of sparse vector conversion
Fixed a small bug caused by the inconsistency of index/data array size and vector length.

Author: Funes <tianshaocun@gmail.com>
Author: funes <tianshaocun@gmail.com>

Closes #661 from funes/bugfix and squashes the following commits:

edb2b9d [funes] remove unused import
75dced3 [Funes] update test case
d129a66 [Funes] Add test for sparse breeze by vector builder
64e7198 [Funes] Copy data only when necessary
b85806c [Funes] Bug fix of sparse vector conversion

(cherry picked from commit 191279ce4edb940821d11a6b25cd33c8ad0af054)
Signed-off-by: Patrick Wendell <pwendell@gmail.com>
Diffstat (limited to 'mllib/src')
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala 6
-rw-r--r-- mllib/src/test/scala/org/apache/spark/mllib/linalg/BreezeVectorConversionSuite.scala 9
2 files changed, 14 insertions, 1 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala
index 7cdf6bd56a..84d223908c 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala
@@ -136,7 +136,11 @@ object Vectors {
new DenseVector(v.toArray) // Can't use underlying array directly, so make a new one
}
case v: BSV[Double] =>
- new SparseVector(v.length, v.index, v.data)
+ if (v.index.length == v.used) {
+ new SparseVector(v.length, v.index, v.data)
+ } else {
+ new SparseVector(v.length, v.index.slice(0, v.used), v.data.slice(0, v.used))
+ }
case v: BV[_] =>
sys.error("Unsupported Breeze vector type: " + v.getClass.getName)
}
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/BreezeVectorConversionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/BreezeVectorConversionSuite.scala
index aacaa30084..8abdac7290 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/BreezeVectorConversionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/BreezeVectorConversionSuite.scala
@@ -55,4 +55,13 @@ class BreezeVectorConversionSuite extends FunSuite {
assert(vec.indices.eq(indices), "should not copy data")
assert(vec.values.eq(values), "should not copy data")
}
+
+ test("sparse breeze with partially-used arrays to vector") {
+ val activeSize = 3
+ val breeze = new BSV[Double](indices, values, activeSize, n)
+ val vec = Vectors.fromBreeze(breeze).asInstanceOf[SparseVector]
+ assert(vec.size === n)
+ assert(vec.indices === indices.slice(0, activeSize))
+ assert(vec.values === values.slice(0, activeSize))
+ }
}