diff options
author | Dongjoon Hyun <dongjoon@apache.org> | 2016-06-16 23:02:46 +0200 |
---|---|---|
committer | Sean Owen <sowen@cloudera.com> | 2016-06-16 23:02:46 +0200 |
commit | 36110a8306608186696c536028d2776e022d305a (patch) | |
tree | 36fe552ac1aeadb2ec8d7561bf3590407d64e279 /mllib/src/main | |
parent | f9bf15d9bde4df2178f7a8f932c883bb77c46149 (diff) | |
download | spark-36110a8306608186696c536028d2776e022d305a.tar.gz spark-36110a8306608186696c536028d2776e022d305a.tar.bz2 spark-36110a8306608186696c536028d2776e022d305a.zip |
[SPARK-15922][MLLIB] `toIndexedRowMatrix` should consider the case `cols < offset+colsPerBlock`
## What changes were proposed in this pull request?
SPARK-15922 reports the following scenario throwing an exception due to the mismatched vector sizes. This PR handles the exceptional case, `cols < (offset + colsPerBlock)`.
**Before**
```scala
scala> import org.apache.spark.mllib.linalg.distributed._
scala> import org.apache.spark.mllib.linalg._
scala> val rows = IndexedRow(0L, new DenseVector(Array(1,2,3))) :: IndexedRow(1L, new DenseVector(Array(1,2,3))):: IndexedRow(2L, new DenseVector(Array(1,2,3))):: Nil
scala> val rdd = sc.parallelize(rows)
scala> val matrix = new IndexedRowMatrix(rdd, 3, 3)
scala> val bmat = matrix.toBlockMatrix
scala> val imat = bmat.toIndexedRowMatrix
scala> imat.rows.collect
... // java.lang.IllegalArgumentException: requirement failed: Vectors must be the same length!
```
**After**
```scala
...
scala> imat.rows.collect
res0: Array[org.apache.spark.mllib.linalg.distributed.IndexedRow] = Array(IndexedRow(0,[1.0,2.0,3.0]), IndexedRow(1,[1.0,2.0,3.0]), IndexedRow(2,[1.0,2.0,3.0]))
```
## How was this patch tested?
Pass the Jenkins tests (including the above case)
Author: Dongjoon Hyun <dongjoon@apache.org>
Closes #13643 from dongjoon-hyun/SPARK-15922.
Diffstat (limited to 'mllib/src/main')
-rw-r--r-- | mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala | 2 |
1 files changed, 1 insertions, 1 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala index 7a24617781..639295c695 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala @@ -288,7 +288,7 @@ class BlockMatrix @Since("1.3.0") ( vectors.foreach { case (blockColIdx: Int, vec: BV[Double]) => val offset = colsPerBlock * blockColIdx - wholeVector(offset until offset + colsPerBlock) := vec + wholeVector(offset until Math.min(cols, offset + colsPerBlock)) := vec } new IndexedRow(rowIdx, Vectors.fromBreeze(wholeVector)) } |