diff options
author | Burak Yavuz <brkyvz@gmail.com> | 2015-01-29 21:26:29 -0800 |
---|---|---|
committer | Xiangrui Meng <meng@databricks.com> | 2015-01-29 21:26:29 -0800 |
commit | dd4d84cf809e6e425958fe768c518679d1828779 (patch) | |
tree | cfaf31c5f35e19c78f72b8c882abacb317680559 | |
parent | 80def9deb3bfc30d5b622b32aecb0322341a7f62 (diff) | |
download | spark-dd4d84cf809e6e425958fe768c518679d1828779.tar.gz spark-dd4d84cf809e6e425958fe768c518679d1828779.tar.bz2 spark-dd4d84cf809e6e425958fe768c518679d1828779.zip |
[SPARK-5322] Added transpose functionality to BlockMatrix
BlockMatrices can now be transposed!
Author: Burak Yavuz <brkyvz@gmail.com>
Closes #4275 from brkyvz/SPARK-5322 and squashes the following commits:
33806ed [Burak Yavuz] added lazy comment
33e9219 [Burak Yavuz] made transpose lazy
5a274cd [Burak Yavuz] added cached tests
5dcf85c [Burak Yavuz] [SPARK-5322] Added transpose functionality to BlockMatrix
-rw-r--r-- | mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala | 9 | ||||
-rw-r--r-- | mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala | 29 |
2 files changed, 38 insertions, 0 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
index 426dbf4805..693419f827 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
@@ -232,6 +232,15 @@ class BlockMatrix(
     new DenseMatrix(m, n, values)
   }
 
+  /** Transpose this `BlockMatrix`. Returns a new `BlockMatrix` instance sharing the
+    * same underlying data. Is a lazy operation. */
+  def transpose: BlockMatrix = {
+    val transposedBlocks = blocks.map { case ((blockRowIndex, blockColIndex), mat) =>
+      ((blockColIndex, blockRowIndex), mat.transpose)
+    }
+    new BlockMatrix(transposedBlocks, colsPerBlock, rowsPerBlock, nCols, nRows)
+  }
+
   /** Collects data and assembles a local dense breeze matrix (for test only). */
   private[mllib] def toBreeze(): BDM[Double] = {
     val localMat = toLocalMatrix()
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala
index 7284d03d24..03f34308dd 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala
@@ -146,4 +146,33 @@ class BlockMatrixSuite extends FunSuite with MLlibTestSparkContext {
     assert(gridBasedMat.toLocalMatrix() === dense)
     assert(gridBasedMat.toBreeze() === expected)
   }
+
+  test("transpose") {
+    val expected = BDM(
+      (1.0, 0.0, 3.0, 0.0, 0.0),
+      (0.0, 2.0, 1.0, 1.0, 0.0),
+      (0.0, 1.0, 1.0, 2.0, 1.0),
+      (0.0, 0.0, 0.0, 1.0, 5.0))
+
+    val AT = gridBasedMat.transpose
+    assert(AT.numRows() === gridBasedMat.numCols())
+    assert(AT.numCols() === gridBasedMat.numRows())
+    assert(AT.toBreeze() === expected)
+
+    // partitioner must update as well
+    val originalPartitioner = gridBasedMat.partitioner
+    val ATpartitioner = AT.partitioner
+    assert(originalPartitioner.colsPerPart === ATpartitioner.rowsPerPart)
+    assert(originalPartitioner.rowsPerPart === ATpartitioner.colsPerPart)
+    assert(originalPartitioner.cols === ATpartitioner.rows)
+    assert(originalPartitioner.rows === ATpartitioner.cols)
+
+    // make sure it works when matrices are cached as well
+    gridBasedMat.cache()
+    val AT2 = gridBasedMat.transpose
+    AT2.cache()
+    assert(AT2.toBreeze() === AT.toBreeze())
+    val A = AT2.transpose
+    assert(A.toBreeze() === gridBasedMat.toBreeze())
+  }
 }