about | summary | refs | log | tree | commit | diff
path: root/mllib
diff options
context:
space:
mode:
author: Burak Yavuz <brkyvz@gmail.com> 2015-01-29 21:26:29 -0800
committer: Xiangrui Meng <meng@databricks.com> 2015-01-29 21:26:29 -0800
commit: dd4d84cf809e6e425958fe768c518679d1828779 (patch)
tree: cfaf31c5f35e19c78f72b8c882abacb317680559 /mllib
parent: 80def9deb3bfc30d5b622b32aecb0322341a7f62 (diff)
download: spark-dd4d84cf809e6e425958fe768c518679d1828779.tar.gz
spark-dd4d84cf809e6e425958fe768c518679d1828779.tar.bz2
spark-dd4d84cf809e6e425958fe768c518679d1828779.zip
[SPARK-5322] Added transpose functionality to BlockMatrix
BlockMatrices can now be transposed!

Author: Burak Yavuz <brkyvz@gmail.com>

Closes #4275 from brkyvz/SPARK-5322 and squashes the following commits:

33806ed [Burak Yavuz] added lazy comment
33e9219 [Burak Yavuz] made transpose lazy
5a274cd [Burak Yavuz] added cached tests
5dcf85c [Burak Yavuz] [SPARK-5322] Added transpose functionality to BlockMatrix
Diffstat (limited to 'mllib')
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala | 9
-rw-r--r-- mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala | 29
2 files changed, 38 insertions, 0 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
index 426dbf4805..693419f827 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
@@ -232,6 +232,15 @@ class BlockMatrix(
new DenseMatrix(m, n, values)
}
+ /** Transpose this `BlockMatrix`. Returns a new `BlockMatrix` instance sharing the
+ * same underlying data. Is a lazy operation. */
+ def transpose: BlockMatrix = {
+ val transposedBlocks = blocks.map { case ((blockRowIndex, blockColIndex), mat) =>
+ ((blockColIndex, blockRowIndex), mat.transpose)
+ }
+ new BlockMatrix(transposedBlocks, colsPerBlock, rowsPerBlock, nCols, nRows)
+ }
+
/** Collects data and assembles a local dense breeze matrix (for test only). */
private[mllib] def toBreeze(): BDM[Double] = {
val localMat = toLocalMatrix()
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala
index 7284d03d24..03f34308dd 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala
@@ -146,4 +146,33 @@ class BlockMatrixSuite extends FunSuite with MLlibTestSparkContext {
assert(gridBasedMat.toLocalMatrix() === dense)
assert(gridBasedMat.toBreeze() === expected)
}
+
+ test("transpose") {
+ val expected = BDM(
+ (1.0, 0.0, 3.0, 0.0, 0.0),
+ (0.0, 2.0, 1.0, 1.0, 0.0),
+ (0.0, 1.0, 1.0, 2.0, 1.0),
+ (0.0, 0.0, 0.0, 1.0, 5.0))
+
+ val AT = gridBasedMat.transpose
+ assert(AT.numRows() === gridBasedMat.numCols())
+ assert(AT.numCols() === gridBasedMat.numRows())
+ assert(AT.toBreeze() === expected)
+
+ // partitioner must update as well
+ val originalPartitioner = gridBasedMat.partitioner
+ val ATpartitioner = AT.partitioner
+ assert(originalPartitioner.colsPerPart === ATpartitioner.rowsPerPart)
+ assert(originalPartitioner.rowsPerPart === ATpartitioner.colsPerPart)
+ assert(originalPartitioner.cols === ATpartitioner.rows)
+ assert(originalPartitioner.rows === ATpartitioner.cols)
+
+ // make sure it works when matrices are cached as well
+ gridBasedMat.cache()
+ val AT2 = gridBasedMat.transpose
+ AT2.cache()
+ assert(AT2.toBreeze() === AT.toBreeze())
+ val A = AT2.transpose
+ assert(A.toBreeze() === gridBasedMat.toBreeze())
+ }
}