about | summary | refs | log | tree | commit | diff
path: root/mllib
diff options
context:
space:
mode:
author: Reza Zadeh <reza@databricks.com> 2015-01-21 09:48:38 -0800
committer: Xiangrui Meng <meng@databricks.com> 2015-01-21 09:48:38 -0800
commit: aa1e22b17b4ce885febe6970a2451c7d17d0acfb (patch)
tree: e7d117fbb40746084c6d6da71e1dd6d9a2781fb3 /mllib
parent: 2eeada373e59d63b774ba92eb5d75fcd3a1cf8f4 (diff)
download: spark-aa1e22b17b4ce885febe6970a2451c7d17d0acfb.tar.gz
spark-aa1e22b17b4ce885febe6970a2451c7d17d0acfb.tar.bz2
spark-aa1e22b17b4ce885febe6970a2451c7d17d0acfb.zip
[MLlib] [SPARK-5301] Missing conversions and operations on IndexedRowMatrix and CoordinateMatrix
* Transpose is missing from CoordinateMatrix (this is cheap to compute, so it should be there)
* IndexedRowMatrix should be convertible to CoordinateMatrix (conversion added)

Tests for both added.

Author: Reza Zadeh <reza@databricks.com>

Closes #4089 from rezazadeh/matutils and squashes the following commits:

ec5238b [Reza Zadeh] Array -> Iterator to avoid temp array
3ce0b5d [Reza Zadeh] Array -> Iterator
bbc907a [Reza Zadeh] Use 'i' for index, and zipWithIndex
cb10ae5 [Reza Zadeh] remove unnecessary import
a7ae048 [Reza Zadeh] Missing linear algebra utilities
Diffstat (limited to 'mllib')
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala | 5
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala | 17
-rw-r--r-- mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrixSuite.scala | 5
-rw-r--r-- mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrixSuite.scala | 8
4 files changed, 35 insertions, 0 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala
index 06d8915f3b..b60559c853 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala
@@ -69,6 +69,11 @@ class CoordinateMatrix(
nRows
}
+ /** Transposes this CoordinateMatrix: entry (i, j) becomes (j, i); dimensions are swapped. */
+ def transpose(): CoordinateMatrix = {
+ new CoordinateMatrix(entries.map(x => MatrixEntry(x.j, x.i, x.value)), numCols(), numRows())
+ }
+
/** Converts to IndexedRowMatrix. The number of columns must be within the integer range. */
def toIndexedRowMatrix(): IndexedRowMatrix = {
val nl = numCols()
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala
index 181f507516..c518271f04 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala
@@ -76,6 +76,23 @@ class IndexedRowMatrix(
}
/**
+ * Converts this matrix to a [[org.apache.spark.mllib.linalg.distributed.CoordinateMatrix]],
+ * emitting one entry per stored value of each row (every element of a dense row).
+ */
+ def toCoordinateMatrix(): CoordinateMatrix = {
+ val entries = rows.flatMap { row =>
+ val rowIndex = row.index
+ row.vector match {
+ case SparseVector(size, indices, values) => // only the stored (index, value) pairs
+ Iterator.tabulate(indices.size)(i => MatrixEntry(rowIndex, indices(i), values(i)))
+ case DenseVector(values) => // every position; no filtering of zero values
+ Iterator.tabulate(values.size)(i => MatrixEntry(rowIndex, i, values(i)))
+ }
+ }
+ new CoordinateMatrix(entries, numRows(), numCols())
+ }
+
+ /**
* Computes the singular value decomposition of this IndexedRowMatrix.
* Denote this matrix by A (m x n), this will compute matrices U, S, V such that A = U * S * V'.
*
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrixSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrixSuite.scala
index f8709751ef..80bef814ce 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrixSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrixSuite.scala
@@ -73,6 +73,11 @@ class CoordinateMatrixSuite extends FunSuite with MLlibTestSparkContext {
assert(mat.toBreeze() === expected)
}
+ test("transpose") {
+ val transposed = mat.transpose()
+ assert(mat.toBreeze().t === transposed.toBreeze()) // `.t` of the local copy is the reference
+ }
+
test("toIndexedRowMatrix") {
val indexedRowMatrix = mat.toIndexedRowMatrix()
val expected = BDM(
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrixSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrixSuite.scala
index 741cd4997b..b86c2ca5ff 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrixSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrixSuite.scala
@@ -80,6 +80,14 @@ class IndexedRowMatrixSuite extends FunSuite with MLlibTestSparkContext {
assert(rowMat.rows.collect().toSeq === data.map(_.vector).toSeq)
}
+ test("toCoordinateMatrix") {
+ val idxRowMat = new IndexedRowMatrix(indexedRows)
+ val coordMat = idxRowMat.toCoordinateMatrix()
+ assert(coordMat.numRows() === m) // m, n: suite fixtures defined outside this hunk
+ assert(coordMat.numCols() === n)
+ assert(coordMat.toBreeze() === idxRowMat.toBreeze()) // contents must survive the conversion
+ }
+
test("multiply a local matrix") {
val A = new IndexedRowMatrix(indexedRows)
val B = Matrices.dense(3, 2, Array(0.0, 1.0, 2.0, 3.0, 4.0, 5.0))