aboutsummaryrefslogtreecommitdiff
path: root/mllib-local
diff options
context:
space:
mode:
Diffstat (limited to 'mllib-local')
-rw-r--r--mllib-local/src/main/scala/org/apache/spark/ml/linalg/Matrices.scala274
-rw-r--r--mllib-local/src/test/scala/org/apache/spark/ml/linalg/MatricesSuite.scala420
-rw-r--r--mllib-local/src/test/scala/org/apache/spark/ml/linalg/VectorsSuite.scala5
3 files changed, 654 insertions, 45 deletions
diff --git a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Matrices.scala b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Matrices.scala
index d9ffdeb797..07f3bc2728 100644
--- a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Matrices.scala
+++ b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Matrices.scala
@@ -44,6 +44,12 @@ sealed trait Matrix extends Serializable {
@Since("2.0.0")
val isTransposed: Boolean = false
+ /** Indicates whether the values backing this matrix are arranged in column major order. */
+ private[ml] def isColMajor: Boolean = !isTransposed
+
+ /** Indicates whether the values backing this matrix are arranged in row major order. */
+ private[ml] def isRowMajor: Boolean = isTransposed
+
/** Converts to a dense array in column major. */
@Since("2.0.0")
def toArray: Array[Double] = {
@@ -148,7 +154,8 @@ sealed trait Matrix extends Serializable {
* and column indices respectively with the type `Int`, and the final parameter is the
* corresponding value in the matrix with type `Double`.
*/
- private[spark] def foreachActive(f: (Int, Int, Double) => Unit)
+ @Since("2.2.0")
+ def foreachActive(f: (Int, Int, Double) => Unit): Unit
/**
* Find the number of non-zero active values.
@@ -161,6 +168,116 @@ sealed trait Matrix extends Serializable {
*/
@Since("2.0.0")
def numActives: Int
+
+ /**
+ * Converts this matrix to a sparse matrix.
+ *
+ * @param colMajor Whether the values of the resulting sparse matrix should be in column major
+ * or row major order. If `false`, resulting matrix will be row major.
+ */
+ private[ml] def toSparseMatrix(colMajor: Boolean): SparseMatrix
+
+ /**
+ * Converts this matrix to a sparse matrix in column major order.
+ */
+ @Since("2.2.0")
+ def toSparseColMajor: SparseMatrix = toSparseMatrix(colMajor = true)
+
+ /**
+ * Converts this matrix to a sparse matrix in row major order.
+ */
+ @Since("2.2.0")
+ def toSparseRowMajor: SparseMatrix = toSparseMatrix(colMajor = false)
+
+ /**
+ * Converts this matrix to a sparse matrix while maintaining the layout of the current matrix.
+ */
+ @Since("2.2.0")
+ def toSparse: SparseMatrix = toSparseMatrix(colMajor = isColMajor)
+
+ /**
+ * Converts this matrix to a dense matrix.
+ *
+ * @param colMajor Whether the values of the resulting dense matrix should be in column major
+ * or row major order. If `false`, resulting matrix will be row major.
+ */
+ private[ml] def toDenseMatrix(colMajor: Boolean): DenseMatrix
+
+ /**
+ * Converts this matrix to a dense matrix while maintaining the layout of the current matrix.
+ */
+ @Since("2.2.0")
+ def toDense: DenseMatrix = toDenseMatrix(colMajor = isColMajor)
+
+ /**
+ * Converts this matrix to a dense matrix in row major order.
+ */
+ @Since("2.2.0")
+ def toDenseRowMajor: DenseMatrix = toDenseMatrix(colMajor = false)
+
+ /**
+ * Converts this matrix to a dense matrix in column major order.
+ */
+ @Since("2.2.0")
+ def toDenseColMajor: DenseMatrix = toDenseMatrix(colMajor = true)
+
+ /**
+ * Returns a matrix in dense or sparse column major format, whichever uses less storage.
+ */
+ @Since("2.2.0")
+ def compressedColMajor: Matrix = {
+ if (getDenseSizeInBytes <= getSparseSizeInBytes(colMajor = true)) {
+ this.toDenseColMajor
+ } else {
+ this.toSparseColMajor
+ }
+ }
+
+ /**
+ * Returns a matrix in dense or sparse row major format, whichever uses less storage.
+ */
+ @Since("2.2.0")
+ def compressedRowMajor: Matrix = {
+ if (getDenseSizeInBytes <= getSparseSizeInBytes(colMajor = false)) {
+ this.toDenseRowMajor
+ } else {
+ this.toSparseRowMajor
+ }
+ }
+
+ /**
+ * Returns a matrix in dense column major, dense row major, sparse row major, or sparse column
+ * major format, whichever uses less storage. When dense representation is optimal, it maintains
+ * the current layout order.
+ */
+ @Since("2.2.0")
+ def compressed: Matrix = {
+ val cscSize = getSparseSizeInBytes(colMajor = true)
+ val csrSize = getSparseSizeInBytes(colMajor = false)
+ if (getDenseSizeInBytes <= math.min(cscSize, csrSize)) {
+ // dense matrix size is the same for column major and row major, so maintain current layout
+ this.toDense
+ } else if (cscSize <= csrSize) {
+ this.toSparseColMajor
+ } else {
+ this.toSparseRowMajor
+ }
+ }
+
+ /** Gets the size of the dense representation of this `Matrix`. */
+ private[ml] def getDenseSizeInBytes: Long = {
+ Matrices.getDenseSize(numCols, numRows)
+ }
+
+ /** Gets the size of the minimal sparse representation of this `Matrix`. */
+ private[ml] def getSparseSizeInBytes(colMajor: Boolean): Long = {
+ val nnz = numNonzeros
+ val numPtrs = if (colMajor) numCols + 1L else numRows + 1L
+ Matrices.getSparseSize(nnz, numPtrs)
+ }
+
+ /** Gets the current size in bytes of this `Matrix`. Useful for testing */
+ private[ml] def getSizeInBytes: Long
}
/**
@@ -258,7 +375,7 @@ class DenseMatrix @Since("2.0.0") (
override def transpose: DenseMatrix = new DenseMatrix(numCols, numRows, values, !isTransposed)
- private[spark] override def foreachActive(f: (Int, Int, Double) => Unit): Unit = {
+ override def foreachActive(f: (Int, Int, Double) => Unit): Unit = {
if (!isTransposed) {
// outer loop over columns
var j = 0
@@ -291,31 +408,49 @@ class DenseMatrix @Since("2.0.0") (
override def numActives: Int = values.length
/**
- * Generate a `SparseMatrix` from the given `DenseMatrix`. The new matrix will have isTransposed
- * set to false.
+ * Generate a `SparseMatrix` from the given `DenseMatrix`.
+ *
+ * @param colMajor Whether the resulting `SparseMatrix` values will be in column major order.
*/
- @Since("2.0.0")
- def toSparse: SparseMatrix = {
- val spVals: MArrayBuilder[Double] = new MArrayBuilder.ofDouble
- val colPtrs: Array[Int] = new Array[Int](numCols + 1)
- val rowIndices: MArrayBuilder[Int] = new MArrayBuilder.ofInt
- var nnz = 0
- var j = 0
- while (j < numCols) {
- var i = 0
- while (i < numRows) {
- val v = values(index(i, j))
- if (v != 0.0) {
- rowIndices += i
- spVals += v
- nnz += 1
+ private[ml] override def toSparseMatrix(colMajor: Boolean): SparseMatrix = {
+ if (!colMajor) this.transpose.toSparseColMajor.transpose
+ else {
+ val spVals: MArrayBuilder[Double] = new MArrayBuilder.ofDouble
+ val colPtrs: Array[Int] = new Array[Int](numCols + 1)
+ val rowIndices: MArrayBuilder[Int] = new MArrayBuilder.ofInt
+ var nnz = 0
+ var j = 0
+ while (j < numCols) {
+ var i = 0
+ while (i < numRows) {
+ val v = values(index(i, j))
+ if (v != 0.0) {
+ rowIndices += i
+ spVals += v
+ nnz += 1
+ }
+ i += 1
}
- i += 1
+ j += 1
+ colPtrs(j) = nnz
}
- j += 1
- colPtrs(j) = nnz
+ new SparseMatrix(numRows, numCols, colPtrs, rowIndices.result(), spVals.result())
+ }
+ }
+
+ /**
+ * Generate a `DenseMatrix` from this `DenseMatrix`.
+ *
+ * @param colMajor Whether the resulting `DenseMatrix` values will be in column major order.
+ */
+ private[ml] override def toDenseMatrix(colMajor: Boolean): DenseMatrix = {
+ if (isRowMajor && colMajor) {
+ new DenseMatrix(numRows, numCols, this.toArray, isTransposed = false)
+ } else if (isColMajor && !colMajor) {
+ new DenseMatrix(numRows, numCols, this.transpose.toArray, isTransposed = true)
+ } else {
+ this
}
- new SparseMatrix(numRows, numCols, colPtrs, rowIndices.result(), spVals.result())
}
override def colIter: Iterator[Vector] = {
@@ -331,6 +466,8 @@ class DenseMatrix @Since("2.0.0") (
}
}
}
+
+ private[ml] def getSizeInBytes: Long = Matrices.getDenseSize(numCols, numRows)
}
/**
@@ -560,7 +697,7 @@ class SparseMatrix @Since("2.0.0") (
override def transpose: SparseMatrix =
new SparseMatrix(numCols, numRows, colPtrs, rowIndices, values, !isTransposed)
- private[spark] override def foreachActive(f: (Int, Int, Double) => Unit): Unit = {
+ override def foreachActive(f: (Int, Int, Double) => Unit): Unit = {
if (!isTransposed) {
var j = 0
while (j < numCols) {
@@ -587,18 +724,67 @@ class SparseMatrix @Since("2.0.0") (
}
}
+ override def numNonzeros: Int = values.count(_ != 0)
+
+ override def numActives: Int = values.length
+
/**
- * Generate a `DenseMatrix` from the given `SparseMatrix`. The new matrix will have isTransposed
- * set to false.
+ * Generate a `SparseMatrix` from this `SparseMatrix`, removing explicit zero values if they
+ * exist.
+ *
+ * @param colMajor Whether or not the resulting `SparseMatrix` values are in column major
+ * order.
*/
- @Since("2.0.0")
- def toDense: DenseMatrix = {
- new DenseMatrix(numRows, numCols, toArray)
+ private[ml] override def toSparseMatrix(colMajor: Boolean): SparseMatrix = {
+ if (isColMajor && !colMajor) {
+ // it is col major and we want row major, use breeze to remove explicit zeros
+ val breezeTransposed = asBreeze.asInstanceOf[BSM[Double]].t
+ Matrices.fromBreeze(breezeTransposed).transpose.asInstanceOf[SparseMatrix]
+ } else if (isRowMajor && colMajor) {
+ // it is row major and we want col major, use breeze to remove explicit zeros
+ val breezeTransposed = asBreeze.asInstanceOf[BSM[Double]]
+ Matrices.fromBreeze(breezeTransposed).asInstanceOf[SparseMatrix]
+ } else {
+ val nnz = numNonzeros
+ if (nnz != numActives) {
+ // remove explicit zeros
+ val rr = new Array[Int](nnz)
+ val vv = new Array[Double](nnz)
+ val numPtrs = if (isRowMajor) numRows else numCols
+ val cc = new Array[Int](numPtrs + 1)
+ var nzIdx = 0
+ var j = 0
+ while (j < numPtrs) {
+ var idx = colPtrs(j)
+ val idxEnd = colPtrs(j + 1)
+ cc(j) = nzIdx
+ while (idx < idxEnd) {
+ if (values(idx) != 0.0) {
+ vv(nzIdx) = values(idx)
+ rr(nzIdx) = rowIndices(idx)
+ nzIdx += 1
+ }
+ idx += 1
+ }
+ j += 1
+ }
+ cc(j) = nnz
+ new SparseMatrix(numRows, numCols, cc, rr, vv, isTransposed = isTransposed)
+ } else {
+ this
+ }
+ }
}
- override def numNonzeros: Int = values.count(_ != 0)
-
- override def numActives: Int = values.length
+ /**
+ * Generate a `DenseMatrix` from the given `SparseMatrix`.
+ *
+ * @param colMajor Whether the resulting `DenseMatrix` values are in column major order.
+ */
+ private[ml] override def toDenseMatrix(colMajor: Boolean): DenseMatrix = {
+ if (colMajor) new DenseMatrix(numRows, numCols, this.toArray)
+ else new DenseMatrix(numRows, numCols, this.transpose.toArray, isTransposed = true)
+ }
override def colIter: Iterator[Vector] = {
if (isTransposed) {
@@ -631,6 +817,8 @@ class SparseMatrix @Since("2.0.0") (
}
}
}
+
+ private[ml] def getSizeInBytes: Long = Matrices.getSparseSize(numActives, colPtrs.length)
}
/**
@@ -1079,4 +1267,26 @@ object Matrices {
SparseMatrix.fromCOO(numRows, numCols, entries)
}
}
+
+ private[ml] def getSparseSize(numActives: Long, numPtrs: Long): Long = {
+ /*
+ Sparse matrices store two int arrays, one double array, two ints, and one boolean:
+ 8 * values.length + 4 * rowIndices.length + 4 * colPtrs.length + arrayHeader * 3 + 2 * 4 + 1
+ */
+ val doubleBytes = java.lang.Double.BYTES
+ val intBytes = java.lang.Integer.BYTES
+ val arrayHeader = 12L
+ doubleBytes * numActives + intBytes * numActives + intBytes * numPtrs + arrayHeader * 3L + 9L
+ }
+
+ private[ml] def getDenseSize(numCols: Long, numRows: Long): Long = {
+ /*
+ Dense matrices store one double array, two ints, and one boolean:
+ 8 * values.length + arrayHeader + 2 * 4 + 1
+ */
+ val doubleBytes = java.lang.Double.BYTES
+ val arrayHeader = 12L
+ doubleBytes * numCols * numRows + arrayHeader + 9L
+ }
+
}
diff --git a/mllib-local/src/test/scala/org/apache/spark/ml/linalg/MatricesSuite.scala b/mllib-local/src/test/scala/org/apache/spark/ml/linalg/MatricesSuite.scala
index 9c0aa73938..9f82020868 100644
--- a/mllib-local/src/test/scala/org/apache/spark/ml/linalg/MatricesSuite.scala
+++ b/mllib-local/src/test/scala/org/apache/spark/ml/linalg/MatricesSuite.scala
@@ -160,22 +160,416 @@ class MatricesSuite extends SparkMLFunSuite {
assert(sparseMat.values(2) === 10.0)
}
- test("toSparse, toDense") {
- val m = 3
- val n = 2
- val values = Array(1.0, 2.0, 4.0, 5.0)
- val allValues = Array(1.0, 2.0, 0.0, 0.0, 4.0, 5.0)
- val colPtrs = Array(0, 2, 4)
- val rowIndices = Array(0, 1, 1, 2)
+ test("dense to dense") {
+ /*
+ dm1 = 4.0 2.0 -8.0
+ -1.0 7.0 4.0
+
+ dm2 = 5.0 -9.0 4.0
+ 1.0 -3.0 -8.0
+ */
+ val dm1 = new DenseMatrix(2, 3, Array(4.0, -1.0, 2.0, 7.0, -8.0, 4.0))
+ val dm2 = new DenseMatrix(2, 3, Array(5.0, -9.0, 4.0, 1.0, -3.0, -8.0), isTransposed = true)
+
+ val dm8 = dm1.toDenseColMajor
+ assert(dm8 === dm1)
+ assert(dm8.isColMajor)
+ assert(dm8.values.equals(dm1.values))
+
+ val dm5 = dm2.toDenseColMajor
+ assert(dm5 === dm2)
+ assert(dm5.isColMajor)
+ assert(dm5.values === Array(5.0, 1.0, -9.0, -3.0, 4.0, -8.0))
+
+ val dm4 = dm1.toDenseRowMajor
+ assert(dm4 === dm1)
+ assert(dm4.isRowMajor)
+ assert(dm4.values === Array(4.0, 2.0, -8.0, -1.0, 7.0, 4.0))
+
+ val dm6 = dm2.toDenseRowMajor
+ assert(dm6 === dm2)
+ assert(dm6.isRowMajor)
+ assert(dm6.values.equals(dm2.values))
+
+ val dm3 = dm1.toDense
+ assert(dm3 === dm1)
+ assert(dm3.isColMajor)
+ assert(dm3.values.equals(dm1.values))
+
+ val dm9 = dm2.toDense
+ assert(dm9 === dm2)
+ assert(dm9.isRowMajor)
+ assert(dm9.values.equals(dm2.values))
+ }
- val spMat1 = new SparseMatrix(m, n, colPtrs, rowIndices, values)
- val deMat1 = new DenseMatrix(m, n, allValues)
+ test("dense to sparse") {
+ /*
+ dm1 = 0.0 4.0 5.0
+ 0.0 2.0 0.0
+
+ dm2 = 0.0 4.0 5.0
+ 0.0 2.0 0.0
- val spMat2 = deMat1.toSparse
- val deMat2 = spMat1.toDense
+ dm3 = 0.0 0.0 0.0
+ 0.0 0.0 0.0
+ */
+ val dm1 = new DenseMatrix(2, 3, Array(0.0, 0.0, 4.0, 2.0, 5.0, 0.0))
+ val dm2 = new DenseMatrix(2, 3, Array(0.0, 4.0, 5.0, 0.0, 2.0, 0.0), isTransposed = true)
+ val dm3 = new DenseMatrix(2, 3, Array(0.0, 0.0, 0.0, 0.0, 0.0, 0.0))
+
+ val sm1 = dm1.toSparseColMajor
+ assert(sm1 === dm1)
+ assert(sm1.isColMajor)
+ assert(sm1.values === Array(4.0, 2.0, 5.0))
+
+ val sm3 = dm2.toSparseColMajor
+ assert(sm3 === dm2)
+ assert(sm3.isColMajor)
+ assert(sm3.values === Array(4.0, 2.0, 5.0))
+
+ val sm5 = dm3.toSparseColMajor
+ assert(sm5 === dm3)
+ assert(sm5.values === Array.empty[Double])
+ assert(sm5.isColMajor)
+
+ val sm2 = dm1.toSparseRowMajor
+ assert(sm2 === dm1)
+ assert(sm2.isRowMajor)
+ assert(sm2.values === Array(4.0, 5.0, 2.0))
+
+ val sm4 = dm2.toSparseRowMajor
+ assert(sm4 === dm2)
+ assert(sm4.isRowMajor)
+ assert(sm4.values === Array(4.0, 5.0, 2.0))
+
+ val sm6 = dm3.toSparseRowMajor
+ assert(sm6 === dm3)
+ assert(sm6.values === Array.empty[Double])
+ assert(sm6.isRowMajor)
+
+ val sm7 = dm1.toSparse
+ assert(sm7 === dm1)
+ assert(sm7.values === Array(4.0, 2.0, 5.0))
+ assert(sm7.isColMajor)
+
+ val sm10 = dm2.toSparse
+ assert(sm10 === dm2)
+ assert(sm10.values === Array(4.0, 5.0, 2.0))
+ assert(sm10.isRowMajor)
+ }
+
+ test("sparse to sparse") {
+ /*
+ sm1 = sm2 = sm3 = sm4 = 0.0 4.0 5.0
+ 0.0 2.0 0.0
+ smZeros = 0.0 0.0 0.0
+ 0.0 0.0 0.0
+ */
+ val sm1 = new SparseMatrix(2, 3, Array(0, 0, 2, 3), Array(0, 1, 0), Array(4.0, 2.0, 5.0))
+ val sm2 = new SparseMatrix(2, 3, Array(0, 2, 3), Array(1, 2, 1), Array(4.0, 5.0, 2.0),
+ isTransposed = true)
+ val sm3 = new SparseMatrix(2, 3, Array(0, 0, 2, 4), Array(0, 1, 0, 1),
+ Array(4.0, 2.0, 5.0, 0.0))
+ val sm4 = new SparseMatrix(2, 3, Array(0, 2, 4), Array(1, 2, 1, 2),
+ Array(4.0, 5.0, 2.0, 0.0), isTransposed = true)
+ val smZeros = new SparseMatrix(2, 3, Array(0, 2, 4, 6), Array(0, 1, 0, 1, 0, 1),
+ Array(0.0, 0.0, 0.0, 0.0, 0.0, 0.0))
+
+ val sm6 = sm1.toSparseColMajor
+ assert(sm6 === sm1)
+ assert(sm6.isColMajor)
+ assert(sm6.values.equals(sm1.values))
+
+ val sm7 = sm2.toSparseColMajor
+ assert(sm7 === sm2)
+ assert(sm7.isColMajor)
+ assert(sm7.values === Array(4.0, 2.0, 5.0))
+
+ val sm16 = sm3.toSparseColMajor
+ assert(sm16 === sm3)
+ assert(sm16.isColMajor)
+ assert(sm16.values === Array(4.0, 2.0, 5.0))
+
+ val sm14 = sm4.toSparseColMajor
+ assert(sm14 === sm4)
+ assert(sm14.values === Array(4.0, 2.0, 5.0))
+ assert(sm14.isColMajor)
+
+ val sm15 = smZeros.toSparseColMajor
+ assert(sm15 === smZeros)
+ assert(sm15.values === Array.empty[Double])
+ assert(sm15.isColMajor)
+
+ val sm5 = sm1.toSparseRowMajor
+ assert(sm5 === sm1)
+ assert(sm5.isRowMajor)
+ assert(sm5.values === Array(4.0, 5.0, 2.0))
+
+ val sm8 = sm2.toSparseRowMajor
+ assert(sm8 === sm2)
+ assert(sm8.isRowMajor)
+ assert(sm8.values.equals(sm2.values))
+
+ val sm10 = sm3.toSparseRowMajor
+ assert(sm10 === sm3)
+ assert(sm10.values === Array(4.0, 5.0, 2.0))
+ assert(sm10.isRowMajor)
+
+ val sm11 = sm4.toSparseRowMajor
+ assert(sm11 === sm4)
+ assert(sm11.values === Array(4.0, 5.0, 2.0))
+ assert(sm11.isRowMajor)
+
+ val sm17 = smZeros.toSparseRowMajor
+ assert(sm17 === smZeros)
+ assert(sm17.values === Array.empty[Double])
+ assert(sm17.isRowMajor)
+
+ val sm9 = sm3.toSparse
+ assert(sm9 === sm3)
+ assert(sm9.values === Array(4.0, 2.0, 5.0))
+ assert(sm9.isColMajor)
+
+ val sm12 = sm4.toSparse
+ assert(sm12 === sm4)
+ assert(sm12.values === Array(4.0, 5.0, 2.0))
+ assert(sm12.isRowMajor)
+
+ val sm13 = smZeros.toSparse
+ assert(sm13 === smZeros)
+ assert(sm13.values === Array.empty[Double])
+ assert(sm13.isColMajor)
+ }
+
+ test("sparse to dense") {
+ /*
+ sm1 = sm2 = 0.0 4.0 5.0
+ 0.0 2.0 0.0
+
+ sm3 = 0.0 0.0 0.0
+ 0.0 0.0 0.0
+ */
+ val sm1 = new SparseMatrix(2, 3, Array(0, 0, 2, 3), Array(0, 1, 0), Array(4.0, 2.0, 5.0))
+ val sm2 = new SparseMatrix(2, 3, Array(0, 2, 3), Array(1, 2, 1), Array(4.0, 5.0, 2.0),
+ isTransposed = true)
+ val sm3 = new SparseMatrix(2, 3, Array(0, 0, 0, 0), Array.empty[Int], Array.empty[Double])
+
+ val dm6 = sm1.toDenseColMajor
+ assert(dm6 === sm1)
+ assert(dm6.isColMajor)
+ assert(dm6.values === Array(0.0, 0.0, 4.0, 2.0, 5.0, 0.0))
+
+ val dm7 = sm2.toDenseColMajor
+ assert(dm7 === sm2)
+ assert(dm7.isColMajor)
+ assert(dm7.values === Array(0.0, 0.0, 4.0, 2.0, 5.0, 0.0))
+
+ val dm2 = sm1.toDenseRowMajor
+ assert(dm2 === sm1)
+ assert(dm2.isRowMajor)
+ assert(dm2.values === Array(0.0, 4.0, 5.0, 0.0, 2.0, 0.0))
+
+ val dm4 = sm2.toDenseRowMajor
+ assert(dm4 === sm2)
+ assert(dm4.isRowMajor)
+ assert(dm4.values === Array(0.0, 4.0, 5.0, 0.0, 2.0, 0.0))
+
+ val dm1 = sm1.toDense
+ assert(dm1 === sm1)
+ assert(dm1.isColMajor)
+ assert(dm1.values === Array(0.0, 0.0, 4.0, 2.0, 5.0, 0.0))
+
+ val dm3 = sm2.toDense
+ assert(dm3 === sm2)
+ assert(dm3.isRowMajor)
+ assert(dm3.values === Array(0.0, 4.0, 5.0, 0.0, 2.0, 0.0))
+
+ val dm5 = sm3.toDense
+ assert(dm5 === sm3)
+ assert(dm5.isColMajor)
+ assert(dm5.values === Array.fill(6)(0.0))
+ }
+
+ test("compressed dense") {
+ /*
+ dm1 = 1.0 0.0 0.0 0.0
+ 1.0 0.0 0.0 0.0
+ 0.0 0.0 0.0 0.0
+
+ dm2 = 1.0 1.0 0.0 0.0
+ 0.0 0.0 0.0 0.0
+ 0.0 0.0 0.0 0.0
+ */
+ // this should compress to a sparse matrix
+ val dm1 = new DenseMatrix(3, 4, Array.fill(2)(1.0) ++ Array.fill(10)(0.0))
+
+ // optimal compression layout is row major since numRows < numCols
+ val cm1 = dm1.compressed.asInstanceOf[SparseMatrix]
+ assert(cm1 === dm1)
+ assert(cm1.isRowMajor)
+ assert(cm1.getSizeInBytes < dm1.getSizeInBytes)
+
+ // force compressed column major
+ val cm2 = dm1.compressedColMajor.asInstanceOf[SparseMatrix]
+ assert(cm2 === dm1)
+ assert(cm2.isColMajor)
+ assert(cm2.getSizeInBytes < dm1.getSizeInBytes)
+
+ // optimal compression layout for transpose is column major
+ val dm2 = dm1.transpose
+ val cm3 = dm2.compressed.asInstanceOf[SparseMatrix]
+ assert(cm3 === dm2)
+ assert(cm3.isColMajor)
+ assert(cm3.getSizeInBytes < dm2.getSizeInBytes)
+
+ /*
+ dm3 = 1.0 1.0 1.0 0.0
+ 1.0 1.0 0.0 0.0
+ 1.0 1.0 0.0 0.0
+
+ dm4 = 1.0 1.0 1.0 1.0
+ 1.0 1.0 1.0 0.0
+ 0.0 0.0 0.0 0.0
+ */
+ // this should compress to a dense matrix
+ val dm3 = new DenseMatrix(3, 4, Array.fill(7)(1.0) ++ Array.fill(5)(0.0))
+ val dm4 = new DenseMatrix(3, 4, Array.fill(7)(1.0) ++ Array.fill(5)(0.0), isTransposed = true)
+
+ val cm4 = dm3.compressed.asInstanceOf[DenseMatrix]
+ assert(cm4 === dm3)
+ assert(cm4.isColMajor)
+ assert(cm4.values.equals(dm3.values))
+ assert(cm4.getSizeInBytes === dm3.getSizeInBytes)
+
+ // force compressed row major
+ val cm5 = dm3.compressedRowMajor.asInstanceOf[DenseMatrix]
+ assert(cm5 === dm3)
+ assert(cm5.isRowMajor)
+ assert(cm5.getSizeInBytes === dm3.getSizeInBytes)
+
+ val cm6 = dm4.compressed.asInstanceOf[DenseMatrix]
+ assert(cm6 === dm4)
+ assert(cm6.isRowMajor)
+ assert(cm6.values.equals(dm4.values))
+ assert(cm6.getSizeInBytes === dm4.getSizeInBytes)
+
+ val cm7 = dm4.compressedColMajor.asInstanceOf[DenseMatrix]
+ assert(cm7 === dm4)
+ assert(cm7.isColMajor)
+ assert(cm7.getSizeInBytes === dm4.getSizeInBytes)
+
+ // this has the same size sparse or dense
+ val dm5 = new DenseMatrix(4, 4, Array.fill(7)(1.0) ++ Array.fill(9)(0.0))
+ // should choose dense to break ties
+ val cm8 = dm5.compressed.asInstanceOf[DenseMatrix]
+ assert(cm8.getSizeInBytes === dm5.toSparseColMajor.getSizeInBytes)
+ }
- assert(spMat1.asBreeze === spMat2.asBreeze)
- assert(deMat1.asBreeze === deMat2.asBreeze)
+ test("compressed sparse") {
+ /*
+ sm1 = 0.0 -1.0
+ 0.0 0.0
+ 0.0 0.0
+ 0.0 0.0
+
+ sm2 = 0.0 0.0 0.0 0.0
+ -1.0 0.0 0.0 0.0
+ */
+ // these should compress to sparse matrices
+ val sm1 = new SparseMatrix(4, 2, Array(0, 0, 1), Array(0), Array(-1.0))
+ val sm2 = sm1.transpose
+
+ val cm1 = sm1.compressed.asInstanceOf[SparseMatrix]
+ // optimal is column major
+ assert(cm1 === sm1)
+ assert(cm1.isColMajor)
+ assert(cm1.values.equals(sm1.values))
+ assert(cm1.getSizeInBytes === sm1.getSizeInBytes)
+
+ val cm2 = sm1.compressedRowMajor.asInstanceOf[SparseMatrix]
+ assert(cm2 === sm1)
+ assert(cm2.isRowMajor)
+ // forced to be row major, so we have increased the size
+ assert(cm2.getSizeInBytes > sm1.getSizeInBytes)
+ assert(cm2.getSizeInBytes < sm1.toDense.getSizeInBytes)
+
+ val cm9 = sm1.compressedColMajor.asInstanceOf[SparseMatrix]
+ assert(cm9 === sm1)
+ assert(cm9.values.equals(sm1.values))
+ assert(cm9.getSizeInBytes === sm1.getSizeInBytes)
+
+ val cm3 = sm2.compressed.asInstanceOf[SparseMatrix]
+ assert(cm3 === sm2)
+ assert(cm3.isRowMajor)
+ assert(cm3.values.equals(sm2.values))
+ assert(cm3.getSizeInBytes === sm2.getSizeInBytes)
+
+ val cm8 = sm2.compressedColMajor.asInstanceOf[SparseMatrix]
+ assert(cm8 === sm2)
+ assert(cm8.isColMajor)
+ // forced to be col major, so we have increased the size
+ assert(cm8.getSizeInBytes > sm2.getSizeInBytes)
+ assert(cm8.getSizeInBytes < sm2.toDense.getSizeInBytes)
+
+ val cm10 = sm2.compressedRowMajor.asInstanceOf[SparseMatrix]
+ assert(cm10 === sm2)
+ assert(cm10.isRowMajor)
+ assert(cm10.values.equals(sm2.values))
+ assert(cm10.getSizeInBytes === sm2.getSizeInBytes)
+
+
+ /*
+ sm3 = 0.0 -1.0
+ 2.0 3.0
+ -4.0 9.0
+ */
+ // this should compress to a dense matrix
+ val sm3 = new SparseMatrix(3, 2, Array(0, 2, 5), Array(1, 2, 0, 1, 2),
+ Array(2.0, -4.0, -1.0, 3.0, 9.0))
+
+ // dense is optimal, and maintains column major
+ val cm4 = sm3.compressed.asInstanceOf[DenseMatrix]
+ assert(cm4 === sm3)
+ assert(cm4.isColMajor)
+ assert(cm4.getSizeInBytes < sm3.getSizeInBytes)
+
+ val cm5 = sm3.compressedRowMajor.asInstanceOf[DenseMatrix]
+ assert(cm5 === sm3)
+ assert(cm5.isRowMajor)
+ assert(cm5.getSizeInBytes < sm3.getSizeInBytes)
+
+ val cm11 = sm3.compressedColMajor.asInstanceOf[DenseMatrix]
+ assert(cm11 === sm3)
+ assert(cm11.isColMajor)
+ assert(cm11.getSizeInBytes < sm3.getSizeInBytes)
+
+ /*
+ sm4 = 1.0 0.0 0.0 ...
+
+ sm5 = 1.0
+ 0.0
+ 0.0
+ ...
+ */
+ val sm4 = new SparseMatrix(Int.MaxValue, 1, Array(0, 1), Array(0), Array(1.0))
+ val cm6 = sm4.compressed.asInstanceOf[SparseMatrix]
+ assert(cm6 === sm4)
+ assert(cm6.isColMajor)
+ assert(cm6.getSizeInBytes <= sm4.getSizeInBytes)
+
+ val sm5 = new SparseMatrix(1, Int.MaxValue, Array(0, 1), Array(0), Array(1.0),
+ isTransposed = true)
+ val cm7 = sm5.compressed.asInstanceOf[SparseMatrix]
+ assert(cm7 === sm5)
+ assert(cm7.isRowMajor)
+ assert(cm7.getSizeInBytes <= sm5.getSizeInBytes)
+
+ // this has the same size sparse or dense
+ val sm6 = new SparseMatrix(4, 4, Array(0, 4, 7, 7, 7), Array(0, 1, 2, 3, 0, 1, 2),
+ Array.fill(7)(1.0))
+ // should choose dense to break ties
+ val cm12 = sm6.compressed.asInstanceOf[DenseMatrix]
+ assert(cm12.getSizeInBytes === sm6.getSizeInBytes)
}
test("map, update") {
diff --git a/mllib-local/src/test/scala/org/apache/spark/ml/linalg/VectorsSuite.scala b/mllib-local/src/test/scala/org/apache/spark/ml/linalg/VectorsSuite.scala
index ea22c2787f..dfbdaf19d3 100644
--- a/mllib-local/src/test/scala/org/apache/spark/ml/linalg/VectorsSuite.scala
+++ b/mllib-local/src/test/scala/org/apache/spark/ml/linalg/VectorsSuite.scala
@@ -336,6 +336,11 @@ class VectorsSuite extends SparkMLFunSuite {
val sv1 = Vectors.sparse(4, Array(0, 1, 2), Array(1.0, 2.0, 3.0))
val sv1c = sv1.compressed.asInstanceOf[DenseVector]
assert(sv1 === sv1c)
+
+ val sv2 = Vectors.sparse(Int.MaxValue, Array(0), Array(3.4))
+ val sv2c = sv2.compressed.asInstanceOf[SparseVector]
+ assert(sv2c === sv2)
+ assert(sv2c.numActives === 1)
}
test("SparseVector.slice") {