aboutsummaryrefslogtreecommitdiff
path: root/mllib/src/main
diff options
context:
space:
mode:
authorBurak <brkyvz@gmail.com>2014-09-18 22:18:51 -0700
committerXiangrui Meng <meng@databricks.com>2014-09-18 22:18:51 -0700
commite76ef5cb8eed6b78fb722b3d6fbeb9466a0e3499 (patch)
tree49ccb21c80f167c05202447fb75c85c369bb6693 /mllib/src/main
parente77fa81a61798c89d5a9b6c9dc067d11785254b7 (diff)
downloadspark-e76ef5cb8eed6b78fb722b3d6fbeb9466a0e3499.tar.gz
spark-e76ef5cb8eed6b78fb722b3d6fbeb9466a0e3499.tar.bz2
spark-e76ef5cb8eed6b78fb722b3d6fbeb9466a0e3499.zip
[SPARK-3418] Sparse Matrix support (CCS) and additional native BLAS operations added
Local `SparseMatrix` support added in Compressed Column Storage (CCS) format in addition to Level-2 and Level-3 BLAS operations such as dgemv and dgemm respectively. BLAS doesn't support sparse matrix operations, therefore support for `SparseMatrix`-`DenseMatrix` multiplication and `SparseMatrix`-`DenseVector` implementations have been added. I will post performance comparisons in the comments momentarily. Author: Burak <brkyvz@gmail.com> Closes #2294 from brkyvz/SPARK-3418 and squashes the following commits: 88814ed [Burak] Hopefully fixed MiMa this time 47e49d5 [Burak] really fixed MiMa issue f0bae57 [Burak] [SPARK-3418] Fixed MiMa compatibility issues (excluded from check) 4b7dbec [Burak] 9/17 comments addressed 7af2f83 [Burak] sealed traits Vector and Matrix d3a8a16 [Burak] [SPARK-3418] Squashed missing alpha bug. 421045f [Burak] [SPARK-3418] New code review comments addressed f35a161 [Burak] [SPARK-3418] Code review comments addressed and multiplication further optimized 2508577 [Burak] [SPARK-3418] Fixed one more style issue d16e8a0 [Burak] [SPARK-3418] Fixed style issues and added documentation for methods 204a3f7 [Burak] [SPARK-3418] Fixed failing Matrix unit test 6025297 [Burak] [SPARK-3418] Fixed Scala-style errors dc7be71 [Burak] [SPARK-3418][MLlib] Matrix unit tests expanded with indexing and updating d2d5851 [Burak] [SPARK-3418][MLlib] Sparse Matrix support and additional native BLAS operations added
Diffstat (limited to 'mllib/src/main')
-rw-r--r--mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala330
-rw-r--r--mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala232
-rw-r--r--mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala2
3 files changed, 557 insertions, 7 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala
index 70e23033c8..54ee930d61 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala
@@ -18,13 +18,17 @@
package org.apache.spark.mllib.linalg
import com.github.fommil.netlib.{BLAS => NetlibBLAS, F2jBLAS}
+import com.github.fommil.netlib.BLAS.{getInstance => NativeBLAS}
+
+import org.apache.spark.Logging
/**
* BLAS routines for MLlib's vectors and matrices.
*/
-private[mllib] object BLAS extends Serializable {
+private[mllib] object BLAS extends Serializable with Logging {
@transient private var _f2jBLAS: NetlibBLAS = _
+ @transient private var _nativeBLAS: NetlibBLAS = _
// For level-1 routines, we use Java implementation.
private def f2jBLAS: NetlibBLAS = {
@@ -197,4 +201,328 @@ private[mllib] object BLAS extends Serializable {
throw new IllegalArgumentException(s"scal doesn't support vector type ${x.getClass}.")
}
}
+
+ // For level-3 routines, we use the native BLAS.
+ private def nativeBLAS: NetlibBLAS = {
+ if (_nativeBLAS == null) {
+ _nativeBLAS = NativeBLAS
+ }
+ _nativeBLAS
+ }
+
+ /**
+ * C := alpha * A * B + beta * C
+ * @param transA whether to use the transpose of matrix A (true), or A itself (false).
+ * @param transB whether to use the transpose of matrix B (true), or B itself (false).
+ * @param alpha a scalar to scale the multiplication A * B.
+ * @param A the matrix A that will be left multiplied to B. Size of m x k.
+ * @param B the matrix B that will be left multiplied by A. Size of k x n.
+ * @param beta a scalar that can be used to scale matrix C.
+ * @param C the resulting matrix C. Size of m x n.
+ */
+ def gemm(
+ transA: Boolean,
+ transB: Boolean,
+ alpha: Double,
+ A: Matrix,
+ B: DenseMatrix,
+ beta: Double,
+ C: DenseMatrix): Unit = {
+ if (alpha == 0.0) {
+ logDebug("gemm: alpha is equal to 0. Returning C.")
+ } else {
+ A match {
+ case sparse: SparseMatrix =>
+ gemm(transA, transB, alpha, sparse, B, beta, C)
+ case dense: DenseMatrix =>
+ gemm(transA, transB, alpha, dense, B, beta, C)
+ case _ =>
+ throw new IllegalArgumentException(s"gemm doesn't support matrix type ${A.getClass}.")
+ }
+ }
+ }
+
+ /**
+ * C := alpha * A * B + beta * C
+ *
+ * @param alpha a scalar to scale the multiplication A * B.
+ * @param A the matrix A that will be left multiplied to B. Size of m x k.
+ * @param B the matrix B that will be left multiplied by A. Size of k x n.
+ * @param beta a scalar that can be used to scale matrix C.
+ * @param C the resulting matrix C. Size of m x n.
+ */
+ def gemm(
+ alpha: Double,
+ A: Matrix,
+ B: DenseMatrix,
+ beta: Double,
+ C: DenseMatrix): Unit = {
+ gemm(false, false, alpha, A, B, beta, C)
+ }
+
+ /**
+ * C := alpha * A * B + beta * C
+ * For `DenseMatrix` A.
+ */
+ private def gemm(
+ transA: Boolean,
+ transB: Boolean,
+ alpha: Double,
+ A: DenseMatrix,
+ B: DenseMatrix,
+ beta: Double,
+ C: DenseMatrix): Unit = {
+ val mA: Int = if (!transA) A.numRows else A.numCols
+ val nB: Int = if (!transB) B.numCols else B.numRows
+ val kA: Int = if (!transA) A.numCols else A.numRows
+ val kB: Int = if (!transB) B.numRows else B.numCols
+ val tAstr = if (!transA) "N" else "T"
+ val tBstr = if (!transB) "N" else "T"
+
+ require(kA == kB, s"The columns of A don't match the rows of B. A: $kA, B: $kB")
+ require(mA == C.numRows, s"The rows of C don't match the rows of A. C: ${C.numRows}, A: $mA")
+ require(nB == C.numCols,
+ s"The columns of C don't match the columns of B. C: ${C.numCols}, A: $nB")
+
+ nativeBLAS.dgemm(tAstr, tBstr, mA, nB, kA, alpha, A.values, A.numRows, B.values, B.numRows,
+ beta, C.values, C.numRows)
+ }
+
+ /**
+ * C := alpha * A * B + beta * C
+ * For `SparseMatrix` A.
+ */
+ private def gemm(
+ transA: Boolean,
+ transB: Boolean,
+ alpha: Double,
+ A: SparseMatrix,
+ B: DenseMatrix,
+ beta: Double,
+ C: DenseMatrix): Unit = {
+ val mA: Int = if (!transA) A.numRows else A.numCols
+ val nB: Int = if (!transB) B.numCols else B.numRows
+ val kA: Int = if (!transA) A.numCols else A.numRows
+ val kB: Int = if (!transB) B.numRows else B.numCols
+
+ require(kA == kB, s"The columns of A don't match the rows of B. A: $kA, B: $kB")
+ require(mA == C.numRows, s"The rows of C don't match the rows of A. C: ${C.numRows}, A: $mA")
+ require(nB == C.numCols,
+ s"The columns of C don't match the columns of B. C: ${C.numCols}, A: $nB")
+
+ val Avals = A.values
+ val Arows = if (!transA) A.rowIndices else A.colPtrs
+ val Acols = if (!transA) A.colPtrs else A.rowIndices
+
+ // Slicing is easy in this case. This is the optimal multiplication setting for sparse matrices
+ if (transA){
+ var colCounterForB = 0
+ if (!transB) { // Expensive to put the check inside the loop
+ while (colCounterForB < nB) {
+ var rowCounterForA = 0
+ val Cstart = colCounterForB * mA
+ val Bstart = colCounterForB * kA
+ while (rowCounterForA < mA) {
+ var i = Arows(rowCounterForA)
+ val indEnd = Arows(rowCounterForA + 1)
+ var sum = 0.0
+ while (i < indEnd) {
+ sum += Avals(i) * B.values(Bstart + Acols(i))
+ i += 1
+ }
+ val Cindex = Cstart + rowCounterForA
+ C.values(Cindex) = beta * C.values(Cindex) + sum * alpha
+ rowCounterForA += 1
+ }
+ colCounterForB += 1
+ }
+ } else {
+ while (colCounterForB < nB) {
+ var rowCounter = 0
+ val Cstart = colCounterForB * mA
+ while (rowCounter < mA) {
+ var i = Arows(rowCounter)
+ val indEnd = Arows(rowCounter + 1)
+ var sum = 0.0
+ while (i < indEnd) {
+ sum += Avals(i) * B(colCounterForB, Acols(i))
+ i += 1
+ }
+ val Cindex = Cstart + rowCounter
+ C.values(Cindex) = beta * C.values(Cindex) + sum * alpha
+ rowCounter += 1
+ }
+ colCounterForB += 1
+ }
+ }
+ } else {
+ // Scale matrix first if `beta` is not equal to 0.0
+ if (beta != 0.0){
+ f2jBLAS.dscal(C.values.length, beta, C.values, 1)
+ }
+ // Perform matrix multiplication and add to C. The rows of A are multiplied by the columns of
+ // B, and added to C.
+ var colCounterForB = 0 // the column to be updated in C
+ if (!transB) { // Expensive to put the check inside the loop
+ while (colCounterForB < nB) {
+ var colCounterForA = 0 // The column of A to multiply with the row of B
+ val Bstart = colCounterForB * kB
+ val Cstart = colCounterForB * mA
+ while (colCounterForA < kA) {
+ var i = Acols(colCounterForA)
+ val indEnd = Acols(colCounterForA + 1)
+ val Bval = B.values(Bstart + colCounterForA) * alpha
+ while (i < indEnd){
+ C.values(Cstart + Arows(i)) += Avals(i) * Bval
+ i += 1
+ }
+ colCounterForA += 1
+ }
+ colCounterForB += 1
+ }
+ } else {
+ while (colCounterForB < nB) {
+ var colCounterForA = 0 // The column of A to multiply with the row of B
+ val Cstart = colCounterForB * mA
+ while (colCounterForA < kA){
+ var i = Acols(colCounterForA)
+ val indEnd = Acols(colCounterForA + 1)
+ val Bval = B(colCounterForB, colCounterForA) * alpha
+ while (i < indEnd){
+ C.values(Cstart + Arows(i)) += Avals(i) * Bval
+ i += 1
+ }
+ colCounterForA += 1
+ }
+ colCounterForB += 1
+ }
+ }
+ }
+ }
+
+ /**
+ * y := alpha * A * x + beta * y
+ * @param trans whether to use the transpose of matrix A (true), or A itself (false).
+ * @param alpha a scalar to scale the multiplication A * x.
+ * @param A the matrix A that will be left multiplied to x. Size of m x n.
+ * @param x the vector x that will be left multiplied by A. Size of n x 1.
+ * @param beta a scalar that can be used to scale vector y.
+ * @param y the resulting vector y. Size of m x 1.
+ */
+ def gemv(
+ trans: Boolean,
+ alpha: Double,
+ A: Matrix,
+ x: DenseVector,
+ beta: Double,
+ y: DenseVector): Unit = {
+
+ val mA: Int = if (!trans) A.numRows else A.numCols
+ val nx: Int = x.size
+ val nA: Int = if (!trans) A.numCols else A.numRows
+
+ require(nA == nx, s"The columns of A don't match the number of elements of x. A: $nA, x: $nx")
+ require(mA == y.size,
+ s"The rows of A don't match the number of elements of y. A: $mA, y:${y.size}}")
+ if (alpha == 0.0) {
+ logDebug("gemv: alpha is equal to 0. Returning y.")
+ } else {
+ A match {
+ case sparse: SparseMatrix =>
+ gemv(trans, alpha, sparse, x, beta, y)
+ case dense: DenseMatrix =>
+ gemv(trans, alpha, dense, x, beta, y)
+ case _ =>
+ throw new IllegalArgumentException(s"gemv doesn't support matrix type ${A.getClass}.")
+ }
+ }
+ }
+
+ /**
+ * y := alpha * A * x + beta * y
+ *
+ * @param alpha a scalar to scale the multiplication A * x.
+ * @param A the matrix A that will be left multiplied to x. Size of m x n.
+ * @param x the vector x that will be left multiplied by A. Size of n x 1.
+ * @param beta a scalar that can be used to scale vector y.
+ * @param y the resulting vector y. Size of m x 1.
+ */
+ def gemv(
+ alpha: Double,
+ A: Matrix,
+ x: DenseVector,
+ beta: Double,
+ y: DenseVector): Unit = {
+ gemv(false, alpha, A, x, beta, y)
+ }
+
+ /**
+ * y := alpha * A * x + beta * y
+ * For `DenseMatrix` A.
+ */
+ private def gemv(
+ trans: Boolean,
+ alpha: Double,
+ A: DenseMatrix,
+ x: DenseVector,
+ beta: Double,
+ y: DenseVector): Unit = {
+ val tStrA = if (!trans) "N" else "T"
+ nativeBLAS.dgemv(tStrA, A.numRows, A.numCols, alpha, A.values, A.numRows, x.values, 1, beta,
+ y.values, 1)
+ }
+
+ /**
+ * y := alpha * A * x + beta * y
+ * For `SparseMatrix` A.
+ */
+ private def gemv(
+ trans: Boolean,
+ alpha: Double,
+ A: SparseMatrix,
+ x: DenseVector,
+ beta: Double,
+ y: DenseVector): Unit = {
+
+ val mA: Int = if(!trans) A.numRows else A.numCols
+ val nA: Int = if(!trans) A.numCols else A.numRows
+
+ val Avals = A.values
+ val Arows = if (!trans) A.rowIndices else A.colPtrs
+ val Acols = if (!trans) A.colPtrs else A.rowIndices
+
+ // Slicing is easy in this case. This is the optimal multiplication setting for sparse matrices
+ if (trans){
+ var rowCounter = 0
+ while (rowCounter < mA){
+ var i = Arows(rowCounter)
+ val indEnd = Arows(rowCounter + 1)
+ var sum = 0.0
+ while(i < indEnd){
+ sum += Avals(i) * x.values(Acols(i))
+ i += 1
+ }
+ y.values(rowCounter) = beta * y.values(rowCounter) + sum * alpha
+ rowCounter += 1
+ }
+ } else {
+ // Scale vector first if `beta` is not equal to 0.0
+ if (beta != 0.0){
+ scal(beta, y)
+ }
+ // Perform matrix-vector multiplication and add to y
+ var colCounterForA = 0
+ while (colCounterForA < nA){
+ var i = Acols(colCounterForA)
+ val indEnd = Acols(colCounterForA + 1)
+ val xVal = x.values(colCounterForA) * alpha
+ while (i < indEnd){
+ val rowIndex = Arows(i)
+ y.values(rowIndex) += Avals(i) * xVal
+ i += 1
+ }
+ colCounterForA += 1
+ }
+ }
+ }
}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
index b11ba5d30f..5711532abc 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
@@ -17,12 +17,16 @@
package org.apache.spark.mllib.linalg
-import breeze.linalg.{Matrix => BM, DenseMatrix => BDM}
+import breeze.linalg.{Matrix => BM, DenseMatrix => BDM, CSCMatrix => BSM}
+
+import org.apache.spark.util.random.XORShiftRandom
+
+import java.util.Arrays
/**
* Trait for a local matrix.
*/
-trait Matrix extends Serializable {
+sealed trait Matrix extends Serializable {
/** Number of rows. */
def numRows: Int
@@ -37,8 +41,46 @@ trait Matrix extends Serializable {
private[mllib] def toBreeze: BM[Double]
/** Gets the (i, j)-th element. */
- private[mllib] def apply(i: Int, j: Int): Double = toBreeze(i, j)
+ private[mllib] def apply(i: Int, j: Int): Double
+
+ /** Return the index for the (i, j)-th element in the backing array. */
+ private[mllib] def index(i: Int, j: Int): Int
+
+ /** Update element at (i, j) */
+ private[mllib] def update(i: Int, j: Int, v: Double): Unit
+
+ /** Get a deep copy of the matrix. */
+ def copy: Matrix
+ /** Convenience method for `Matrix`-`DenseMatrix` multiplication. */
+ def multiply(y: DenseMatrix): DenseMatrix = {
+ val C: DenseMatrix = Matrices.zeros(numRows, y.numCols).asInstanceOf[DenseMatrix]
+ BLAS.gemm(false, false, 1.0, this, y, 0.0, C)
+ C
+ }
+
+ /** Convenience method for `Matrix`-`DenseVector` multiplication. */
+ def multiply(y: DenseVector): DenseVector = {
+ val output = new DenseVector(new Array[Double](numRows))
+ BLAS.gemv(1.0, this, y, 0.0, output)
+ output
+ }
+
+ /** Convenience method for `Matrix`^T^-`DenseMatrix` multiplication. */
+ def transposeMultiply(y: DenseMatrix): DenseMatrix = {
+ val C: DenseMatrix = Matrices.zeros(numCols, y.numCols).asInstanceOf[DenseMatrix]
+ BLAS.gemm(true, false, 1.0, this, y, 0.0, C)
+ C
+ }
+
+ /** Convenience method for `Matrix`^T^-`DenseVector` multiplication. */
+ def transposeMultiply(y: DenseVector): DenseVector = {
+ val output = new DenseVector(new Array[Double](numCols))
+ BLAS.gemv(true, 1.0, this, y, 0.0, output)
+ output
+ }
+
+ /** A human readable representation of the matrix */
override def toString: String = toBreeze.toString()
}
@@ -59,11 +101,98 @@ trait Matrix extends Serializable {
*/
class DenseMatrix(val numRows: Int, val numCols: Int, val values: Array[Double]) extends Matrix {
- require(values.length == numRows * numCols)
+ require(values.length == numRows * numCols, "The number of values supplied doesn't match the " +
+ s"size of the matrix! values.length: ${values.length}, numRows * numCols: ${numRows * numCols}")
override def toArray: Array[Double] = values
- private[mllib] override def toBreeze: BM[Double] = new BDM[Double](numRows, numCols, values)
+ private[mllib] def toBreeze: BM[Double] = new BDM[Double](numRows, numCols, values)
+
+ private[mllib] def apply(i: Int): Double = values(i)
+
+ private[mllib] def apply(i: Int, j: Int): Double = values(index(i, j))
+
+ private[mllib] def index(i: Int, j: Int): Int = i + numRows * j
+
+ private[mllib] def update(i: Int, j: Int, v: Double): Unit = {
+ values(index(i, j)) = v
+ }
+
+ override def copy = new DenseMatrix(numRows, numCols, values.clone())
+}
+
+/**
+ * Column-major sparse matrix.
+ * The entry values are stored in Compressed Sparse Column (CSC) format.
+ * For example, the following matrix
+ * {{{
+ * 1.0 0.0 4.0
+ * 0.0 3.0 5.0
+ * 2.0 0.0 6.0
+ * }}}
+ * is stored as `values: [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]`,
+ * `rowIndices=[0, 2, 1, 0, 1, 2]`, `colPtrs=[0, 2, 3, 6]`.
+ *
+ * @param numRows number of rows
+ * @param numCols number of columns
+ * @param colPtrs the index corresponding to the start of a new column
+ * @param rowIndices the row index of the entry. They must be in strictly increasing order for each
+ * column
+ * @param values non-zero matrix entries in column major
+ */
+class SparseMatrix(
+ val numRows: Int,
+ val numCols: Int,
+ val colPtrs: Array[Int],
+ val rowIndices: Array[Int],
+ val values: Array[Double]) extends Matrix {
+
+ require(values.length == rowIndices.length, "The number of row indices and values don't match! " +
+ s"values.length: ${values.length}, rowIndices.length: ${rowIndices.length}")
+ require(colPtrs.length == numCols + 1, "The length of the column indices should be the " +
+ s"number of columns + 1. Currently, colPointers.length: ${colPtrs.length}, " +
+ s"numCols: $numCols")
+
+ override def toArray: Array[Double] = {
+ val arr = new Array[Double](numRows * numCols)
+ var j = 0
+ while (j < numCols) {
+ var i = colPtrs(j)
+ val indEnd = colPtrs(j + 1)
+ val offset = j * numRows
+ while (i < indEnd) {
+ val rowIndex = rowIndices(i)
+ arr(offset + rowIndex) = values(i)
+ i += 1
+ }
+ j += 1
+ }
+ arr
+ }
+
+ private[mllib] def toBreeze: BM[Double] =
+ new BSM[Double](values, numRows, numCols, colPtrs, rowIndices)
+
+ private[mllib] def apply(i: Int, j: Int): Double = {
+ val ind = index(i, j)
+ if (ind < 0) 0.0 else values(ind)
+ }
+
+ private[mllib] def index(i: Int, j: Int): Int = {
+ Arrays.binarySearch(rowIndices, colPtrs(j), colPtrs(j + 1), i)
+ }
+
+ private[mllib] def update(i: Int, j: Int, v: Double): Unit = {
+ val ind = index(i, j)
+ if (ind == -1){
+ throw new NoSuchElementException("The given row and column indices correspond to a zero " +
+ "value. Only non-zero elements in Sparse Matrices can be updated.")
+ } else {
+ values(index(i, j)) = v
+ }
+ }
+
+ override def copy = new SparseMatrix(numRows, numCols, colPtrs, rowIndices, values.clone())
}
/**
@@ -83,6 +212,24 @@ object Matrices {
}
/**
+ * Creates a column-major sparse matrix in Compressed Sparse Column (CSC) format.
+ *
+ * @param numRows number of rows
+ * @param numCols number of columns
+ * @param colPtrs the index corresponding to the start of a new column
+ * @param rowIndices the row index of the entry
+ * @param values non-zero matrix entries in column major
+ */
+ def sparse(
+ numRows: Int,
+ numCols: Int,
+ colPtrs: Array[Int],
+ rowIndices: Array[Int],
+ values: Array[Double]): Matrix = {
+ new SparseMatrix(numRows, numCols, colPtrs, rowIndices, values)
+ }
+
+ /**
* Creates a Matrix instance from a breeze matrix.
* @param breeze a breeze matrix
* @return a Matrix instance
@@ -93,9 +240,84 @@ object Matrices {
require(dm.majorStride == dm.rows,
"Do not support stride size different from the number of rows.")
new DenseMatrix(dm.rows, dm.cols, dm.data)
+ case sm: BSM[Double] =>
+ new SparseMatrix(sm.rows, sm.cols, sm.colPtrs, sm.rowIndices, sm.data)
case _ =>
throw new UnsupportedOperationException(
s"Do not support conversion from type ${breeze.getClass.getName}.")
}
}
+
+ /**
+ * Generate a `DenseMatrix` consisting of zeros.
+ * @param numRows number of rows of the matrix
+ * @param numCols number of columns of the matrix
+ * @return `DenseMatrix` with size `numRows` x `numCols` and values of zeros
+ */
+ def zeros(numRows: Int, numCols: Int): Matrix =
+ new DenseMatrix(numRows, numCols, new Array[Double](numRows * numCols))
+
+ /**
+ * Generate a `DenseMatrix` consisting of ones.
+ * @param numRows number of rows of the matrix
+ * @param numCols number of columns of the matrix
+ * @return `DenseMatrix` with size `numRows` x `numCols` and values of ones
+ */
+ def ones(numRows: Int, numCols: Int): Matrix =
+ new DenseMatrix(numRows, numCols, Array.fill(numRows * numCols)(1.0))
+
+ /**
+ * Generate an Identity Matrix in `DenseMatrix` format.
+ * @param n number of rows and columns of the matrix
+ * @return `DenseMatrix` with size `n` x `n` and values of ones on the diagonal
+ */
+ def eye(n: Int): Matrix = {
+ val identity = Matrices.zeros(n, n)
+ var i = 0
+ while (i < n){
+ identity.update(i, i, 1.0)
+ i += 1
+ }
+ identity
+ }
+
+ /**
+ * Generate a `DenseMatrix` consisting of i.i.d. uniform random numbers.
+ * @param numRows number of rows of the matrix
+ * @param numCols number of columns of the matrix
+ * @return `DenseMatrix` with size `numRows` x `numCols` and values in U(0, 1)
+ */
+ def rand(numRows: Int, numCols: Int): Matrix = {
+ val rand = new XORShiftRandom
+ new DenseMatrix(numRows, numCols, Array.fill(numRows * numCols)(rand.nextDouble()))
+ }
+
+ /**
+ * Generate a `DenseMatrix` consisting of i.i.d. gaussian random numbers.
+ * @param numRows number of rows of the matrix
+ * @param numCols number of columns of the matrix
+ * @return `DenseMatrix` with size `numRows` x `numCols` and values in N(0, 1)
+ */
+ def randn(numRows: Int, numCols: Int): Matrix = {
+ val rand = new XORShiftRandom
+ new DenseMatrix(numRows, numCols, Array.fill(numRows * numCols)(rand.nextGaussian()))
+ }
+
+ /**
+ * Generate a diagonal matrix in `DenseMatrix` format from the supplied values.
+ * @param vector a `Vector` that will form the values on the diagonal of the matrix
+ * @return Square `DenseMatrix` with size `values.length` x `values.length` and `values`
+ * on the diagonal
+ */
+ def diag(vector: Vector): Matrix = {
+ val n = vector.size
+ val matrix = Matrices.eye(n)
+ val values = vector.toArray
+ var i = 0
+ while (i < n) {
+ matrix.update(i, i, values(i))
+ i += 1
+ }
+ matrix
+ }
}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala
index a45781d12e..6af225b7f4 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala
@@ -33,7 +33,7 @@ import org.apache.spark.SparkException
*
* Note: Users should not implement this interface.
*/
-trait Vector extends Serializable {
+sealed trait Vector extends Serializable {
/**
* Size of the vector.