diff options
field | value | date |
---|---|---|
author | Sameer Abhyankar <sabhyankar@sabhyankar-MBP.Samavihome> | 2015-08-17 16:00:23 -0700 |
committer | Xiangrui Meng <meng@databricks.com> | 2015-08-17 16:00:31 -0700 |
commit | 0f1417b6f31e53dd78aae2a0a661d9ba32dce5b7 (patch) | |
tree | c903d4fc7e0acfd518be712449dec9fda4495772 /mllib/src | |
parent | bb3bb2a48ee32a5de4637a73dd11930c72f9c77e (diff) | |
download | spark-0f1417b6f31e53dd78aae2a0a661d9ba32dce5b7.tar.gz spark-0f1417b6f31e53dd78aae2a0a661d9ba32dce5b7.tar.bz2 spark-0f1417b6f31e53dd78aae2a0a661d9ba32dce5b7.zip |
[SPARK-8920] [MLLIB] Add @since tags to mllib.linalg
Author: Sameer Abhyankar <sabhyankar@sabhyankar-MBP.Samavihome>
Author: Sameer Abhyankar <sabhyankar@sabhyankar-MBP.local>
Closes #7729 from sabhyankar/branch_8920.
(cherry picked from commit 088b11ec5949e135cb3db2a1ce136837e046c288)
Signed-off-by: Xiangrui Meng <meng@databricks.com>
Diffstat (limited to 'mllib/src')
8 files changed, 227 insertions, 17 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala index 1139ce36d5..dfa8910fcb 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala @@ -227,6 +227,7 @@ private[spark] class MatrixUDT extends UserDefinedType[Matrix] { * @param values matrix entries in column major if not transposed or in row major otherwise * @param isTransposed whether the matrix is transposed. If true, `values` stores the matrix in * row major. + * @since 1.0.0 */ @SQLUserDefinedType(udt = classOf[MatrixUDT]) class DenseMatrix( @@ -252,6 +253,7 @@ class DenseMatrix( * @param numRows number of rows * @param numCols number of columns * @param values matrix entries in column major + * @since 1.3.0 */ def this(numRows: Int, numCols: Int, values: Array[Double]) = this(numRows, numCols, values, false) @@ -276,6 +278,9 @@ class DenseMatrix( private[mllib] def apply(i: Int): Double = values(i) + /** + * @since 1.3.0 + */ override def apply(i: Int, j: Int): Double = values(index(i, j)) private[mllib] def index(i: Int, j: Int): Int = { @@ -286,6 +291,9 @@ class DenseMatrix( values(index(i, j)) = v } + /** + * @since 1.4.0 + */ override def copy: DenseMatrix = new DenseMatrix(numRows, numCols, values.clone()) private[spark] def map(f: Double => Double) = new DenseMatrix(numRows, numCols, values.map(f), @@ -301,6 +309,9 @@ class DenseMatrix( this } + /** + * @since 1.3.0 + */ override def transpose: DenseMatrix = new DenseMatrix(numCols, numRows, values, !isTransposed) private[spark] override def foreachActive(f: (Int, Int, Double) => Unit): Unit = { @@ -331,13 +342,20 @@ class DenseMatrix( } } + /** + * @since 1.5.0 + */ override def numNonzeros: Int = values.count(_ != 0) + /** + * @since 1.5.0 + */ override def numActives: Int = values.length /** * Generate a `SparseMatrix` from the given `DenseMatrix`. 
The new matrix will have isTransposed * set to false. + * @since 1.3.0 */ def toSparse: SparseMatrix = { val spVals: MArrayBuilder[Double] = new MArrayBuilder.ofDouble @@ -365,6 +383,7 @@ class DenseMatrix( /** * Factory methods for [[org.apache.spark.mllib.linalg.DenseMatrix]]. + * @since 1.3.0 */ object DenseMatrix { @@ -373,6 +392,7 @@ object DenseMatrix { * @param numRows number of rows of the matrix * @param numCols number of columns of the matrix * @return `DenseMatrix` with size `numRows` x `numCols` and values of zeros + * @since 1.3.0 */ def zeros(numRows: Int, numCols: Int): DenseMatrix = { require(numRows.toLong * numCols <= Int.MaxValue, @@ -385,6 +405,7 @@ object DenseMatrix { * @param numRows number of rows of the matrix * @param numCols number of columns of the matrix * @return `DenseMatrix` with size `numRows` x `numCols` and values of ones + * @since 1.3.0 */ def ones(numRows: Int, numCols: Int): DenseMatrix = { require(numRows.toLong * numCols <= Int.MaxValue, @@ -396,6 +417,7 @@ object DenseMatrix { * Generate an Identity Matrix in `DenseMatrix` format. 
* @param n number of rows and columns of the matrix * @return `DenseMatrix` with size `n` x `n` and values of ones on the diagonal + * @since 1.3.0 */ def eye(n: Int): DenseMatrix = { val identity = DenseMatrix.zeros(n, n) @@ -413,6 +435,7 @@ object DenseMatrix { * @param numCols number of columns of the matrix * @param rng a random number generator * @return `DenseMatrix` with size `numRows` x `numCols` and values in U(0, 1) + * @since 1.3.0 */ def rand(numRows: Int, numCols: Int, rng: Random): DenseMatrix = { require(numRows.toLong * numCols <= Int.MaxValue, @@ -426,6 +449,7 @@ object DenseMatrix { * @param numCols number of columns of the matrix * @param rng a random number generator * @return `DenseMatrix` with size `numRows` x `numCols` and values in N(0, 1) + * @since 1.3.0 */ def randn(numRows: Int, numCols: Int, rng: Random): DenseMatrix = { require(numRows.toLong * numCols <= Int.MaxValue, @@ -438,6 +462,7 @@ object DenseMatrix { * @param vector a `Vector` that will form the values on the diagonal of the matrix * @return Square `DenseMatrix` with size `values.length` x `values.length` and `values` * on the diagonal + * @since 1.3.0 */ def diag(vector: Vector): DenseMatrix = { val n = vector.size @@ -473,6 +498,7 @@ object DenseMatrix { * @param isTransposed whether the matrix is transposed. If true, the matrix can be considered * Compressed Sparse Row (CSR) format, where `colPtrs` behaves as rowPtrs, * and `rowIndices` behave as colIndices, and `values` are stored in row major. + * @since 1.2.0 */ @SQLUserDefinedType(udt = classOf[MatrixUDT]) class SparseMatrix( @@ -510,6 +536,7 @@ class SparseMatrix( * @param rowIndices the row index of the entry. 
They must be in strictly increasing * order for each column * @param values non-zero matrix entries in column major + * @since 1.3.0 */ def this( numRows: Int, @@ -532,6 +559,9 @@ class SparseMatrix( } } + /** + * @since 1.3.0 + */ override def apply(i: Int, j: Int): Double = { val ind = index(i, j) if (ind < 0) 0.0 else values(ind) @@ -555,6 +585,9 @@ class SparseMatrix( } } + /** + * @since 1.4.0 + */ override def copy: SparseMatrix = { new SparseMatrix(numRows, numCols, colPtrs, rowIndices, values.clone()) } @@ -572,6 +605,9 @@ class SparseMatrix( this } + /** + * @since 1.3.0 + */ override def transpose: SparseMatrix = new SparseMatrix(numCols, numRows, colPtrs, rowIndices, values, !isTransposed) @@ -605,19 +641,27 @@ class SparseMatrix( /** * Generate a `DenseMatrix` from the given `SparseMatrix`. The new matrix will have isTransposed * set to false. + * @since 1.3.0 */ def toDense: DenseMatrix = { new DenseMatrix(numRows, numCols, toArray) } + /** + * @since 1.5.0 + */ override def numNonzeros: Int = values.count(_ != 0) + /** + * @since 1.5.0 + */ override def numActives: Int = values.length } /** * Factory methods for [[org.apache.spark.mllib.linalg.SparseMatrix]]. + * @since 1.3.0 */ object SparseMatrix { @@ -629,6 +673,7 @@ object SparseMatrix { * @param numCols number of columns of the matrix * @param entries Array of (i, j, value) tuples * @return The corresponding `SparseMatrix` + * @since 1.3.0 */ def fromCOO(numRows: Int, numCols: Int, entries: Iterable[(Int, Int, Double)]): SparseMatrix = { val sortedEntries = entries.toSeq.sortBy(v => (v._2, v._1)) @@ -677,6 +722,7 @@ object SparseMatrix { * Generate an Identity Matrix in `SparseMatrix` format. 
* @param n number of rows and columns of the matrix * @return `SparseMatrix` with size `n` x `n` and values of ones on the diagonal + * @since 1.3.0 */ def speye(n: Int): SparseMatrix = { new SparseMatrix(n, n, (0 to n).toArray, (0 until n).toArray, Array.fill(n)(1.0)) @@ -746,6 +792,7 @@ object SparseMatrix { * @param density the desired density for the matrix * @param rng a random number generator * @return `SparseMatrix` with size `numRows` x `numCols` and values in U(0, 1) + * @since 1.3.0 */ def sprand(numRows: Int, numCols: Int, density: Double, rng: Random): SparseMatrix = { val mat = genRandMatrix(numRows, numCols, density, rng) @@ -759,6 +806,7 @@ object SparseMatrix { * @param density the desired density for the matrix * @param rng a random number generator * @return `SparseMatrix` with size `numRows` x `numCols` and values in N(0, 1) + * @since 1.3.0 */ def sprandn(numRows: Int, numCols: Int, density: Double, rng: Random): SparseMatrix = { val mat = genRandMatrix(numRows, numCols, density, rng) @@ -770,6 +818,7 @@ object SparseMatrix { * @param vector a `Vector` that will form the values on the diagonal of the matrix * @return Square `SparseMatrix` with size `values.length` x `values.length` and non-zero * `values` on the diagonal + * @since 1.3.0 */ def spdiag(vector: Vector): SparseMatrix = { val n = vector.size @@ -786,6 +835,7 @@ object SparseMatrix { /** * Factory methods for [[org.apache.spark.mllib.linalg.Matrix]]. 
+ * @since 1.0.0 */ object Matrices { @@ -795,6 +845,7 @@ object Matrices { * @param numRows number of rows * @param numCols number of columns * @param values matrix entries in column major + * @since 1.0.0 */ def dense(numRows: Int, numCols: Int, values: Array[Double]): Matrix = { new DenseMatrix(numRows, numCols, values) @@ -808,6 +859,7 @@ object Matrices { * @param colPtrs the index corresponding to the start of a new column * @param rowIndices the row index of the entry * @param values non-zero matrix entries in column major + * @since 1.2.0 */ def sparse( numRows: Int, @@ -841,6 +893,7 @@ object Matrices { * @param numRows number of rows of the matrix * @param numCols number of columns of the matrix * @return `Matrix` with size `numRows` x `numCols` and values of zeros + * @since 1.2.0 */ def zeros(numRows: Int, numCols: Int): Matrix = DenseMatrix.zeros(numRows, numCols) @@ -849,6 +902,7 @@ object Matrices { * @param numRows number of rows of the matrix * @param numCols number of columns of the matrix * @return `Matrix` with size `numRows` x `numCols` and values of ones + * @since 1.2.0 */ def ones(numRows: Int, numCols: Int): Matrix = DenseMatrix.ones(numRows, numCols) @@ -856,6 +910,7 @@ object Matrices { * Generate a dense Identity Matrix in `Matrix` format. * @param n number of rows and columns of the matrix * @return `Matrix` with size `n` x `n` and values of ones on the diagonal + * @since 1.2.0 */ def eye(n: Int): Matrix = DenseMatrix.eye(n) @@ -863,6 +918,7 @@ object Matrices { * Generate a sparse Identity Matrix in `Matrix` format. 
* @param n number of rows and columns of the matrix * @return `Matrix` with size `n` x `n` and values of ones on the diagonal + * @since 1.3.0 */ def speye(n: Int): Matrix = SparseMatrix.speye(n) @@ -872,6 +928,7 @@ object Matrices { * @param numCols number of columns of the matrix * @param rng a random number generator * @return `Matrix` with size `numRows` x `numCols` and values in U(0, 1) + * @since 1.2.0 */ def rand(numRows: Int, numCols: Int, rng: Random): Matrix = DenseMatrix.rand(numRows, numCols, rng) @@ -883,6 +940,7 @@ object Matrices { * @param density the desired density for the matrix * @param rng a random number generator * @return `Matrix` with size `numRows` x `numCols` and values in U(0, 1) + * @since 1.3.0 */ def sprand(numRows: Int, numCols: Int, density: Double, rng: Random): Matrix = SparseMatrix.sprand(numRows, numCols, density, rng) @@ -893,6 +951,7 @@ object Matrices { * @param numCols number of columns of the matrix * @param rng a random number generator * @return `Matrix` with size `numRows` x `numCols` and values in N(0, 1) + * @since 1.2.0 */ def randn(numRows: Int, numCols: Int, rng: Random): Matrix = DenseMatrix.randn(numRows, numCols, rng) @@ -904,6 +963,7 @@ object Matrices { * @param density the desired density for the matrix * @param rng a random number generator * @return `Matrix` with size `numRows` x `numCols` and values in N(0, 1) + * @since 1.3.0 */ def sprandn(numRows: Int, numCols: Int, density: Double, rng: Random): Matrix = SparseMatrix.sprandn(numRows, numCols, density, rng) @@ -913,6 +973,7 @@ object Matrices { * @param vector a `Vector` that will form the values on the diagonal of the matrix * @return Square `Matrix` with size `values.length` x `values.length` and `values` * on the diagonal + * @since 1.2.0 */ def diag(vector: Vector): Matrix = DenseMatrix.diag(vector) @@ -922,6 +983,7 @@ object Matrices { * a sparse matrix. If the Array is empty, an empty `DenseMatrix` will be returned. 
* @param matrices array of matrices * @return a single `Matrix` composed of the matrices that were horizontally concatenated + * @since 1.3.0 */ def horzcat(matrices: Array[Matrix]): Matrix = { if (matrices.isEmpty) { @@ -980,6 +1042,7 @@ object Matrices { * a sparse matrix. If the Array is empty, an empty `DenseMatrix` will be returned. * @param matrices array of matrices * @return a single `Matrix` composed of the matrices that were vertically concatenated + * @since 1.3.0 */ def vertcat(matrices: Array[Matrix]): Matrix = { if (matrices.isEmpty) { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/SingularValueDecomposition.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/SingularValueDecomposition.scala index cff5dbeee3..8f504f6984 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/SingularValueDecomposition.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/SingularValueDecomposition.scala @@ -22,6 +22,7 @@ import org.apache.spark.annotation.Experimental /** * :: Experimental :: * Represents singular value decomposition (SVD) factors. + * @since 1.0.0 */ @Experimental case class SingularValueDecomposition[UType, VType](U: UType, s: Vector, V: VType) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala index df15d985c8..52ef7be3b3 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala @@ -240,11 +240,13 @@ class VectorUDT extends UserDefinedType[Vector] { * Factory methods for [[org.apache.spark.mllib.linalg.Vector]]. * We don't use the name `Vector` because Scala imports * [[scala.collection.immutable.Vector]] by default. + * @since 1.0.0 */ object Vectors { /** * Creates a dense vector from its values. 
+ * @since 1.0.0 */ @varargs def dense(firstValue: Double, otherValues: Double*): Vector = @@ -253,6 +255,7 @@ object Vectors { // A dummy implicit is used to avoid signature collision with the one generated by @varargs. /** * Creates a dense vector from a double array. + * @since 1.0.0 */ def dense(values: Array[Double]): Vector = new DenseVector(values) @@ -262,6 +265,7 @@ object Vectors { * @param size vector size. * @param indices index array, must be strictly increasing. * @param values value array, must have the same length as indices. + * @since 1.0.0 */ def sparse(size: Int, indices: Array[Int], values: Array[Double]): Vector = new SparseVector(size, indices, values) @@ -271,6 +275,7 @@ object Vectors { * * @param size vector size. * @param elements vector elements in (index, value) pairs. + * @since 1.0.0 */ def sparse(size: Int, elements: Seq[(Int, Double)]): Vector = { require(size > 0, "The size of the requested sparse vector must be greater than 0.") @@ -292,6 +297,7 @@ object Vectors { * * @param size vector size. * @param elements vector elements in (index, value) pairs. + * @since 1.0.0 */ def sparse(size: Int, elements: JavaIterable[(JavaInteger, JavaDouble)]): Vector = { sparse(size, elements.asScala.map { case (i, x) => @@ -304,6 +310,7 @@ object Vectors { * * @param size vector size * @return a zero vector + * @since 1.1.0 */ def zeros(size: Int): Vector = { new DenseVector(new Array[Double](size)) @@ -311,6 +318,7 @@ object Vectors { /** * Parses a string resulted from [[Vector.toString]] into a [[Vector]]. + * @since 1.1.0 */ def parse(s: String): Vector = { parseNumeric(NumericParser.parse(s)) @@ -354,6 +362,7 @@ object Vectors { * @param vector input vector. * @param p norm. * @return norm in L^p^ space. + * @since 1.3.0 */ def norm(vector: Vector, p: Double): Double = { require(p >= 1.0, "To compute the p-norm of the vector, we require that you specify a p>=1. " + @@ -406,6 +415,7 @@ object Vectors { * @param v1 first Vector. 
* @param v2 second Vector. * @return squared distance between two Vectors. + * @since 1.3.0 */ def sqdist(v1: Vector, v2: Vector): Double = { require(v1.size == v2.size, s"Vector dimensions do not match: Dim(v1)=${v1.size} and Dim(v2)" + @@ -519,20 +529,33 @@ object Vectors { /** * A dense vector represented by a value array. + * @since 1.0.0 */ @SQLUserDefinedType(udt = classOf[VectorUDT]) class DenseVector(val values: Array[Double]) extends Vector { + /** + * @since 1.0.0 + */ override def size: Int = values.length override def toString: String = values.mkString("[", ",", "]") + /** + * @since 1.0.0 + */ override def toArray: Array[Double] = values private[spark] override def toBreeze: BV[Double] = new BDV[Double](values) + /** + * @since 1.0.0 + */ override def apply(i: Int): Double = values(i) + /** + * @since 1.1.0 + */ override def copy: DenseVector = { new DenseVector(values.clone()) } @@ -564,8 +587,14 @@ class DenseVector(val values: Array[Double]) extends Vector { result } + /** + * @since 1.4.0 + */ override def numActives: Int = size + /** + * @since 1.4.0 + */ override def numNonzeros: Int = { // same as values.count(_ != 0.0) but faster var nnz = 0 @@ -577,6 +606,9 @@ class DenseVector(val values: Array[Double]) extends Vector { nnz } + /** + * @since 1.4.0 + */ override def toSparse: SparseVector = { val nnz = numNonzeros val ii = new Array[Int](nnz) @@ -592,6 +624,9 @@ class DenseVector(val values: Array[Double]) extends Vector { new SparseVector(size, ii, vv) } + /** + * @since 1.5.0 + */ override def argmax: Int = { if (size == 0) { -1 @@ -611,6 +646,9 @@ class DenseVector(val values: Array[Double]) extends Vector { } } +/** + * @since 1.3.0 + */ object DenseVector { /** Extracts the value array from a dense vector. */ def unapply(dv: DenseVector): Option[Array[Double]] = Some(dv.values) @@ -622,6 +660,7 @@ object DenseVector { * @param size size of the vector. * @param indices index array, assume to be strictly increasing. 
* @param values value array, must have the same length as the index array. + * @since 1.0.0 */ @SQLUserDefinedType(udt = classOf[VectorUDT]) class SparseVector( @@ -638,6 +677,9 @@ class SparseVector( override def toString: String = s"($size,${indices.mkString("[", ",", "]")},${values.mkString("[", ",", "]")})" + /** + * @since 1.0.0 + */ override def toArray: Array[Double] = { val data = new Array[Double](size) var i = 0 @@ -649,6 +691,9 @@ class SparseVector( data } + /** + * @since 1.1.0 + */ override def copy: SparseVector = { new SparseVector(size, indices.clone(), values.clone()) } @@ -689,8 +734,14 @@ class SparseVector( result } + /** + * @since 1.4.0 + */ override def numActives: Int = values.length + /** + * @since 1.4.0 + */ override def numNonzeros: Int = { var nnz = 0 values.foreach { v => @@ -701,6 +752,9 @@ class SparseVector( nnz } + /** + * @since 1.4.0 + */ override def toSparse: SparseVector = { val nnz = numNonzeros if (nnz == numActives) { @@ -720,6 +774,9 @@ class SparseVector( } } + /** + * @since 1.5.0 + */ override def argmax: Int = { if (size == 0) { -1 @@ -790,6 +847,9 @@ class SparseVector( } } +/** + * @since 1.3.0 + */ object SparseVector { def unapply(sv: SparseVector): Option[(Int, Array[Int], Array[Double])] = Some((sv.size, sv.indices, sv.values)) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala index 3323ae7b1f..cfb6680a18 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala @@ -128,6 +128,8 @@ private[mllib] object GridPartitioner { * the number of rows will be calculated when `numRows` is invoked. * @param nCols Number of columns of this matrix. If the supplied value is less than or equal to * zero, the number of columns will be calculated when `numCols` is invoked. 
+ * @since 1.3.0 + * */ @Experimental class BlockMatrix( @@ -149,6 +151,9 @@ class BlockMatrix( * rows are not required to have the given number of rows * @param colsPerBlock Number of columns that make up each block. The blocks forming the final * columns are not required to have the given number of columns + * + * @since 1.3.0 + * */ def this( blocks: RDD[((Int, Int), Matrix)], @@ -157,11 +162,20 @@ class BlockMatrix( this(blocks, rowsPerBlock, colsPerBlock, 0L, 0L) } + /** + * @since 1.3.0 + * */ + override def numRows(): Long = { if (nRows <= 0L) estimateDim() nRows } + /** + * + * @since 1.3.0 + */ + override def numCols(): Long = { if (nCols <= 0L) estimateDim() nCols @@ -192,6 +206,7 @@ class BlockMatrix( /** * Validates the block matrix info against the matrix data (`blocks`) and throws an exception if * any error is found. + * @since 1.3.0 */ def validate(): Unit = { logDebug("Validating BlockMatrix...") @@ -228,19 +243,25 @@ class BlockMatrix( logDebug("BlockMatrix is valid!") } - /** Caches the underlying RDD. */ + /** Caches the underlying RDD. + * @since 1.3.0 + * */ def cache(): this.type = { blocks.cache() this } - /** Persists the underlying RDD with the specified storage level. */ + /** Persists the underlying RDD with the specified storage level. + * @since 1.3.0 + * */ def persist(storageLevel: StorageLevel): this.type = { blocks.persist(storageLevel) this } - /** Converts to CoordinateMatrix. */ + /** Converts to CoordinateMatrix. + * @since 1.3.0 + * */ def toCoordinateMatrix(): CoordinateMatrix = { val entryRDD = blocks.flatMap { case ((blockRowIndex, blockColIndex), mat) => val rowStart = blockRowIndex.toLong * rowsPerBlock @@ -254,7 +275,9 @@ class BlockMatrix( new CoordinateMatrix(entryRDD, numRows(), numCols()) } - /** Converts to IndexedRowMatrix. The number of columns must be within the integer range. */ + /** Converts to IndexedRowMatrix. The number of columns must be within the integer range. 
+ * @since 1.3.0 + * */ def toIndexedRowMatrix(): IndexedRowMatrix = { require(numCols() < Int.MaxValue, "The number of columns must be within the integer range. " + s"numCols: ${numCols()}") @@ -262,7 +285,9 @@ class BlockMatrix( toCoordinateMatrix().toIndexedRowMatrix() } - /** Collect the distributed matrix on the driver as a `DenseMatrix`. */ + /** Collect the distributed matrix on the driver as a `DenseMatrix`. + * @since 1.3.0 + * */ def toLocalMatrix(): Matrix = { require(numRows() < Int.MaxValue, "The number of rows of this matrix should be less than " + s"Int.MaxValue. Currently numRows: ${numRows()}") @@ -288,7 +313,10 @@ class BlockMatrix( } /** Transpose this `BlockMatrix`. Returns a new `BlockMatrix` instance sharing the - * same underlying data. Is a lazy operation. */ + * same underlying data. Is a lazy operation. + * @since 1.3.0 + * + * */ def transpose: BlockMatrix = { val transposedBlocks = blocks.map { case ((blockRowIndex, blockColIndex), mat) => ((blockColIndex, blockRowIndex), mat.transpose) @@ -307,6 +335,7 @@ class BlockMatrix( * instances of [[SparseMatrix]], the resulting sub matrix will also be a [[SparseMatrix]], even * if it is being added to a [[DenseMatrix]]. If two dense matrices are added, the output will * also be a [[DenseMatrix]]. + * @since 1.3.0 */ def add(other: BlockMatrix): BlockMatrix = { require(numRows() == other.numRows(), "Both matrices must have the same number of rows. " + @@ -340,6 +369,8 @@ class BlockMatrix( * [[SparseMatrix]], they will have to be converted to a [[DenseMatrix]]. The output * [[BlockMatrix]] will only consist of blocks of [[DenseMatrix]]. This may cause * some performance issues until support for multiplying two sparse matrices is added. 
+ * + * @since 1.3.0 */ def multiply(other: BlockMatrix): BlockMatrix = { require(numCols() == other.numRows(), "The number of columns of A and the number of rows " + diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala index 078d1fac44..2b751e45dd 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala @@ -29,6 +29,7 @@ import org.apache.spark.mllib.linalg.{Matrix, SparseMatrix, Vectors} * @param i row index * @param j column index * @param value value of the entry + * @since 1.0.0 */ @Experimental case class MatrixEntry(i: Long, j: Long, value: Double) @@ -42,6 +43,7 @@ case class MatrixEntry(i: Long, j: Long, value: Double) * be determined by the max row index plus one. * @param nCols number of columns. A non-positive value means unknown, and then the number of * columns will be determined by the max column index plus one. + * @since 1.0.0 */ @Experimental class CoordinateMatrix( @@ -49,10 +51,14 @@ class CoordinateMatrix( private var nRows: Long, private var nCols: Long) extends DistributedMatrix { - /** Alternative constructor leaving matrix dimensions to be determined automatically. */ + /** Alternative constructor leaving matrix dimensions to be determined automatically. + * @since 1.0.0 + * */ def this(entries: RDD[MatrixEntry]) = this(entries, 0L, 0L) - /** Gets or computes the number of columns. */ + /** Gets or computes the number of columns. + * @since 1.0.0 + * */ override def numCols(): Long = { if (nCols <= 0L) { computeSize() @@ -60,7 +66,9 @@ class CoordinateMatrix( nCols } - /** Gets or computes the number of rows. */ + /** Gets or computes the number of rows. 
+ * @since 1.0.0 + * */ override def numRows(): Long = { if (nRows <= 0L) { computeSize() @@ -68,12 +76,16 @@ class CoordinateMatrix( nRows } - /** Transposes this CoordinateMatrix. */ + /** Transposes this CoordinateMatrix. + * @since 1.3.0 + * */ def transpose(): CoordinateMatrix = { new CoordinateMatrix(entries.map(x => MatrixEntry(x.j, x.i, x.value)), numCols(), numRows()) } - /** Converts to IndexedRowMatrix. The number of columns must be within the integer range. */ + /** Converts to IndexedRowMatrix. The number of columns must be within the integer range. + * @since 1.0.0 + * */ def toIndexedRowMatrix(): IndexedRowMatrix = { val nl = numCols() if (nl > Int.MaxValue) { @@ -92,12 +104,15 @@ class CoordinateMatrix( /** * Converts to RowMatrix, dropping row indices after grouping by row index. * The number of columns must be within the integer range. + * @since 1.0.0 */ def toRowMatrix(): RowMatrix = { toIndexedRowMatrix().toRowMatrix() } - /** Converts to BlockMatrix. Creates blocks of [[SparseMatrix]] with size 1024 x 1024. */ + /** Converts to BlockMatrix. Creates blocks of [[SparseMatrix]] with size 1024 x 1024. + * @since 1.3.0 + * */ def toBlockMatrix(): BlockMatrix = { toBlockMatrix(1024, 1024) } @@ -109,6 +124,7 @@ class CoordinateMatrix( * @param colsPerBlock The number of columns of each block. The blocks at the right edge may have * a smaller value. Must be an integer value greater than 0. 
* @return a [[BlockMatrix]] + * @since 1.3.0 */ def toBlockMatrix(rowsPerBlock: Int, colsPerBlock: Int): BlockMatrix = { require(rowsPerBlock > 0, diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/DistributedMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/DistributedMatrix.scala index a0e26ce3bc..98e90af84a 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/DistributedMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/DistributedMatrix.scala @@ -21,6 +21,7 @@ import breeze.linalg.{DenseMatrix => BDM} /** * Represents a distributively stored matrix backed by one or more RDDs. + * @since 1.0.0 */ trait DistributedMatrix extends Serializable { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala index 1c33b43ea7..a09f88ce28 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala @@ -27,6 +27,7 @@ import org.apache.spark.mllib.linalg.SingularValueDecomposition /** * :: Experimental :: * Represents a row of [[org.apache.spark.mllib.linalg.distributed.IndexedRowMatrix]]. + * @since 1.0.0 */ @Experimental case class IndexedRow(index: Long, vector: Vector) @@ -41,6 +42,7 @@ case class IndexedRow(index: Long, vector: Vector) * be determined by the max row index plus one. * @param nCols number of columns. A non-positive value means unknown, and then the number of * columns will be determined by the size of the first row. + * @since 1.0.0 */ @Experimental class IndexedRowMatrix( @@ -48,9 +50,15 @@ class IndexedRowMatrix( private var nRows: Long, private var nCols: Int) extends DistributedMatrix { - /** Alternative constructor leaving matrix dimensions to be determined automatically. 
*/ + /** Alternative constructor leaving matrix dimensions to be determined automatically. + * @since 1.0.0 + * */ def this(rows: RDD[IndexedRow]) = this(rows, 0L, 0) + /** + * + * @since 1.0.0 + */ override def numCols(): Long = { if (nCols <= 0) { // Calling `first` will throw an exception if `rows` is empty. @@ -59,6 +67,10 @@ class IndexedRowMatrix( nCols } + /** + * + * @since 1.0.0 + */ override def numRows(): Long = { if (nRows <= 0L) { // Reduce will throw an exception if `rows` is empty. @@ -70,12 +82,15 @@ class IndexedRowMatrix( /** * Drops row indices and converts this matrix to a * [[org.apache.spark.mllib.linalg.distributed.RowMatrix]]. + * @since 1.0.0 */ def toRowMatrix(): RowMatrix = { new RowMatrix(rows.map(_.vector), 0L, nCols) } - /** Converts to BlockMatrix. Creates blocks of [[SparseMatrix]] with size 1024 x 1024. */ + /** Converts to BlockMatrix. Creates blocks of [[SparseMatrix]] with size 1024 x 1024. + * @since 1.3.0 + * */ def toBlockMatrix(): BlockMatrix = { toBlockMatrix(1024, 1024) } @@ -87,6 +102,7 @@ class IndexedRowMatrix( * @param colsPerBlock The number of columns of each block. The blocks at the right edge may have * a smaller value. Must be an integer value greater than 0. * @return a [[BlockMatrix]] + * @since 1.3.0 */ def toBlockMatrix(rowsPerBlock: Int, colsPerBlock: Int): BlockMatrix = { // TODO: This implementation may be optimized @@ -96,6 +112,7 @@ class IndexedRowMatrix( /** * Converts this matrix to a * [[org.apache.spark.mllib.linalg.distributed.CoordinateMatrix]]. + * @since 1.3.0 */ def toCoordinateMatrix(): CoordinateMatrix = { val entries = rows.flatMap { row => @@ -132,6 +149,7 @@ class IndexedRowMatrix( * @param rCond the reciprocal condition number. All singular values smaller than rCond * sigma(0) * are treated as zero, where sigma(0) is the largest singular value. 
* @return SingularValueDecomposition(U, s, V) + * @since 1.0.0 */ def computeSVD( k: Int, @@ -158,6 +176,7 @@ class IndexedRowMatrix( * * @param B a local matrix whose number of rows must match the number of columns of this matrix * @return an IndexedRowMatrix representing the product, which preserves partitioning + * @since 1.0.0 */ def multiply(B: Matrix): IndexedRowMatrix = { val mat = toRowMatrix().multiply(B) @@ -169,6 +188,7 @@ class IndexedRowMatrix( /** * Computes the Gramian matrix `A^T A`. + * @since 1.0.0 */ def computeGramianMatrix(): Matrix = { toRowMatrix().computeGramianMatrix() diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala index bfc90c9ef8..b2e94f2dd6 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala @@ -44,6 +44,7 @@ import org.apache.spark.storage.StorageLevel * be determined by the number of records in the RDD `rows`. * @param nCols number of columns. A non-positive value means unknown, and then the number of * columns will be determined by the size of the first row. + * @since 1.0.0 */ @Experimental class RowMatrix( @@ -51,10 +52,14 @@ class RowMatrix( private var nRows: Long, private var nCols: Int) extends DistributedMatrix with Logging { - /** Alternative constructor leaving matrix dimensions to be determined automatically. */ + /** Alternative constructor leaving matrix dimensions to be determined automatically. + * @since 1.0.0 + * */ def this(rows: RDD[Vector]) = this(rows, 0L, 0) - /** Gets or computes the number of columns. */ + /** Gets or computes the number of columns. + * @since 1.0.0 + * */ override def numCols(): Long = { if (nCols <= 0) { try { @@ -69,7 +74,9 @@ class RowMatrix( nCols } - /** Gets or computes the number of rows. */ + /** Gets or computes the number of rows. 
+ * @since 1.0.0 + * */ override def numRows(): Long = { if (nRows <= 0L) { nRows = rows.count() @@ -107,6 +114,7 @@ class RowMatrix( /** * Computes the Gramian matrix `A^T A`. + * @since 1.0.0 */ def computeGramianMatrix(): Matrix = { val n = numCols().toInt @@ -177,6 +185,7 @@ class RowMatrix( * @param rCond the reciprocal condition number. All singular values smaller than rCond * sigma(0) * are treated as zero, where sigma(0) is the largest singular value. * @return SingularValueDecomposition(U, s, V). U = null if computeU = false. + * @since 1.0.0 */ def computeSVD( k: Int, @@ -317,6 +326,7 @@ class RowMatrix( /** * Computes the covariance matrix, treating each row as an observation. * @return a local dense matrix of size n x n + * @since 1.0.0 */ def computeCovariance(): Matrix = { val n = numCols().toInt @@ -370,6 +380,7 @@ class RowMatrix( * * @param k number of top principal components. * @return a matrix of size n-by-k, whose columns are principal components + * @since 1.0.0 */ def computePrincipalComponents(k: Int): Matrix = { val n = numCols().toInt @@ -388,6 +399,7 @@ class RowMatrix( /** * Computes column-wise summary statistics. + * @since 1.0.0 */ def computeColumnSummaryStatistics(): MultivariateStatisticalSummary = { val summary = rows.treeAggregate(new MultivariateOnlineSummarizer)( @@ -403,6 +415,7 @@ class RowMatrix( * @param B a local matrix whose number of rows must match the number of columns of this matrix * @return a [[org.apache.spark.mllib.linalg.distributed.RowMatrix]] representing the product, * which preserves partitioning + * @since 1.0.0 */ def multiply(B: Matrix): RowMatrix = { val n = numCols().toInt @@ -435,6 +448,7 @@ class RowMatrix( * * @return An n x n sparse upper-triangular matrix of cosine similarities between * columns of this matrix. + * @since 1.2.0 */ def columnSimilarities(): CoordinateMatrix = { columnSimilarities(0.0) @@ -478,6 +492,7 @@ class RowMatrix( * with the cost vs estimate quality trade-off described above. 
* @return An n x n sparse upper-triangular matrix of cosine similarities * between columns of this matrix. + * @since 1.2.0 */ def columnSimilarities(threshold: Double): CoordinateMatrix = { require(threshold >= 0, s"Threshold cannot be negative: $threshold") @@ -656,6 +671,9 @@ class RowMatrix( } } +/** + * @since 1.0.0 + */ @Experimental object RowMatrix { |