about summary refs log tree commit diff
path: root/mllib
diff options
context:
space:
mode:
author Sameer Abhyankar <sabhyankar@sabhyankar-MBP.Samavihome> 2015-08-17 16:00:23 -0700
committer Xiangrui Meng <meng@databricks.com> 2015-08-17 16:00:23 -0700
commit 088b11ec5949e135cb3db2a1ce136837e046c288 (patch)
tree 775d0003ed4f26c375cb2a847f85b447a7bf8b4a /mllib
parent fdaf17f63f751f02623414fbc7d0a2f545364050 (diff)
download spark-088b11ec5949e135cb3db2a1ce136837e046c288.tar.gz
spark-088b11ec5949e135cb3db2a1ce136837e046c288.tar.bz2
spark-088b11ec5949e135cb3db2a1ce136837e046c288.zip
[SPARK-8920] [MLLIB] Add @since tags to mllib.linalg
Author: Sameer Abhyankar <sabhyankar@sabhyankar-MBP.Samavihome>
Author: Sameer Abhyankar <sabhyankar@sabhyankar-MBP.local>

Closes #7729 from sabhyankar/branch_8920.
Diffstat (limited to 'mllib')
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala | 63
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/linalg/SingularValueDecomposition.scala | 1
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala | 60
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala | 43
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala | 28
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/DistributedMatrix.scala | 1
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala | 24
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala | 24
8 files changed, 227 insertions, 17 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
index 1139ce36d5..dfa8910fcb 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
@@ -227,6 +227,7 @@ private[spark] class MatrixUDT extends UserDefinedType[Matrix] {
* @param values matrix entries in column major if not transposed or in row major otherwise
* @param isTransposed whether the matrix is transposed. If true, `values` stores the matrix in
* row major.
+ * @since 1.0.0
*/
@SQLUserDefinedType(udt = classOf[MatrixUDT])
class DenseMatrix(
@@ -252,6 +253,7 @@ class DenseMatrix(
* @param numRows number of rows
* @param numCols number of columns
* @param values matrix entries in column major
+ * @since 1.3.0
*/
def this(numRows: Int, numCols: Int, values: Array[Double]) =
this(numRows, numCols, values, false)
@@ -276,6 +278,9 @@ class DenseMatrix(
private[mllib] def apply(i: Int): Double = values(i)
+ /**
+ * @since 1.3.0
+ */
override def apply(i: Int, j: Int): Double = values(index(i, j))
private[mllib] def index(i: Int, j: Int): Int = {
@@ -286,6 +291,9 @@ class DenseMatrix(
values(index(i, j)) = v
}
+ /**
+ * @since 1.4.0
+ */
override def copy: DenseMatrix = new DenseMatrix(numRows, numCols, values.clone())
private[spark] def map(f: Double => Double) = new DenseMatrix(numRows, numCols, values.map(f),
@@ -301,6 +309,9 @@ class DenseMatrix(
this
}
+ /**
+ * @since 1.3.0
+ */
override def transpose: DenseMatrix = new DenseMatrix(numCols, numRows, values, !isTransposed)
private[spark] override def foreachActive(f: (Int, Int, Double) => Unit): Unit = {
@@ -331,13 +342,20 @@ class DenseMatrix(
}
}
+ /**
+ * @since 1.5.0
+ */
override def numNonzeros: Int = values.count(_ != 0)
+ /**
+ * @since 1.5.0
+ */
override def numActives: Int = values.length
/**
* Generate a `SparseMatrix` from the given `DenseMatrix`. The new matrix will have isTransposed
* set to false.
+ * @since 1.3.0
*/
def toSparse: SparseMatrix = {
val spVals: MArrayBuilder[Double] = new MArrayBuilder.ofDouble
@@ -365,6 +383,7 @@ class DenseMatrix(
/**
* Factory methods for [[org.apache.spark.mllib.linalg.DenseMatrix]].
+ * @since 1.3.0
*/
object DenseMatrix {
@@ -373,6 +392,7 @@ object DenseMatrix {
* @param numRows number of rows of the matrix
* @param numCols number of columns of the matrix
* @return `DenseMatrix` with size `numRows` x `numCols` and values of zeros
+ * @since 1.3.0
*/
def zeros(numRows: Int, numCols: Int): DenseMatrix = {
require(numRows.toLong * numCols <= Int.MaxValue,
@@ -385,6 +405,7 @@ object DenseMatrix {
* @param numRows number of rows of the matrix
* @param numCols number of columns of the matrix
* @return `DenseMatrix` with size `numRows` x `numCols` and values of ones
+ * @since 1.3.0
*/
def ones(numRows: Int, numCols: Int): DenseMatrix = {
require(numRows.toLong * numCols <= Int.MaxValue,
@@ -396,6 +417,7 @@ object DenseMatrix {
* Generate an Identity Matrix in `DenseMatrix` format.
* @param n number of rows and columns of the matrix
* @return `DenseMatrix` with size `n` x `n` and values of ones on the diagonal
+ * @since 1.3.0
*/
def eye(n: Int): DenseMatrix = {
val identity = DenseMatrix.zeros(n, n)
@@ -413,6 +435,7 @@ object DenseMatrix {
* @param numCols number of columns of the matrix
* @param rng a random number generator
* @return `DenseMatrix` with size `numRows` x `numCols` and values in U(0, 1)
+ * @since 1.3.0
*/
def rand(numRows: Int, numCols: Int, rng: Random): DenseMatrix = {
require(numRows.toLong * numCols <= Int.MaxValue,
@@ -426,6 +449,7 @@ object DenseMatrix {
* @param numCols number of columns of the matrix
* @param rng a random number generator
* @return `DenseMatrix` with size `numRows` x `numCols` and values in N(0, 1)
+ * @since 1.3.0
*/
def randn(numRows: Int, numCols: Int, rng: Random): DenseMatrix = {
require(numRows.toLong * numCols <= Int.MaxValue,
@@ -438,6 +462,7 @@ object DenseMatrix {
* @param vector a `Vector` that will form the values on the diagonal of the matrix
* @return Square `DenseMatrix` with size `values.length` x `values.length` and `values`
* on the diagonal
+ * @since 1.3.0
*/
def diag(vector: Vector): DenseMatrix = {
val n = vector.size
@@ -473,6 +498,7 @@ object DenseMatrix {
* @param isTransposed whether the matrix is transposed. If true, the matrix can be considered
* Compressed Sparse Row (CSR) format, where `colPtrs` behaves as rowPtrs,
* and `rowIndices` behave as colIndices, and `values` are stored in row major.
+ * @since 1.2.0
*/
@SQLUserDefinedType(udt = classOf[MatrixUDT])
class SparseMatrix(
@@ -510,6 +536,7 @@ class SparseMatrix(
* @param rowIndices the row index of the entry. They must be in strictly increasing
* order for each column
* @param values non-zero matrix entries in column major
+ * @since 1.3.0
*/
def this(
numRows: Int,
@@ -532,6 +559,9 @@ class SparseMatrix(
}
}
+ /**
+ * @since 1.3.0
+ */
override def apply(i: Int, j: Int): Double = {
val ind = index(i, j)
if (ind < 0) 0.0 else values(ind)
@@ -555,6 +585,9 @@ class SparseMatrix(
}
}
+ /**
+ * @since 1.4.0
+ */
override def copy: SparseMatrix = {
new SparseMatrix(numRows, numCols, colPtrs, rowIndices, values.clone())
}
@@ -572,6 +605,9 @@ class SparseMatrix(
this
}
+ /**
+ * @since 1.3.0
+ */
override def transpose: SparseMatrix =
new SparseMatrix(numCols, numRows, colPtrs, rowIndices, values, !isTransposed)
@@ -605,19 +641,27 @@ class SparseMatrix(
/**
* Generate a `DenseMatrix` from the given `SparseMatrix`. The new matrix will have isTransposed
* set to false.
+ * @since 1.3.0
*/
def toDense: DenseMatrix = {
new DenseMatrix(numRows, numCols, toArray)
}
+ /**
+ * @since 1.5.0
+ */
override def numNonzeros: Int = values.count(_ != 0)
+ /**
+ * @since 1.5.0
+ */
override def numActives: Int = values.length
}
/**
* Factory methods for [[org.apache.spark.mllib.linalg.SparseMatrix]].
+ * @since 1.3.0
*/
object SparseMatrix {
@@ -629,6 +673,7 @@ object SparseMatrix {
* @param numCols number of columns of the matrix
* @param entries Array of (i, j, value) tuples
* @return The corresponding `SparseMatrix`
+ * @since 1.3.0
*/
def fromCOO(numRows: Int, numCols: Int, entries: Iterable[(Int, Int, Double)]): SparseMatrix = {
val sortedEntries = entries.toSeq.sortBy(v => (v._2, v._1))
@@ -677,6 +722,7 @@ object SparseMatrix {
* Generate an Identity Matrix in `SparseMatrix` format.
* @param n number of rows and columns of the matrix
* @return `SparseMatrix` with size `n` x `n` and values of ones on the diagonal
+ * @since 1.3.0
*/
def speye(n: Int): SparseMatrix = {
new SparseMatrix(n, n, (0 to n).toArray, (0 until n).toArray, Array.fill(n)(1.0))
@@ -746,6 +792,7 @@ object SparseMatrix {
* @param density the desired density for the matrix
* @param rng a random number generator
* @return `SparseMatrix` with size `numRows` x `numCols` and values in U(0, 1)
+ * @since 1.3.0
*/
def sprand(numRows: Int, numCols: Int, density: Double, rng: Random): SparseMatrix = {
val mat = genRandMatrix(numRows, numCols, density, rng)
@@ -759,6 +806,7 @@ object SparseMatrix {
* @param density the desired density for the matrix
* @param rng a random number generator
* @return `SparseMatrix` with size `numRows` x `numCols` and values in N(0, 1)
+ * @since 1.3.0
*/
def sprandn(numRows: Int, numCols: Int, density: Double, rng: Random): SparseMatrix = {
val mat = genRandMatrix(numRows, numCols, density, rng)
@@ -770,6 +818,7 @@ object SparseMatrix {
* @param vector a `Vector` that will form the values on the diagonal of the matrix
* @return Square `SparseMatrix` with size `values.length` x `values.length` and non-zero
* `values` on the diagonal
+ * @since 1.3.0
*/
def spdiag(vector: Vector): SparseMatrix = {
val n = vector.size
@@ -786,6 +835,7 @@ object SparseMatrix {
/**
* Factory methods for [[org.apache.spark.mllib.linalg.Matrix]].
+ * @since 1.0.0
*/
object Matrices {
@@ -795,6 +845,7 @@ object Matrices {
* @param numRows number of rows
* @param numCols number of columns
* @param values matrix entries in column major
+ * @since 1.0.0
*/
def dense(numRows: Int, numCols: Int, values: Array[Double]): Matrix = {
new DenseMatrix(numRows, numCols, values)
@@ -808,6 +859,7 @@ object Matrices {
* @param colPtrs the index corresponding to the start of a new column
* @param rowIndices the row index of the entry
* @param values non-zero matrix entries in column major
+ * @since 1.2.0
*/
def sparse(
numRows: Int,
@@ -841,6 +893,7 @@ object Matrices {
* @param numRows number of rows of the matrix
* @param numCols number of columns of the matrix
* @return `Matrix` with size `numRows` x `numCols` and values of zeros
+ * @since 1.2.0
*/
def zeros(numRows: Int, numCols: Int): Matrix = DenseMatrix.zeros(numRows, numCols)
@@ -849,6 +902,7 @@ object Matrices {
* @param numRows number of rows of the matrix
* @param numCols number of columns of the matrix
* @return `Matrix` with size `numRows` x `numCols` and values of ones
+ * @since 1.2.0
*/
def ones(numRows: Int, numCols: Int): Matrix = DenseMatrix.ones(numRows, numCols)
@@ -856,6 +910,7 @@ object Matrices {
* Generate a dense Identity Matrix in `Matrix` format.
* @param n number of rows and columns of the matrix
* @return `Matrix` with size `n` x `n` and values of ones on the diagonal
+ * @since 1.2.0
*/
def eye(n: Int): Matrix = DenseMatrix.eye(n)
@@ -863,6 +918,7 @@ object Matrices {
* Generate a sparse Identity Matrix in `Matrix` format.
* @param n number of rows and columns of the matrix
* @return `Matrix` with size `n` x `n` and values of ones on the diagonal
+ * @since 1.3.0
*/
def speye(n: Int): Matrix = SparseMatrix.speye(n)
@@ -872,6 +928,7 @@ object Matrices {
* @param numCols number of columns of the matrix
* @param rng a random number generator
* @return `Matrix` with size `numRows` x `numCols` and values in U(0, 1)
+ * @since 1.2.0
*/
def rand(numRows: Int, numCols: Int, rng: Random): Matrix =
DenseMatrix.rand(numRows, numCols, rng)
@@ -883,6 +940,7 @@ object Matrices {
* @param density the desired density for the matrix
* @param rng a random number generator
* @return `Matrix` with size `numRows` x `numCols` and values in U(0, 1)
+ * @since 1.3.0
*/
def sprand(numRows: Int, numCols: Int, density: Double, rng: Random): Matrix =
SparseMatrix.sprand(numRows, numCols, density, rng)
@@ -893,6 +951,7 @@ object Matrices {
* @param numCols number of columns of the matrix
* @param rng a random number generator
* @return `Matrix` with size `numRows` x `numCols` and values in N(0, 1)
+ * @since 1.2.0
*/
def randn(numRows: Int, numCols: Int, rng: Random): Matrix =
DenseMatrix.randn(numRows, numCols, rng)
@@ -904,6 +963,7 @@ object Matrices {
* @param density the desired density for the matrix
* @param rng a random number generator
* @return `Matrix` with size `numRows` x `numCols` and values in N(0, 1)
+ * @since 1.3.0
*/
def sprandn(numRows: Int, numCols: Int, density: Double, rng: Random): Matrix =
SparseMatrix.sprandn(numRows, numCols, density, rng)
@@ -913,6 +973,7 @@ object Matrices {
* @param vector a `Vector` that will form the values on the diagonal of the matrix
* @return Square `Matrix` with size `values.length` x `values.length` and `values`
* on the diagonal
+ * @since 1.2.0
*/
def diag(vector: Vector): Matrix = DenseMatrix.diag(vector)
@@ -922,6 +983,7 @@ object Matrices {
* a sparse matrix. If the Array is empty, an empty `DenseMatrix` will be returned.
* @param matrices array of matrices
* @return a single `Matrix` composed of the matrices that were horizontally concatenated
+ * @since 1.3.0
*/
def horzcat(matrices: Array[Matrix]): Matrix = {
if (matrices.isEmpty) {
@@ -980,6 +1042,7 @@ object Matrices {
* a sparse matrix. If the Array is empty, an empty `DenseMatrix` will be returned.
* @param matrices array of matrices
* @return a single `Matrix` composed of the matrices that were vertically concatenated
+ * @since 1.3.0
*/
def vertcat(matrices: Array[Matrix]): Matrix = {
if (matrices.isEmpty) {
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/SingularValueDecomposition.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/SingularValueDecomposition.scala
index cff5dbeee3..8f504f6984 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/SingularValueDecomposition.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/SingularValueDecomposition.scala
@@ -22,6 +22,7 @@ import org.apache.spark.annotation.Experimental
/**
* :: Experimental ::
* Represents singular value decomposition (SVD) factors.
+ * @since 1.0.0
*/
@Experimental
case class SingularValueDecomposition[UType, VType](U: UType, s: Vector, V: VType)
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala
index df15d985c8..52ef7be3b3 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala
@@ -240,11 +240,13 @@ class VectorUDT extends UserDefinedType[Vector] {
* Factory methods for [[org.apache.spark.mllib.linalg.Vector]].
* We don't use the name `Vector` because Scala imports
* [[scala.collection.immutable.Vector]] by default.
+ * @since 1.0.0
*/
object Vectors {
/**
* Creates a dense vector from its values.
+ * @since 1.0.0
*/
@varargs
def dense(firstValue: Double, otherValues: Double*): Vector =
@@ -253,6 +255,7 @@ object Vectors {
// A dummy implicit is used to avoid signature collision with the one generated by @varargs.
/**
* Creates a dense vector from a double array.
+ * @since 1.0.0
*/
def dense(values: Array[Double]): Vector = new DenseVector(values)
@@ -262,6 +265,7 @@ object Vectors {
* @param size vector size.
* @param indices index array, must be strictly increasing.
* @param values value array, must have the same length as indices.
+ * @since 1.0.0
*/
def sparse(size: Int, indices: Array[Int], values: Array[Double]): Vector =
new SparseVector(size, indices, values)
@@ -271,6 +275,7 @@ object Vectors {
*
* @param size vector size.
* @param elements vector elements in (index, value) pairs.
+ * @since 1.0.0
*/
def sparse(size: Int, elements: Seq[(Int, Double)]): Vector = {
require(size > 0, "The size of the requested sparse vector must be greater than 0.")
@@ -292,6 +297,7 @@ object Vectors {
*
* @param size vector size.
* @param elements vector elements in (index, value) pairs.
+ * @since 1.0.0
*/
def sparse(size: Int, elements: JavaIterable[(JavaInteger, JavaDouble)]): Vector = {
sparse(size, elements.asScala.map { case (i, x) =>
@@ -304,6 +310,7 @@ object Vectors {
*
* @param size vector size
* @return a zero vector
+ * @since 1.1.0
*/
def zeros(size: Int): Vector = {
new DenseVector(new Array[Double](size))
@@ -311,6 +318,7 @@ object Vectors {
/**
* Parses a string resulted from [[Vector.toString]] into a [[Vector]].
+ * @since 1.1.0
*/
def parse(s: String): Vector = {
parseNumeric(NumericParser.parse(s))
@@ -354,6 +362,7 @@ object Vectors {
* @param vector input vector.
* @param p norm.
* @return norm in L^p^ space.
+ * @since 1.3.0
*/
def norm(vector: Vector, p: Double): Double = {
require(p >= 1.0, "To compute the p-norm of the vector, we require that you specify a p>=1. " +
@@ -406,6 +415,7 @@ object Vectors {
* @param v1 first Vector.
* @param v2 second Vector.
* @return squared distance between two Vectors.
+ * @since 1.3.0
*/
def sqdist(v1: Vector, v2: Vector): Double = {
require(v1.size == v2.size, s"Vector dimensions do not match: Dim(v1)=${v1.size} and Dim(v2)" +
@@ -519,20 +529,33 @@ object Vectors {
/**
* A dense vector represented by a value array.
+ * @since 1.0.0
*/
@SQLUserDefinedType(udt = classOf[VectorUDT])
class DenseVector(val values: Array[Double]) extends Vector {
+ /**
+ * @since 1.0.0
+ */
override def size: Int = values.length
override def toString: String = values.mkString("[", ",", "]")
+ /**
+ * @since 1.0.0
+ */
override def toArray: Array[Double] = values
private[spark] override def toBreeze: BV[Double] = new BDV[Double](values)
+ /**
+ * @since 1.0.0
+ */
override def apply(i: Int): Double = values(i)
+ /**
+ * @since 1.1.0
+ */
override def copy: DenseVector = {
new DenseVector(values.clone())
}
@@ -564,8 +587,14 @@ class DenseVector(val values: Array[Double]) extends Vector {
result
}
+ /**
+ * @since 1.4.0
+ */
override def numActives: Int = size
+ /**
+ * @since 1.4.0
+ */
override def numNonzeros: Int = {
// same as values.count(_ != 0.0) but faster
var nnz = 0
@@ -577,6 +606,9 @@ class DenseVector(val values: Array[Double]) extends Vector {
nnz
}
+ /**
+ * @since 1.4.0
+ */
override def toSparse: SparseVector = {
val nnz = numNonzeros
val ii = new Array[Int](nnz)
@@ -592,6 +624,9 @@ class DenseVector(val values: Array[Double]) extends Vector {
new SparseVector(size, ii, vv)
}
+ /**
+ * @since 1.5.0
+ */
override def argmax: Int = {
if (size == 0) {
-1
@@ -611,6 +646,9 @@ class DenseVector(val values: Array[Double]) extends Vector {
}
}
+/**
+ * @since 1.3.0
+ */
object DenseVector {
/** Extracts the value array from a dense vector. */
def unapply(dv: DenseVector): Option[Array[Double]] = Some(dv.values)
@@ -622,6 +660,7 @@ object DenseVector {
* @param size size of the vector.
* @param indices index array, assume to be strictly increasing.
* @param values value array, must have the same length as the index array.
+ * @since 1.0.0
*/
@SQLUserDefinedType(udt = classOf[VectorUDT])
class SparseVector(
@@ -638,6 +677,9 @@ class SparseVector(
override def toString: String =
s"($size,${indices.mkString("[", ",", "]")},${values.mkString("[", ",", "]")})"
+ /**
+ * @since 1.0.0
+ */
override def toArray: Array[Double] = {
val data = new Array[Double](size)
var i = 0
@@ -649,6 +691,9 @@ class SparseVector(
data
}
+ /**
+ * @since 1.1.0
+ */
override def copy: SparseVector = {
new SparseVector(size, indices.clone(), values.clone())
}
@@ -689,8 +734,14 @@ class SparseVector(
result
}
+ /**
+ * @since 1.4.0
+ */
override def numActives: Int = values.length
+ /**
+ * @since 1.4.0
+ */
override def numNonzeros: Int = {
var nnz = 0
values.foreach { v =>
@@ -701,6 +752,9 @@ class SparseVector(
nnz
}
+ /**
+ * @since 1.4.0
+ */
override def toSparse: SparseVector = {
val nnz = numNonzeros
if (nnz == numActives) {
@@ -720,6 +774,9 @@ class SparseVector(
}
}
+ /**
+ * @since 1.5.0
+ */
override def argmax: Int = {
if (size == 0) {
-1
@@ -790,6 +847,9 @@ class SparseVector(
}
}
+/**
+ * @since 1.3.0
+ */
object SparseVector {
def unapply(sv: SparseVector): Option[(Int, Array[Int], Array[Double])] =
Some((sv.size, sv.indices, sv.values))
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
index 3323ae7b1f..cfb6680a18 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
@@ -128,6 +128,8 @@ private[mllib] object GridPartitioner {
* the number of rows will be calculated when `numRows` is invoked.
* @param nCols Number of columns of this matrix. If the supplied value is less than or equal to
* zero, the number of columns will be calculated when `numCols` is invoked.
+ * @since 1.3.0
+ *
*/
@Experimental
class BlockMatrix(
@@ -149,6 +151,9 @@ class BlockMatrix(
* rows are not required to have the given number of rows
* @param colsPerBlock Number of columns that make up each block. The blocks forming the final
* columns are not required to have the given number of columns
+ *
+ * @since 1.3.0
+ *
*/
def this(
blocks: RDD[((Int, Int), Matrix)],
@@ -157,11 +162,20 @@ class BlockMatrix(
this(blocks, rowsPerBlock, colsPerBlock, 0L, 0L)
}
+ /**
+ * @since 1.3.0
+ * */
+
override def numRows(): Long = {
if (nRows <= 0L) estimateDim()
nRows
}
+ /**
+ *
+ * @since 1.3.0
+ */
+
override def numCols(): Long = {
if (nCols <= 0L) estimateDim()
nCols
@@ -192,6 +206,7 @@ class BlockMatrix(
/**
* Validates the block matrix info against the matrix data (`blocks`) and throws an exception if
* any error is found.
+ * @since 1.3.0
*/
def validate(): Unit = {
logDebug("Validating BlockMatrix...")
@@ -228,19 +243,25 @@ class BlockMatrix(
logDebug("BlockMatrix is valid!")
}
- /** Caches the underlying RDD. */
+ /** Caches the underlying RDD.
+ * @since 1.3.0
+ * */
def cache(): this.type = {
blocks.cache()
this
}
- /** Persists the underlying RDD with the specified storage level. */
+ /** Persists the underlying RDD with the specified storage level.
+ * @since 1.3.0
+ * */
def persist(storageLevel: StorageLevel): this.type = {
blocks.persist(storageLevel)
this
}
- /** Converts to CoordinateMatrix. */
+ /** Converts to CoordinateMatrix.
+ * @since 1.3.0
+ * */
def toCoordinateMatrix(): CoordinateMatrix = {
val entryRDD = blocks.flatMap { case ((blockRowIndex, blockColIndex), mat) =>
val rowStart = blockRowIndex.toLong * rowsPerBlock
@@ -254,7 +275,9 @@ class BlockMatrix(
new CoordinateMatrix(entryRDD, numRows(), numCols())
}
- /** Converts to IndexedRowMatrix. The number of columns must be within the integer range. */
+ /** Converts to IndexedRowMatrix. The number of columns must be within the integer range.
+ * @since 1.3.0
+ * */
def toIndexedRowMatrix(): IndexedRowMatrix = {
require(numCols() < Int.MaxValue, "The number of columns must be within the integer range. " +
s"numCols: ${numCols()}")
@@ -262,7 +285,9 @@ class BlockMatrix(
toCoordinateMatrix().toIndexedRowMatrix()
}
- /** Collect the distributed matrix on the driver as a `DenseMatrix`. */
+ /** Collect the distributed matrix on the driver as a `DenseMatrix`.
+ * @since 1.3.0
+ * */
def toLocalMatrix(): Matrix = {
require(numRows() < Int.MaxValue, "The number of rows of this matrix should be less than " +
s"Int.MaxValue. Currently numRows: ${numRows()}")
@@ -288,7 +313,10 @@ class BlockMatrix(
}
/** Transpose this `BlockMatrix`. Returns a new `BlockMatrix` instance sharing the
- * same underlying data. Is a lazy operation. */
+ * same underlying data. Is a lazy operation.
+ * @since 1.3.0
+ *
+ * */
def transpose: BlockMatrix = {
val transposedBlocks = blocks.map { case ((blockRowIndex, blockColIndex), mat) =>
((blockColIndex, blockRowIndex), mat.transpose)
@@ -307,6 +335,7 @@ class BlockMatrix(
* instances of [[SparseMatrix]], the resulting sub matrix will also be a [[SparseMatrix]], even
* if it is being added to a [[DenseMatrix]]. If two dense matrices are added, the output will
* also be a [[DenseMatrix]].
+ * @since 1.3.0
*/
def add(other: BlockMatrix): BlockMatrix = {
require(numRows() == other.numRows(), "Both matrices must have the same number of rows. " +
@@ -340,6 +369,8 @@ class BlockMatrix(
* [[SparseMatrix]], they will have to be converted to a [[DenseMatrix]]. The output
* [[BlockMatrix]] will only consist of blocks of [[DenseMatrix]]. This may cause
* some performance issues until support for multiplying two sparse matrices is added.
+ *
+ * @since 1.3.0
*/
def multiply(other: BlockMatrix): BlockMatrix = {
require(numCols() == other.numRows(), "The number of columns of A and the number of rows " +
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala
index 078d1fac44..2b751e45dd 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala
@@ -29,6 +29,7 @@ import org.apache.spark.mllib.linalg.{Matrix, SparseMatrix, Vectors}
* @param i row index
* @param j column index
* @param value value of the entry
+ * @since 1.0.0
*/
@Experimental
case class MatrixEntry(i: Long, j: Long, value: Double)
@@ -42,6 +43,7 @@ case class MatrixEntry(i: Long, j: Long, value: Double)
* be determined by the max row index plus one.
* @param nCols number of columns. A non-positive value means unknown, and then the number of
* columns will be determined by the max column index plus one.
+ * @since 1.0.0
*/
@Experimental
class CoordinateMatrix(
@@ -49,10 +51,14 @@ class CoordinateMatrix(
private var nRows: Long,
private var nCols: Long) extends DistributedMatrix {
- /** Alternative constructor leaving matrix dimensions to be determined automatically. */
+ /** Alternative constructor leaving matrix dimensions to be determined automatically.
+ * @since 1.0.0
+ * */
def this(entries: RDD[MatrixEntry]) = this(entries, 0L, 0L)
- /** Gets or computes the number of columns. */
+ /** Gets or computes the number of columns.
+ * @since 1.0.0
+ * */
override def numCols(): Long = {
if (nCols <= 0L) {
computeSize()
@@ -60,7 +66,9 @@ class CoordinateMatrix(
nCols
}
- /** Gets or computes the number of rows. */
+ /** Gets or computes the number of rows.
+ * @since 1.0.0
+ * */
override def numRows(): Long = {
if (nRows <= 0L) {
computeSize()
@@ -68,12 +76,16 @@ class CoordinateMatrix(
nRows
}
- /** Transposes this CoordinateMatrix. */
+ /** Transposes this CoordinateMatrix.
+ * @since 1.3.0
+ * */
def transpose(): CoordinateMatrix = {
new CoordinateMatrix(entries.map(x => MatrixEntry(x.j, x.i, x.value)), numCols(), numRows())
}
- /** Converts to IndexedRowMatrix. The number of columns must be within the integer range. */
+ /** Converts to IndexedRowMatrix. The number of columns must be within the integer range.
+ * @since 1.0.0
+ * */
def toIndexedRowMatrix(): IndexedRowMatrix = {
val nl = numCols()
if (nl > Int.MaxValue) {
@@ -92,12 +104,15 @@ class CoordinateMatrix(
/**
* Converts to RowMatrix, dropping row indices after grouping by row index.
* The number of columns must be within the integer range.
+ * @since 1.0.0
*/
def toRowMatrix(): RowMatrix = {
toIndexedRowMatrix().toRowMatrix()
}
- /** Converts to BlockMatrix. Creates blocks of [[SparseMatrix]] with size 1024 x 1024. */
+ /** Converts to BlockMatrix. Creates blocks of [[SparseMatrix]] with size 1024 x 1024.
+ * @since 1.3.0
+ * */
def toBlockMatrix(): BlockMatrix = {
toBlockMatrix(1024, 1024)
}
@@ -109,6 +124,7 @@ class CoordinateMatrix(
* @param colsPerBlock The number of columns of each block. The blocks at the right edge may have
* a smaller value. Must be an integer value greater than 0.
* @return a [[BlockMatrix]]
+ * @since 1.3.0
*/
def toBlockMatrix(rowsPerBlock: Int, colsPerBlock: Int): BlockMatrix = {
require(rowsPerBlock > 0,
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/DistributedMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/DistributedMatrix.scala
index a0e26ce3bc..98e90af84a 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/DistributedMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/DistributedMatrix.scala
@@ -21,6 +21,7 @@ import breeze.linalg.{DenseMatrix => BDM}
/**
* Represents a distributively stored matrix backed by one or more RDDs.
+ * @since 1.0.0
*/
trait DistributedMatrix extends Serializable {
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala
index 1c33b43ea7..a09f88ce28 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala
@@ -27,6 +27,7 @@ import org.apache.spark.mllib.linalg.SingularValueDecomposition
/**
* :: Experimental ::
* Represents a row of [[org.apache.spark.mllib.linalg.distributed.IndexedRowMatrix]].
+ * @since 1.0.0
*/
@Experimental
case class IndexedRow(index: Long, vector: Vector)
@@ -41,6 +42,7 @@ case class IndexedRow(index: Long, vector: Vector)
* be determined by the max row index plus one.
* @param nCols number of columns. A non-positive value means unknown, and then the number of
* columns will be determined by the size of the first row.
+ * @since 1.0.0
*/
@Experimental
class IndexedRowMatrix(
@@ -48,9 +50,15 @@ class IndexedRowMatrix(
private var nRows: Long,
private var nCols: Int) extends DistributedMatrix {
- /** Alternative constructor leaving matrix dimensions to be determined automatically. */
+ /** Alternative constructor leaving matrix dimensions to be determined automatically.
+ * @since 1.0.0
+ * */
def this(rows: RDD[IndexedRow]) = this(rows, 0L, 0)
+ /**
+ *
+ * @since 1.0.0
+ */
override def numCols(): Long = {
if (nCols <= 0) {
// Calling `first` will throw an exception if `rows` is empty.
@@ -59,6 +67,10 @@ class IndexedRowMatrix(
nCols
}
+ /**
+ *
+ * @since 1.0.0
+ */
override def numRows(): Long = {
if (nRows <= 0L) {
// Reduce will throw an exception if `rows` is empty.
@@ -70,12 +82,15 @@ class IndexedRowMatrix(
/**
* Drops row indices and converts this matrix to a
* [[org.apache.spark.mllib.linalg.distributed.RowMatrix]].
+ * @since 1.0.0
*/
def toRowMatrix(): RowMatrix = {
new RowMatrix(rows.map(_.vector), 0L, nCols)
}
- /** Converts to BlockMatrix. Creates blocks of [[SparseMatrix]] with size 1024 x 1024. */
+ /** Converts to BlockMatrix. Creates blocks of [[SparseMatrix]] with size 1024 x 1024.
+ * @since 1.3.0
+ * */
def toBlockMatrix(): BlockMatrix = {
toBlockMatrix(1024, 1024)
}
@@ -87,6 +102,7 @@ class IndexedRowMatrix(
* @param colsPerBlock The number of columns of each block. The blocks at the right edge may have
* a smaller value. Must be an integer value greater than 0.
* @return a [[BlockMatrix]]
+ * @since 1.3.0
*/
def toBlockMatrix(rowsPerBlock: Int, colsPerBlock: Int): BlockMatrix = {
// TODO: This implementation may be optimized
@@ -96,6 +112,7 @@ class IndexedRowMatrix(
/**
* Converts this matrix to a
* [[org.apache.spark.mllib.linalg.distributed.CoordinateMatrix]].
+ * @since 1.3.0
*/
def toCoordinateMatrix(): CoordinateMatrix = {
val entries = rows.flatMap { row =>
@@ -132,6 +149,7 @@ class IndexedRowMatrix(
* @param rCond the reciprocal condition number. All singular values smaller than rCond * sigma(0)
* are treated as zero, where sigma(0) is the largest singular value.
* @return SingularValueDecomposition(U, s, V)
+ * @since 1.0.0
*/
def computeSVD(
k: Int,
@@ -158,6 +176,7 @@ class IndexedRowMatrix(
*
* @param B a local matrix whose number of rows must match the number of columns of this matrix
* @return an IndexedRowMatrix representing the product, which preserves partitioning
+ * @since 1.0.0
*/
def multiply(B: Matrix): IndexedRowMatrix = {
val mat = toRowMatrix().multiply(B)
@@ -169,6 +188,7 @@ class IndexedRowMatrix(
/**
* Computes the Gramian matrix `A^T A`.
+ * @since 1.0.0
*/
def computeGramianMatrix(): Matrix = {
toRowMatrix().computeGramianMatrix()
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
index bfc90c9ef8..b2e94f2dd6 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
@@ -44,6 +44,7 @@ import org.apache.spark.storage.StorageLevel
* be determined by the number of records in the RDD `rows`.
* @param nCols number of columns. A non-positive value means unknown, and then the number of
* columns will be determined by the size of the first row.
+ * @since 1.0.0
*/
@Experimental
class RowMatrix(
@@ -51,10 +52,14 @@ class RowMatrix(
private var nRows: Long,
private var nCols: Int) extends DistributedMatrix with Logging {
- /** Alternative constructor leaving matrix dimensions to be determined automatically. */
+ /** Alternative constructor leaving matrix dimensions to be determined automatically.
+ * @since 1.0.0
+ */
def this(rows: RDD[Vector]) = this(rows, 0L, 0)
- /** Gets or computes the number of columns. */
+ /** Gets or computes the number of columns.
+ * @since 1.0.0
+ */
override def numCols(): Long = {
if (nCols <= 0) {
try {
@@ -69,7 +74,9 @@ class RowMatrix(
nCols
}
- /** Gets or computes the number of rows. */
+ /** Gets or computes the number of rows.
+ * @since 1.0.0
+ */
override def numRows(): Long = {
if (nRows <= 0L) {
nRows = rows.count()
@@ -107,6 +114,7 @@ class RowMatrix(
/**
* Computes the Gramian matrix `A^T A`.
+ * @since 1.0.0
*/
def computeGramianMatrix(): Matrix = {
val n = numCols().toInt
@@ -177,6 +185,7 @@ class RowMatrix(
* @param rCond the reciprocal condition number. All singular values smaller than rCond * sigma(0)
* are treated as zero, where sigma(0) is the largest singular value.
* @return SingularValueDecomposition(U, s, V). U = null if computeU = false.
+ * @since 1.0.0
*/
def computeSVD(
k: Int,
@@ -317,6 +326,7 @@ class RowMatrix(
/**
* Computes the covariance matrix, treating each row as an observation.
* @return a local dense matrix of size n x n
+ * @since 1.0.0
*/
def computeCovariance(): Matrix = {
val n = numCols().toInt
@@ -370,6 +380,7 @@ class RowMatrix(
*
* @param k number of top principal components.
* @return a matrix of size n-by-k, whose columns are principal components
+ * @since 1.0.0
*/
def computePrincipalComponents(k: Int): Matrix = {
val n = numCols().toInt
@@ -388,6 +399,7 @@ class RowMatrix(
/**
* Computes column-wise summary statistics.
+ * @since 1.0.0
*/
def computeColumnSummaryStatistics(): MultivariateStatisticalSummary = {
val summary = rows.treeAggregate(new MultivariateOnlineSummarizer)(
@@ -403,6 +415,7 @@ class RowMatrix(
* @param B a local matrix whose number of rows must match the number of columns of this matrix
* @return a [[org.apache.spark.mllib.linalg.distributed.RowMatrix]] representing the product,
* which preserves partitioning
+ * @since 1.0.0
*/
def multiply(B: Matrix): RowMatrix = {
val n = numCols().toInt
@@ -435,6 +448,7 @@ class RowMatrix(
*
* @return An n x n sparse upper-triangular matrix of cosine similarities between
* columns of this matrix.
+ * @since 1.2.0
*/
def columnSimilarities(): CoordinateMatrix = {
columnSimilarities(0.0)
@@ -478,6 +492,7 @@ class RowMatrix(
* with the cost vs estimate quality trade-off described above.
* @return An n x n sparse upper-triangular matrix of cosine similarities
* between columns of this matrix.
+ * @since 1.2.0
*/
def columnSimilarities(threshold: Double): CoordinateMatrix = {
require(threshold >= 0, s"Threshold cannot be negative: $threshold")
@@ -656,6 +671,9 @@ class RowMatrix(
}
}
+/**
+ * @since 1.0.0
+ */
@Experimental
object RowMatrix {