author     Xiangrui Meng <meng@databricks.com>    2015-08-25 20:07:56 -0700
committer  DB Tsai <dbt@netflix.com>              2015-08-25 20:07:56 -0700
commit     ab431f8a970b85fba34ccb506c0f8815e55c63bf (patch)
tree       3ae4aa17be452012aa7ac4bf680ecc4e24e1cf77 /mllib
parent     8668ead2e7097b9591069599fbfccf67c53db659 (diff)
[SPARK-10238] [MLLIB] update since versions in mllib.linalg
Same as #8421 but for `mllib.linalg`. cc dbtsai

Author: Xiangrui Meng <meng@databricks.com>

Closes #8440 from mengxr/SPARK-10238 and squashes the following commits:

b38437e [Xiangrui Meng] update since versions in mllib.linalg
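For reference, the annotation pattern applied throughout this patch looks roughly like the sketch below; ExampleMatrix and ExampleDense are hypothetical names used only for illustration, and only the @Since placement mirrors the actual changes (on the trait, on each public member, on the primary constructor, and on public constructor parameters).

    import org.apache.spark.annotation.Since

    // Sketch only: hypothetical types showing where @Since lands in this patch.
    // Each element carries the version in which it became public API.
    @Since("1.0.0")
    sealed trait ExampleMatrix extends Serializable {
      /** Number of rows. */
      @Since("1.0.0")
      def numRows: Int
    }

    @Since("1.0.0")
    class ExampleDense @Since("1.3.0") (            // constructor public since 1.3.0
        @Since("1.0.0") val numRows: Int,           // parameter public since 1.0.0
        @Since("1.0.0") val values: Array[Double]) extends ExampleMatrix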
Diffstat (limited to 'mllib')
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala                        44
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/linalg/SingularValueDecomposition.scala       1
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala                          25
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala          10
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala      4
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/DistributedMatrix.scala     2
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala      4
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala             5
8 files changed, 64 insertions, 31 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
index 28b5b4637b..c02ba426fc 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
@@ -32,18 +32,23 @@ import org.apache.spark.sql.types._
* Trait for a local matrix.
*/
@SQLUserDefinedType(udt = classOf[MatrixUDT])
+@Since("1.0.0")
sealed trait Matrix extends Serializable {
/** Number of rows. */
+ @Since("1.0.0")
def numRows: Int
/** Number of columns. */
+ @Since("1.0.0")
def numCols: Int
/** Flag that keeps track whether the matrix is transposed or not. False by default. */
+ @Since("1.3.0")
val isTransposed: Boolean = false
/** Converts to a dense array in column major. */
+ @Since("1.0.0")
def toArray: Array[Double] = {
val newArray = new Array[Double](numRows * numCols)
foreachActive { (i, j, v) =>
@@ -56,6 +61,7 @@ sealed trait Matrix extends Serializable {
private[mllib] def toBreeze: BM[Double]
/** Gets the (i, j)-th element. */
+ @Since("1.3.0")
def apply(i: Int, j: Int): Double
/** Return the index for the (i, j)-th element in the backing array. */
@@ -65,12 +71,15 @@ sealed trait Matrix extends Serializable {
private[mllib] def update(i: Int, j: Int, v: Double): Unit
/** Get a deep copy of the matrix. */
+ @Since("1.2.0")
def copy: Matrix
/** Transpose the Matrix. Returns a new `Matrix` instance sharing the same underlying data. */
+ @Since("1.3.0")
def transpose: Matrix
/** Convenience method for `Matrix`-`DenseMatrix` multiplication. */
+ @Since("1.2.0")
def multiply(y: DenseMatrix): DenseMatrix = {
val C: DenseMatrix = DenseMatrix.zeros(numRows, y.numCols)
BLAS.gemm(1.0, this, y, 0.0, C)
@@ -78,11 +87,13 @@ sealed trait Matrix extends Serializable {
}
/** Convenience method for `Matrix`-`DenseVector` multiplication. For binary compatibility. */
+ @Since("1.2.0")
def multiply(y: DenseVector): DenseVector = {
multiply(y.asInstanceOf[Vector])
}
/** Convenience method for `Matrix`-`Vector` multiplication. */
+ @Since("1.4.0")
def multiply(y: Vector): DenseVector = {
val output = new DenseVector(new Array[Double](numRows))
BLAS.gemv(1.0, this, y, 0.0, output)
@@ -93,6 +104,7 @@ sealed trait Matrix extends Serializable {
override def toString: String = toBreeze.toString()
/** A human readable representation of the matrix with maximum lines and width */
+ @Since("1.4.0")
def toString(maxLines: Int, maxLineWidth: Int): String = toBreeze.toString(maxLines, maxLineWidth)
/** Map the values of this matrix using a function. Generates a new matrix. Performs the
@@ -118,11 +130,13 @@ sealed trait Matrix extends Serializable {
/**
* Find the number of non-zero active values.
*/
+ @Since("1.5.0")
def numNonzeros: Int
/**
* Find the number of values stored explicitly. These values can be zero as well.
*/
+ @Since("1.5.0")
def numActives: Int
}
@@ -230,11 +244,11 @@ private[spark] class MatrixUDT extends UserDefinedType[Matrix] {
*/
@Since("1.0.0")
@SQLUserDefinedType(udt = classOf[MatrixUDT])
-class DenseMatrix(
- val numRows: Int,
- val numCols: Int,
- val values: Array[Double],
- override val isTransposed: Boolean) extends Matrix {
+class DenseMatrix @Since("1.3.0") (
+ @Since("1.0.0") val numRows: Int,
+ @Since("1.0.0") val numCols: Int,
+ @Since("1.0.0") val values: Array[Double],
+ @Since("1.3.0") override val isTransposed: Boolean) extends Matrix {
require(values.length == numRows * numCols, "The number of values supplied doesn't match the " +
s"size of the matrix! values.length: ${values.length}, numRows * numCols: ${numRows * numCols}")
@@ -254,7 +268,7 @@ class DenseMatrix(
* @param numCols number of columns
* @param values matrix entries in column major
*/
- @Since("1.3.0")
+ @Since("1.0.0")
def this(numRows: Int, numCols: Int, values: Array[Double]) =
this(numRows, numCols, values, false)
@@ -491,13 +505,13 @@ object DenseMatrix {
*/
@Since("1.2.0")
@SQLUserDefinedType(udt = classOf[MatrixUDT])
-class SparseMatrix(
- val numRows: Int,
- val numCols: Int,
- val colPtrs: Array[Int],
- val rowIndices: Array[Int],
- val values: Array[Double],
- override val isTransposed: Boolean) extends Matrix {
+class SparseMatrix @Since("1.3.0") (
+ @Since("1.2.0") val numRows: Int,
+ @Since("1.2.0") val numCols: Int,
+ @Since("1.2.0") val colPtrs: Array[Int],
+ @Since("1.2.0") val rowIndices: Array[Int],
+ @Since("1.2.0") val values: Array[Double],
+ @Since("1.3.0") override val isTransposed: Boolean) extends Matrix {
require(values.length == rowIndices.length, "The number of row indices and values don't match! " +
s"values.length: ${values.length}, rowIndices.length: ${rowIndices.length}")
@@ -527,7 +541,7 @@ class SparseMatrix(
* order for each column
* @param values non-zero matrix entries in column major
*/
- @Since("1.3.0")
+ @Since("1.2.0")
def this(
numRows: Int,
numCols: Int,
@@ -549,8 +563,6 @@ class SparseMatrix(
}
}
- /**
- */
@Since("1.3.0")
override def apply(i: Int, j: Int): Double = {
val ind = index(i, j)
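For context on the Matrix methods annotated above, a small usage sketch (assuming the standard Matrices.dense and Vectors.dense factories) might look like this:

    import org.apache.spark.mllib.linalg.{Matrices, Vectors}

    // 2 x 2 dense matrix in column-major order: rows are [1.0, 3.0] and [2.0, 4.0].
    val m = Matrices.dense(2, 2, Array(1.0, 2.0, 3.0, 4.0))

    // Matrix-Vector multiplication via the multiply(y: Vector) overload (@Since("1.4.0")).
    val v = Vectors.dense(5.0, 6.0)
    val mv = m.multiply(v)   // DenseVector(23.0, 34.0)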
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/SingularValueDecomposition.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/SingularValueDecomposition.scala
index a37aca99d5..4dcf8f28f2 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/SingularValueDecomposition.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/SingularValueDecomposition.scala
@@ -31,6 +31,7 @@ case class SingularValueDecomposition[UType, VType](U: UType, s: Vector, V: VTyp
* :: Experimental ::
* Represents QR factors.
*/
+@Since("1.5.0")
@Experimental
case class QRDecomposition[QType, RType](Q: QType, R: RType)
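QRDecomposition is the return type of RowMatrix.tallSkinnyQR, which is annotated later in this patch. A rough usage sketch, assuming an existing SparkContext named sc:

    import org.apache.spark.SparkContext
    import org.apache.spark.mllib.linalg.Vectors
    import org.apache.spark.mllib.linalg.distributed.RowMatrix

    // Sketch: QR factorization of a tall-and-skinny distributed matrix.
    def qrExample(sc: SparkContext): Unit = {
      val rows = sc.parallelize(Seq(
        Vectors.dense(1.0, 2.0),
        Vectors.dense(3.0, 4.0),
        Vectors.dense(5.0, 6.0)))
      val mat = new RowMatrix(rows)

      // Q is null unless computeQ = true (see tallSkinnyQR below in this patch).
      val qr = mat.tallSkinnyQR(computeQ = true)
      val q: RowMatrix = qr.Q   // distributed orthonormal factor
      val r = qr.R              // local upper-triangular Matrix
    }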
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala
index 3d577edbe2..06ebb15869 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala
@@ -38,16 +38,19 @@ import org.apache.spark.sql.types._
* Note: Users should not implement this interface.
*/
@SQLUserDefinedType(udt = classOf[VectorUDT])
+@Since("1.0.0")
sealed trait Vector extends Serializable {
/**
* Size of the vector.
*/
+ @Since("1.0.0")
def size: Int
/**
* Converts the instance to a double array.
*/
+ @Since("1.0.0")
def toArray: Array[Double]
override def equals(other: Any): Boolean = {
@@ -99,11 +102,13 @@ sealed trait Vector extends Serializable {
* Gets the value of the ith element.
* @param i index
*/
+ @Since("1.1.0")
def apply(i: Int): Double = toBreeze(i)
/**
* Makes a deep copy of this vector.
*/
+ @Since("1.1.0")
def copy: Vector = {
throw new NotImplementedError(s"copy is not implemented for ${this.getClass}.")
}
@@ -121,26 +126,31 @@ sealed trait Vector extends Serializable {
* Number of active entries. An "active entry" is an element which is explicitly stored,
* regardless of its value. Note that inactive entries have value 0.
*/
+ @Since("1.4.0")
def numActives: Int
/**
* Number of nonzero elements. This scans all active values and count nonzeros.
*/
+ @Since("1.4.0")
def numNonzeros: Int
/**
* Converts this vector to a sparse vector with all explicit zeros removed.
*/
+ @Since("1.4.0")
def toSparse: SparseVector
/**
* Converts this vector to a dense vector.
*/
+ @Since("1.4.0")
def toDense: DenseVector = new DenseVector(this.toArray)
/**
* Returns a vector in either dense or sparse format, whichever uses less storage.
*/
+ @Since("1.4.0")
def compressed: Vector = {
val nnz = numNonzeros
// A dense vector needs 8 * size + 8 bytes, while a sparse vector needs 12 * nnz + 20 bytes.
@@ -155,6 +165,7 @@ sealed trait Vector extends Serializable {
* Find the index of a maximal element. Returns the first maximal element in case of a tie.
* Returns -1 if vector has length 0.
*/
+ @Since("1.5.0")
def argmax: Int
}
@@ -532,7 +543,8 @@ object Vectors {
*/
@Since("1.0.0")
@SQLUserDefinedType(udt = classOf[VectorUDT])
-class DenseVector(val values: Array[Double]) extends Vector {
+class DenseVector @Since("1.0.0") (
+ @Since("1.0.0") val values: Array[Double]) extends Vector {
@Since("1.0.0")
override def size: Int = values.length
@@ -632,7 +644,9 @@ class DenseVector(val values: Array[Double]) extends Vector {
@Since("1.3.0")
object DenseVector {
+
/** Extracts the value array from a dense vector. */
+ @Since("1.3.0")
def unapply(dv: DenseVector): Option[Array[Double]] = Some(dv.values)
}
@@ -645,10 +659,10 @@ object DenseVector {
*/
@Since("1.0.0")
@SQLUserDefinedType(udt = classOf[VectorUDT])
-class SparseVector(
- override val size: Int,
- val indices: Array[Int],
- val values: Array[Double]) extends Vector {
+class SparseVector @Since("1.0.0") (
+ @Since("1.0.0") override val size: Int,
+ @Since("1.0.0") val indices: Array[Int],
+ @Since("1.0.0") val values: Array[Double]) extends Vector {
require(indices.length == values.length, "Sparse vectors require that the dimension of the" +
s" indices match the dimension of the values. You provided ${indices.length} indices and " +
@@ -819,6 +833,7 @@ class SparseVector(
@Since("1.3.0")
object SparseVector {
+ @Since("1.3.0")
def unapply(sv: SparseVector): Option[(Int, Array[Int], Array[Double])] =
Some((sv.size, sv.indices, sv.values))
}
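The compressed method annotated above chooses between dense and sparse storage using the byte estimates in its comment (8 * size + 8 for dense, 12 * nnz + 20 for sparse). A small sketch of what that means in practice:

    import org.apache.spark.mllib.linalg.Vectors

    // A length-10 vector with only 2 nonzero entries.
    val v = Vectors.dense(0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 7.0)

    // Estimated storage, following the comment in Vector.compressed:
    //   dense:  8 * 10 + 8  = 88 bytes
    //   sparse: 12 * 2 + 20 = 44 bytes
    // so compressed is expected to return a sparse representation here.
    val c = v.compressed
    val s = v.toSparse   // SparseVector(10, [2, 9], [3.0, 7.0])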
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
index 94376c24a7..a33b6137cf 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
@@ -131,10 +131,10 @@ private[mllib] object GridPartitioner {
*/
@Since("1.3.0")
@Experimental
-class BlockMatrix(
- val blocks: RDD[((Int, Int), Matrix)],
- val rowsPerBlock: Int,
- val colsPerBlock: Int,
+class BlockMatrix @Since("1.3.0") (
+ @Since("1.3.0") val blocks: RDD[((Int, Int), Matrix)],
+ @Since("1.3.0") val rowsPerBlock: Int,
+ @Since("1.3.0") val colsPerBlock: Int,
private var nRows: Long,
private var nCols: Long) extends DistributedMatrix with Logging {
@@ -171,7 +171,9 @@ class BlockMatrix(
nCols
}
+ @Since("1.3.0")
val numRowBlocks = math.ceil(numRows() * 1.0 / rowsPerBlock).toInt
+ @Since("1.3.0")
val numColBlocks = math.ceil(numCols() * 1.0 / colsPerBlock).toInt
private[mllib] def createPartitioner(): GridPartitioner =
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala
index 4bb27ec840..644f293d88 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala
@@ -46,8 +46,8 @@ case class MatrixEntry(i: Long, j: Long, value: Double)
*/
@Since("1.0.0")
@Experimental
-class CoordinateMatrix(
- val entries: RDD[MatrixEntry],
+class CoordinateMatrix @Since("1.0.0") (
+ @Since("1.0.0") val entries: RDD[MatrixEntry],
private var nRows: Long,
private var nCols: Long) extends DistributedMatrix {
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/DistributedMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/DistributedMatrix.scala
index e51327ebb7..db3433a5e2 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/DistributedMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/DistributedMatrix.scala
@@ -28,9 +28,11 @@ import org.apache.spark.annotation.Since
trait DistributedMatrix extends Serializable {
/** Gets or computes the number of rows. */
+ @Since("1.0.0")
def numRows(): Long
/** Gets or computes the number of columns. */
+ @Since("1.0.0")
def numCols(): Long
/** Collects data and assembles a local dense breeze matrix (for test only). */
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala
index 6d2c05a47d..b20ea0dc50 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala
@@ -45,8 +45,8 @@ case class IndexedRow(index: Long, vector: Vector)
*/
@Since("1.0.0")
@Experimental
-class IndexedRowMatrix(
- val rows: RDD[IndexedRow],
+class IndexedRowMatrix @Since("1.0.0") (
+ @Since("1.0.0") val rows: RDD[IndexedRow],
private var nRows: Long,
private var nCols: Int) extends DistributedMatrix {
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
index 78036eba5c..9a423ddafd 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
@@ -47,8 +47,8 @@ import org.apache.spark.storage.StorageLevel
*/
@Since("1.0.0")
@Experimental
-class RowMatrix(
- val rows: RDD[Vector],
+class RowMatrix @Since("1.0.0") (
+ @Since("1.0.0") val rows: RDD[Vector],
private var nRows: Long,
private var nCols: Int) extends DistributedMatrix with Logging {
@@ -519,6 +519,7 @@ class RowMatrix(
* @param computeQ whether to computeQ
* @return QRDecomposition(Q, R), Q = null if computeQ = false.
*/
+ @Since("1.5.0")
def tallSkinnyQR(computeQ: Boolean = false): QRDecomposition[RowMatrix, Matrix] = {
val col = numCols().toInt
// split rows horizontally into smaller matrices, and compute QR for each of them