about | summary | refs | log | tree | commit | diff
path: root/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala
diff options
context:
space:
mode:
Diffstat (limited to 'mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala')
-rw-r--r-- mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala | 39
1 files changed, 35 insertions, 4 deletions
diff --git a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala
index 4275a22ae0..5b3b3b6887 100644
--- a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala
+++ b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala
@@ -28,21 +28,26 @@ import org.json4s.DefaultFormats
import org.json4s.JsonDSL._
import org.json4s.jackson.JsonMethods.{compact, parse => parseJson, render}
+import org.apache.spark.annotation.Since
+
/**
* Represents a numeric vector, whose index type is Int and value type is Double.
*
* Note: Users should not implement this interface.
*/
+@Since("2.0.0")
sealed trait Vector extends Serializable {
/**
* Size of the vector.
*/
+ @Since("2.0.0")
def size: Int
/**
* Converts the instance to a double array.
*/
+ @Since("2.0.0")
def toArray: Array[Double]
override def equals(other: Any): Boolean = {
@@ -96,11 +101,13 @@ sealed trait Vector extends Serializable {
* Gets the value of the ith element.
* @param i index
*/
+ @Since("2.0.0")
def apply(i: Int): Double = toBreeze(i)
/**
* Makes a deep copy of this vector.
*/
+ @Since("2.0.0")
def copy: Vector = {
throw new NotImplementedError(s"copy is not implemented for ${this.getClass}.")
}
@@ -112,32 +119,38 @@ sealed trait Vector extends Serializable {
* the vector with type `Int`, and the second parameter is the corresponding value
* with type `Double`.
*/
+ @Since("2.0.0")
def foreachActive(f: (Int, Double) => Unit): Unit
/**
* Number of active entries. An "active entry" is an element which is explicitly stored,
* regardless of its value. Note that inactive entries have value 0.
*/
+ @Since("2.0.0")
def numActives: Int
/**
* Number of nonzero elements. This scans all active values and counts the nonzeros.
*/
+ @Since("2.0.0")
def numNonzeros: Int
/**
* Converts this vector to a sparse vector with all explicit zeros removed.
*/
+ @Since("2.0.0")
def toSparse: SparseVector
/**
* Converts this vector to a dense vector.
*/
+ @Since("2.0.0")
def toDense: DenseVector = new DenseVector(this.toArray)
/**
* Returns a vector in either dense or sparse format, whichever uses less storage.
*/
+ @Since("2.0.0")
def compressed: Vector = {
val nnz = numNonzeros
// A dense vector needs 8 * size + 8 bytes, while a sparse vector needs 12 * nnz + 20 bytes.
@@ -152,11 +165,13 @@ sealed trait Vector extends Serializable {
* Finds the index of a maximal element. Returns the index of the first maximal element in case of a tie.
* Returns -1 if vector has length 0.
*/
+ @Since("2.0.0")
def argmax: Int
/**
* Converts the vector to a JSON string.
*/
+ @Since("2.0.0")
def toJson: String
}
@@ -165,12 +180,14 @@ sealed trait Vector extends Serializable {
* We don't use the name `Vector` because Scala imports
* [[scala.collection.immutable.Vector]] by default.
*/
+@Since("2.0.0")
object Vectors {
/**
* Creates a dense vector from its values.
*/
@varargs
+ @Since("2.0.0")
def dense(firstValue: Double, otherValues: Double*): Vector =
new DenseVector((firstValue +: otherValues).toArray)
@@ -178,6 +195,7 @@ object Vectors {
/**
* Creates a dense vector from a double array.
*/
+ @Since("2.0.0")
def dense(values: Array[Double]): Vector = new DenseVector(values)
/**
@@ -187,6 +205,7 @@ object Vectors {
* @param indices index array, must be strictly increasing.
* @param values value array, must have the same length as indices.
*/
+ @Since("2.0.0")
def sparse(size: Int, indices: Array[Int], values: Array[Double]): Vector =
new SparseVector(size, indices, values)
@@ -196,6 +215,7 @@ object Vectors {
* @param size vector size.
* @param elements vector elements in (index, value) pairs.
*/
+ @Since("2.0.0")
def sparse(size: Int, elements: Seq[(Int, Double)]): Vector = {
require(size > 0, "The size of the requested sparse vector must be greater than 0.")
@@ -217,6 +237,7 @@ object Vectors {
* @param size vector size.
* @param elements vector elements in (index, value) pairs.
*/
+ @Since("2.0.0")
def sparse(size: Int, elements: JavaIterable[(JavaInteger, JavaDouble)]): Vector = {
sparse(size, elements.asScala.map { case (i, x) =>
(i.intValue(), x.doubleValue())
@@ -229,6 +250,7 @@ object Vectors {
* @param size vector size
* @return a zero vector
*/
+ @Since("2.0.0")
def zeros(size: Int): Vector = {
new DenseVector(new Array[Double](size))
}
@@ -236,6 +258,7 @@ object Vectors {
/**
* Parses the JSON representation of a vector into a [[Vector]].
*/
+ @Since("2.0.0")
def fromJson(json: String): Vector = {
implicit val formats = DefaultFormats
val jValue = parseJson(json)
@@ -281,6 +304,7 @@ object Vectors {
* @param p norm.
* @return norm in L^p^ space.
*/
+ @Since("2.0.0")
def norm(vector: Vector, p: Double): Double = {
require(p >= 1.0, "To compute the p-norm of the vector, we require that you specify a p>=1. " +
s"You specified p=$p.")
@@ -333,6 +357,7 @@ object Vectors {
* @param v2 second Vector.
* @return squared distance between two Vectors.
*/
+ @Since("2.0.0")
def sqdist(v1: Vector, v2: Vector): Double = {
require(v1.size == v2.size, s"Vector dimensions do not match: Dim(v1)=${v1.size} and Dim(v2)" +
s"=${v2.size}.")
@@ -449,7 +474,8 @@ object Vectors {
/**
* A dense vector represented by a value array.
*/
-class DenseVector (val values: Array[Double]) extends Vector {
+@Since("2.0.0")
+class DenseVector @Since("2.0.0") (@Since("2.0.0") val values: Array[Double]) extends Vector {
override def size: Int = values.length
@@ -548,9 +574,11 @@ class DenseVector (val values: Array[Double]) extends Vector {
}
}
+@Since("2.0.0")
object DenseVector {
/** Extracts the value array from a dense vector. */
+ @Since("2.0.0")
def unapply(dv: DenseVector): Option[Array[Double]] = Some(dv.values)
}
@@ -561,10 +589,11 @@ object DenseVector {
* @param indices index array, assumed to be strictly increasing.
* @param values value array, must have the same length as the index array.
*/
-class SparseVector (
+@Since("2.0.0")
+class SparseVector @Since("2.0.0") (
override val size: Int,
- val indices: Array[Int],
- val values: Array[Double]) extends Vector {
+ @Since("2.0.0") val indices: Array[Int],
+ @Since("2.0.0") val values: Array[Double]) extends Vector {
require(indices.length == values.length, "Sparse vectors require that the dimension of the" +
s" indices match the dimension of the values. You provided ${indices.length} indices and " +
@@ -734,7 +763,9 @@ class SparseVector (
}
}
+@Since("2.0.0")
object SparseVector {
+ @Since("2.0.0")
def unapply(sv: SparseVector): Option[(Int, Array[Int], Array[Double])] =
Some((sv.size, sv.indices, sv.values))
}