aboutsummaryrefslogtreecommitdiff
path: root/mllib
diff options
context:
space:
mode:
author: MechCoder <manojkumarsivaraj334@gmail.com> 2015-07-02 11:28:14 -0700
committer: Xiangrui Meng <meng@databricks.com> 2015-07-02 11:28:14 -0700
commit: 34d448dbe1d7bd5bf9a8d6ef473878e570ca6161 (patch)
tree: 6a0b8df867906b89b5358da9ea60ce62fb7541d5 /mllib
parent: 2e2f32603c110b9c6ddfbb836f63882eacf0a8cc (diff)
download: spark-34d448dbe1d7bd5bf9a8d6ef473878e570ca6161.tar.gz
spark-34d448dbe1d7bd5bf9a8d6ef473878e570ca6161.tar.bz2
spark-34d448dbe1d7bd5bf9a8d6ef473878e570ca6161.zip
[SPARK-8479] [MLLIB] Add numNonzeros and numActives to linalg.Matrices
Matrices allow zeros to be stored in values. Sometimes it is handy to have a method to check whether the number of nonzero values is the same as the number of active (explicitly stored) values. Author: MechCoder <manojkumarsivaraj334@gmail.com> Closes #6904 from MechCoder/nnz_matrix and squashes the following commits: 252c6b7 [MechCoder] Add to MiMa excludes e2390f5 [MechCoder] Use count instead of foreach 2f62b2f [MechCoder] Add to MiMa excludes d6e96ef [MechCoder] [SPARK-8479] Add numNonzeros and numActives to linalg.Matrices
Diffstat (limited to 'mllib')
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala | 19
-rw-r--r-- mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala | 10
2 files changed, 29 insertions, 0 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
index 0a615494bb..75e7004464 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
@@ -114,6 +114,16 @@ sealed trait Matrix extends Serializable {
* corresponding value in the matrix with type `Double`.
*/
private[spark] def foreachActive(f: (Int, Int, Double) => Unit)
+
+ /**
+ * Find the number of non-zero active values.
+ */
+ def numNonzeros: Int
+
+ /**
+ * Find the number of values stored explicitly. These values can be zero as well.
+ */
+ def numActives: Int
}
@DeveloperApi
@@ -324,6 +334,10 @@ class DenseMatrix(
}
}
+ override def numNonzeros: Int = values.count(_ != 0)
+
+ override def numActives: Int = values.length
+
/**
* Generate a `SparseMatrix` from the given `DenseMatrix`. The new matrix will have isTransposed
* set to false.
@@ -593,6 +607,11 @@ class SparseMatrix(
def toDense: DenseMatrix = {
new DenseMatrix(numRows, numCols, toArray)
}
+
+ override def numNonzeros: Int = values.count(_ != 0)
+
+ override def numActives: Int = values.length
+
}
/**
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala
index 8dbb70f5d1..a270ba2562 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala
@@ -455,4 +455,14 @@ class MatricesSuite extends SparkFunSuite {
lines = mat.toString(5, 100).lines.toArray
assert(lines.size == 5 && lines.forall(_.size <= 100))
}
+
+ test("numNonzeros and numActives") {
+ val dm1 = Matrices.dense(3, 2, Array(0, 0, -1, 1, 0, 1))
+ assert(dm1.numNonzeros === 3)
+ assert(dm1.numActives === 6)
+
+ val sm1 = Matrices.sparse(3, 2, Array(0, 2, 3), Array(0, 2, 1), Array(0.0, -1.2, 0.0))
+ assert(sm1.numNonzeros === 1)
+ assert(sm1.numActives === 3)
+ }
}