about | summary | refs | log | tree | commit | diff
path: root/mllib
diff options
context:
space:
mode:
author: Tor Myklebust <tmyklebu@gmail.com> 2014-04-19 15:10:18 -0700
committer: Matei Zaharia <matei@databricks.com> 2014-04-19 15:10:18 -0700
commit: 25fc31884b0382b2d43c55e1f55e305a73dfae91 (patch)
tree: 18874bdf4e26fb995325e522938bfc890361c494 /mllib
parent: 10d04213ffda9c64b7e39c7debc433be8ea343c7 (diff)
download: spark-25fc31884b0382b2d43c55e1f55e305a73dfae91.tar.gz
spark-25fc31884b0382b2d43c55e1f55e305a73dfae91.tar.bz2
spark-25fc31884b0382b2d43c55e1f55e305a73dfae91.zip
[SPARK-1535] ALS: Avoid the garbage-creating ctor of DoubleMatrix
`new DoubleMatrix(double[])` creates a garbage `double[]` of the same length as its argument and immediately throws it away. This pull request avoids that constructor in the ALS code.

Author: Tor Myklebust <tmyklebu@gmail.com>

Closes #442 from tmyklebu/foo2 and squashes the following commits:

2784fc5 [Tor Myklebust] Mention that this is probably fixed as of jblas 1.2.4; repunctuate.
a09904f [Tor Myklebust] Helper function for wrapping Array[Double]'s with DoubleMatrix's.
Diffstat (limited to 'mllib')
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala 13
1 files changed, 11 insertions, 2 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala
index 102742c7c5..1f5c746a34 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala
@@ -269,7 +269,7 @@ class ALS private (
private def computeYtY(factors: RDD[(Int, Array[Array[Double]])]) = {
val n = rank * (rank + 1) / 2
val LYtY = factors.values.aggregate(new DoubleMatrix(n))( seqOp = (L, Y) => {
- Y.foreach(y => dspr(1.0, new DoubleMatrix(y), L))
+ Y.foreach(y => dspr(1.0, wrapDoubleArray(y), L))
L
}, combOp = (L1, L2) => {
L1.addi(L2)
@@ -305,6 +305,15 @@ class ALS private (
}
/**
+ * Wrap a double array in a DoubleMatrix without creating garbage.
+ * This is a temporary fix for jblas 1.2.3; it should be safe to move back to the
+ * DoubleMatrix(double[]) constructor come jblas 1.2.4.
+ */
+ private def wrapDoubleArray(v: Array[Double]): DoubleMatrix = {
+ new DoubleMatrix(v.length, 1, v: _*)
+ }
+
+ /**
* Flatten out blocked user or product factors into an RDD of (id, factor vector) pairs
*/
private def unblockFactors(blockedFactors: RDD[(Int, Array[Array[Double]])],
@@ -457,7 +466,7 @@ class ALS private (
// block
for (productBlock <- 0 until numBlocks) {
for (p <- 0 until blockFactors(productBlock).length) {
- val x = new DoubleMatrix(blockFactors(productBlock)(p))
+ val x = wrapDoubleArray(blockFactors(productBlock)(p))
tempXtX.fill(0.0)
dspr(1.0, x, tempXtX)
val (us, rs) = inLinkBlock.ratingsForBlock(productBlock)(p)