author    Sean Owen <sowen@cloudera.com>  2016-03-08 17:47:55 +0000
committer Sean Owen <sowen@cloudera.com>  2016-03-08 17:47:55 +0000
commit    54040f8d350d2aad3078dcffef808c62b7c0b73d (patch)
tree      1eacb775f8426130a3152cf9afd30c47fd5f9143 /mllib/src/test
parent    ca1a7b9d6acf8e1f9b6ab6265f9001c2c7ff8489 (diff)
[SPARK-13715][MLLIB] Remove last usages of jblas in tests
## What changes were proposed in this pull request?

Remove last usage of jblas, in tests

## How was this patch tested?

Jenkins tests -- the same ones that are being modified.

Author: Sean Owen <sowen@cloudera.com>

Closes #11560 from srowen/SPARK-13715.
Diffstat (limited to 'mllib/src/test')
-rw-r--r-- mllib/src/test/java/org/apache/spark/mllib/recommendation/JavaALSSuite.java         | 39
-rw-r--r-- mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java | 17
-rw-r--r-- mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala           |  7
-rw-r--r-- mllib/src/test/scala/org/apache/spark/mllib/optimization/NNLSSuite.scala            | 71
-rw-r--r-- mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala           | 76
-rw-r--r-- mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala   |  8
6 files changed, 106 insertions(+), 112 deletions(-)
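
The patch swaps jblas for either Breeze (in the Scala tests) or plain arrays (in the Java tests). A minimal sketch of that mapping, assuming only Breeze on the classpath (the object name is illustrative, not part of the commit): jblas and Breeze both use column-major storage, `mmul` becomes `*`, and `get(i, j)` becomes `apply`.

```scala
// Illustrative sketch of the jblas -> Breeze mapping applied throughout
// this patch; not part of the commit itself.
import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV}

object JblasToBreezeSketch {
  def main(args: Array[String]): Unit = {
    // Both libraries store matrices column-major, so the same flat Array
    // backs the same 2 x 2 matrix: column 0 = (1, 2), column 1 = (3, 4).
    val m = new BDM(2, 2, Array(1.0, 2.0, 3.0, 4.0))
    val v = new BDV(Array(1.0, 1.0))
    val product = m * v  // jblas: m.mmul(v)
    println(m(0, 1))     // jblas: m.get(0, 1) -> 3.0
    println(product(0))  // row 0 of m dotted with v: 1.0 + 3.0 = 4.0
  }
}
```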
diff --git a/mllib/src/test/java/org/apache/spark/mllib/recommendation/JavaALSSuite.java b/mllib/src/test/java/org/apache/spark/mllib/recommendation/JavaALSSuite.java
index a6631ed7eb..d0bf7f556d 100644
--- a/mllib/src/test/java/org/apache/spark/mllib/recommendation/JavaALSSuite.java
+++ b/mllib/src/test/java/org/apache/spark/mllib/recommendation/JavaALSSuite.java
@@ -24,7 +24,6 @@ import java.util.List;
import scala.Tuple2;
import scala.Tuple3;
-import org.jblas.DoubleMatrix;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
@@ -48,14 +47,14 @@ public class JavaALSSuite implements Serializable {
sc = null;
}
- void validatePrediction(
+ private void validatePrediction(
MatrixFactorizationModel model,
int users,
int products,
- DoubleMatrix trueRatings,
+ double[] trueRatings,
double matchThreshold,
boolean implicitPrefs,
- DoubleMatrix truePrefs) {
+ double[] truePrefs) {
List<Tuple2<Integer, Integer>> localUsersProducts = new ArrayList<>(users * products);
for (int u=0; u < users; ++u) {
for (int p=0; p < products; ++p) {
@@ -68,7 +67,7 @@ public class JavaALSSuite implements Serializable {
if (!implicitPrefs) {
for (Rating r: predictedRatings) {
double prediction = r.rating();
- double correct = trueRatings.get(r.user(), r.product());
+ double correct = trueRatings[r.product() * users + r.user()];
Assert.assertTrue(String.format("Prediction=%2.4f not below match threshold of %2.2f",
prediction, matchThreshold), Math.abs(prediction - correct) < matchThreshold);
}
@@ -79,9 +78,9 @@ public class JavaALSSuite implements Serializable {
double denom = 0.0;
for (Rating r: predictedRatings) {
double prediction = r.rating();
- double truePref = truePrefs.get(r.user(), r.product());
+ double truePref = truePrefs[r.product() * users + r.user()];
double confidence = 1.0 +
- /* alpha = */ 1.0 * Math.abs(trueRatings.get(r.user(), r.product()));
+ /* alpha = 1.0 * ... */ Math.abs(trueRatings[r.product() * users + r.user()]);
double err = confidence * (truePref - prediction) * (truePref - prediction);
sqErr += err;
denom += confidence;
@@ -98,8 +97,8 @@ public class JavaALSSuite implements Serializable {
int iterations = 15;
int users = 50;
int products = 100;
- Tuple3<List<Rating>, DoubleMatrix, DoubleMatrix> testData = ALSSuite.generateRatingsAsJavaList(
- users, products, features, 0.7, false, false);
+ Tuple3<List<Rating>, double[], double[]> testData =
+ ALSSuite.generateRatingsAsJava(users, products, features, 0.7, false, false);
JavaRDD<Rating> data = sc.parallelize(testData._1());
MatrixFactorizationModel model = ALS.train(data.rdd(), features, iterations);
@@ -112,8 +111,8 @@ public class JavaALSSuite implements Serializable {
int iterations = 15;
int users = 100;
int products = 200;
- Tuple3<List<Rating>, DoubleMatrix, DoubleMatrix> testData = ALSSuite.generateRatingsAsJavaList(
- users, products, features, 0.7, false, false);
+ Tuple3<List<Rating>, double[], double[]> testData =
+ ALSSuite.generateRatingsAsJava(users, products, features, 0.7, false, false);
JavaRDD<Rating> data = sc.parallelize(testData._1());
@@ -129,8 +128,8 @@ public class JavaALSSuite implements Serializable {
int iterations = 15;
int users = 80;
int products = 160;
- Tuple3<List<Rating>, DoubleMatrix, DoubleMatrix> testData = ALSSuite.generateRatingsAsJavaList(
- users, products, features, 0.7, true, false);
+ Tuple3<List<Rating>, double[], double[]> testData =
+ ALSSuite.generateRatingsAsJava(users, products, features, 0.7, true, false);
JavaRDD<Rating> data = sc.parallelize(testData._1());
MatrixFactorizationModel model = ALS.trainImplicit(data.rdd(), features, iterations);
@@ -143,8 +142,8 @@ public class JavaALSSuite implements Serializable {
int iterations = 15;
int users = 100;
int products = 200;
- Tuple3<List<Rating>, DoubleMatrix, DoubleMatrix> testData = ALSSuite.generateRatingsAsJavaList(
- users, products, features, 0.7, true, false);
+ Tuple3<List<Rating>, double[], double[]> testData =
+ ALSSuite.generateRatingsAsJava(users, products, features, 0.7, true, false);
JavaRDD<Rating> data = sc.parallelize(testData._1());
@@ -161,8 +160,8 @@ public class JavaALSSuite implements Serializable {
int iterations = 15;
int users = 80;
int products = 160;
- Tuple3<List<Rating>, DoubleMatrix, DoubleMatrix> testData = ALSSuite.generateRatingsAsJavaList(
- users, products, features, 0.7, true, true);
+ Tuple3<List<Rating>, double[], double[]> testData =
+ ALSSuite.generateRatingsAsJava(users, products, features, 0.7, true, true);
JavaRDD<Rating> data = sc.parallelize(testData._1());
MatrixFactorizationModel model = new ALS().setRank(features)
@@ -179,9 +178,9 @@ public class JavaALSSuite implements Serializable {
int iterations = 10;
int users = 200;
int products = 50;
- Tuple3<List<Rating>, DoubleMatrix, DoubleMatrix> testData = ALSSuite.generateRatingsAsJavaList(
- users, products, features, 0.7, true, false);
- JavaRDD<Rating> data = sc.parallelize(testData._1());
+ List<Rating> testData = ALSSuite.generateRatingsAsJava(
+ users, products, features, 0.7, true, false)._1();
+ JavaRDD<Rating> data = sc.parallelize(testData);
MatrixFactorizationModel model = new ALS().setRank(features)
.setIterations(iterations)
.setImplicitPrefs(true)
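
The JavaALSSuite change above leans on jblas's column-major layout: with `users` rows and `products` columns, entry (u, p) of the old DoubleMatrix sits at flat index `p * users + u`, which is exactly the expression `trueRatings[r.product() * users + r.user()]` in the new code. A small standalone check of that identity (illustrative only; Breeze shares the column-major convention):

```scala
import breeze.linalg.{DenseMatrix => BDM}

object ColumnMajorIndexSketch {
  def main(args: Array[String]): Unit = {
    val users = 3
    val products = 2
    val data = Array.tabulate(users * products)(_.toDouble)
    // Breeze, like jblas, interprets the flat array column-major.
    val m = new BDM(users, products, data)
    for (u <- 0 until users; p <- 0 until products) {
      assert(m(u, p) == data(p * users + u))  // the index used in the Java test
    }
    println("column-major indexing matches")
  }
}
```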
diff --git a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java
index 7266eec235..c56db703ea 100644
--- a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java
+++ b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java
@@ -19,14 +19,13 @@ package org.apache.spark.mllib.regression;
import java.io.Serializable;
import java.util.List;
+import java.util.Random;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
-import org.jblas.DoubleMatrix;
-
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.mllib.util.LinearDataGenerator;
@@ -45,7 +44,8 @@ public class JavaRidgeRegressionSuite implements Serializable {
sc = null;
}
- double predictionError(List<LabeledPoint> validationData, RidgeRegressionModel model) {
+ private static double predictionError(List<LabeledPoint> validationData,
+ RidgeRegressionModel model) {
double errorSum = 0;
for (LabeledPoint point: validationData) {
Double prediction = model.predict(point.features());
@@ -54,11 +54,14 @@ public class JavaRidgeRegressionSuite implements Serializable {
return errorSum / validationData.size();
}
- List<LabeledPoint> generateRidgeData(int numPoints, int numFeatures, double std) {
- org.jblas.util.Random.seed(42);
+ private static List<LabeledPoint> generateRidgeData(int numPoints, int numFeatures, double std) {
// Pick weights as random values distributed uniformly in [-0.5, 0.5]
- DoubleMatrix w = DoubleMatrix.rand(numFeatures, 1).subi(0.5);
- return LinearDataGenerator.generateLinearInputAsList(0.0, w.data, numPoints, 42, std);
+ Random random = new Random(42);
+ double[] w = new double[numFeatures];
+ for (int i = 0; i < w.length; i++) {
+ w[i] = random.nextDouble() - 0.5;
+ }
+ return LinearDataGenerator.generateLinearInputAsList(0.0, w, numPoints, 42, std);
}
@Test
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala
index 1a47344b68..3676d9c5de 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala
@@ -20,7 +20,7 @@ package org.apache.spark.mllib.classification
import scala.collection.JavaConverters._
import scala.util.Random
-import org.jblas.DoubleMatrix
+import breeze.linalg.{DenseVector => BDV}
import org.apache.spark.{SparkException, SparkFunSuite}
import org.apache.spark.mllib.linalg.Vectors
@@ -45,12 +45,11 @@ object SVMSuite {
nPoints: Int,
seed: Int): Seq[LabeledPoint] = {
val rnd = new Random(seed)
- val weightsMat = new DoubleMatrix(1, weights.length, weights: _*)
+ val weightsMat = new BDV(weights)
val x = Array.fill[Array[Double]](nPoints)(
Array.fill[Double](weights.length)(rnd.nextDouble() * 2.0 - 1.0))
val y = x.map { xi =>
- val yD = new DoubleMatrix(1, xi.length, xi: _*).dot(weightsMat) +
- intercept + 0.01 * rnd.nextGaussian()
+ val yD = new BDV(xi).dot(weightsMat) + intercept + 0.01 * rnd.nextGaussian()
if (yD < 0) 0.0 else 1.0
}
y.zip(x).map(p => LabeledPoint(p._1, Vectors.dense(p._2)))
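
In SVMSuite the only jblas operation was a dot product, so the one-row DoubleMatrix becomes a Breeze DenseVector. A minimal sketch of the equivalence (values and object name are illustrative):

```scala
import breeze.linalg.{DenseVector => BDV}

object DotProductSketch {
  def main(args: Array[String]): Unit = {
    val weights = new BDV(Array(0.5, -1.0, 2.0))
    val xi = Array(1.0, 2.0, 3.0)
    // jblas: new DoubleMatrix(1, 3, xi: _*).dot(weights)
    val yD = new BDV(xi).dot(weights)  // 0.5 - 2.0 + 6.0 = 4.5
    println(yD)
  }
}
```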
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/optimization/NNLSSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/optimization/NNLSSuite.scala
index d8f9b8c339..4ec3dc0df0 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/optimization/NNLSSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/optimization/NNLSSuite.scala
@@ -19,28 +19,22 @@ package org.apache.spark.mllib.optimization
import scala.util.Random
-import org.jblas.{DoubleMatrix, SimpleBlas}
+import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV}
import org.apache.spark.SparkFunSuite
import org.apache.spark.mllib.util.TestingUtils._
class NNLSSuite extends SparkFunSuite {
/** Generate an NNLS problem whose optimal solution is the all-ones vector. */
- def genOnesData(n: Int, rand: Random): (DoubleMatrix, DoubleMatrix) = {
- val A = new DoubleMatrix(n, n, Array.fill(n*n)(rand.nextDouble()): _*)
- val b = A.mmul(DoubleMatrix.ones(n, 1))
-
- val ata = A.transpose.mmul(A)
- val atb = A.transpose.mmul(b)
-
- (ata, atb)
+ def genOnesData(n: Int, rand: Random): (BDM[Double], BDV[Double]) = {
+ val A = new BDM(n, n, Array.fill(n*n)(rand.nextDouble()))
+ val b = A * new BDV(Array.fill(n)(1.0))
+ (A.t * A, A.t * b)
}
/** Compute the objective value */
- def computeObjectiveValue(ata: DoubleMatrix, atb: DoubleMatrix, x: DoubleMatrix): Double = {
- val res = (x.transpose().mmul(ata).mmul(x)).mul(0.5).sub(atb.dot(x))
- res.get(0)
- }
+ def computeObjectiveValue(ata: BDM[Double], atb: BDV[Double], x: BDV[Double]): Double =
+ (x.t * ata * x) / 2.0 - atb.dot(x)
test("NNLS: exact solution cases") {
val n = 20
@@ -54,12 +48,15 @@ class NNLSSuite extends SparkFunSuite {
for (k <- 0 until 100) {
val (ata, atb) = genOnesData(n, rand)
- val x = new DoubleMatrix(NNLS.solve(ata.data, atb.data, ws))
+ val x = new BDV(NNLS.solve(ata.data, atb.data, ws))
assert(x.length === n)
- val answer = DoubleMatrix.ones(n, 1)
- SimpleBlas.axpy(-1.0, answer, x)
- val solved = (x.norm2 < 1e-2) && (x.normmax < 1e-3)
- if (solved) numSolved = numSolved + 1
+ val answer = new BDV(Array.fill(n)(1.0))
+ val solved =
+ (breeze.linalg.norm(x - answer) < 0.01) && // L2 norm
+ ((x - answer).toArray.map(_.abs).max < 0.001) // inf norm
+ if (solved) {
+ numSolved += 1
+ }
}
assert(numSolved > 50)
@@ -67,20 +64,18 @@ class NNLSSuite extends SparkFunSuite {
test("NNLS: nonnegativity constraint active") {
val n = 5
- // scalastyle:off
- val ata = new DoubleMatrix(Array(
- Array( 4.377, -3.531, -1.306, -0.139, 3.418),
- Array(-3.531, 4.344, 0.934, 0.305, -2.140),
- Array(-1.306, 0.934, 2.644, -0.203, -0.170),
- Array(-0.139, 0.305, -0.203, 5.883, 1.428),
- Array( 3.418, -2.140, -0.170, 1.428, 4.684)))
- // scalastyle:on
- val atb = new DoubleMatrix(Array(-1.632, 2.115, 1.094, -1.025, -0.636))
+ val ata = Array(
+ 4.377, -3.531, -1.306, -0.139, 3.418,
+ -3.531, 4.344, 0.934, 0.305, -2.140,
+ -1.306, 0.934, 2.644, -0.203, -0.170,
+ -0.139, 0.305, -0.203, 5.883, 1.428,
+ 3.418, -2.140, -0.170, 1.428, 4.684)
+ val atb = Array(-1.632, 2.115, 1.094, -1.025, -0.636)
val goodx = Array(0.13025, 0.54506, 0.2874, 0.0, 0.028628)
val ws = NNLS.createWorkspace(n)
- val x = NNLS.solve(ata.data, atb.data, ws)
+ val x = NNLS.solve(ata, atb, ws)
for (i <- 0 until n) {
assert(x(i) ~== goodx(i) absTol 1E-3)
assert(x(i) >= 0)
@@ -89,23 +84,21 @@ class NNLSSuite extends SparkFunSuite {
test("NNLS: objective value test") {
val n = 5
- val ata = new DoubleMatrix(5, 5
- , 517399.13534, 242529.67289, -153644.98976, 130802.84503, -798452.29283
- , 242529.67289, 126017.69765, -75944.21743, 81785.36128, -405290.60884
- , -153644.98976, -75944.21743, 46986.44577, -45401.12659, 247059.51049
- , 130802.84503, 81785.36128, -45401.12659, 67457.31310, -253747.03819
- , -798452.29283, -405290.60884, 247059.51049, -253747.03819, 1310939.40814
- )
- val atb = new DoubleMatrix(5, 1,
- -31755.05710, 13047.14813, -20191.24443, 25993.77580, 11963.55017)
+ val ata = new BDM(5, 5, Array(
+ 517399.13534, 242529.67289, -153644.98976, 130802.84503, -798452.29283,
+ 242529.67289, 126017.69765, -75944.21743, 81785.36128, -405290.60884,
+ -153644.98976, -75944.21743, 46986.44577, -45401.12659, 247059.51049,
+ 130802.84503, 81785.36128, -45401.12659, 67457.31310, -253747.03819,
+ -798452.29283, -405290.60884, 247059.51049, -253747.03819, 1310939.40814))
+ val atb = new BDV(Array(-31755.05710, 13047.14813, -20191.24443, 25993.77580, 11963.55017))
/** reference solution obtained from matlab function quadprog */
- val refx = new DoubleMatrix(Array(34.90751, 103.96254, 0.00000, 27.82094, 58.79627))
+ val refx = new BDV(Array(34.90751, 103.96254, 0.00000, 27.82094, 58.79627))
val refObj = computeObjectiveValue(ata, atb, refx)
val ws = NNLS.createWorkspace(n)
- val x = new DoubleMatrix(NNLS.solve(ata.data, atb.data, ws))
+ val x = new BDV(NNLS.solve(ata.data, atb.data, ws))
val obj = computeObjectiveValue(ata, atb, x)
assert(obj < refObj + 1E-5)
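
The NNLSSuite rewrite replaces jblas's norm2 and normmax with breeze.linalg.norm (Euclidean norm) and a max of absolute values (infinity norm), as the inline comments in the patch note. A standalone illustration, assuming nothing beyond Breeze:

```scala
import breeze.linalg.{norm, DenseVector => BDV}

object NormSketch {
  def main(args: Array[String]): Unit = {
    val diff = new BDV(Array(3.0, -4.0))
    val l2 = norm(diff)                     // jblas norm2: sqrt(9 + 16) = 5.0
    val linf = diff.toArray.map(_.abs).max  // jblas normmax: 4.0
    println(s"L2 = $l2, Linf = $linf")
  }
}
```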
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala
index 045135f7f8..d9dc557e3b 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala
@@ -21,7 +21,7 @@ import scala.collection.JavaConverters._
import scala.math.abs
import scala.util.Random
-import org.jblas.DoubleMatrix
+import breeze.linalg.{DenseMatrix => BDM}
import org.apache.spark.SparkFunSuite
import org.apache.spark.mllib.util.MLlibTestSparkContext
@@ -29,16 +29,16 @@ import org.apache.spark.storage.StorageLevel
object ALSSuite {
- def generateRatingsAsJavaList(
+ def generateRatingsAsJava(
users: Int,
products: Int,
features: Int,
samplingRate: Double,
implicitPrefs: Boolean,
- negativeWeights: Boolean): (java.util.List[Rating], DoubleMatrix, DoubleMatrix) = {
+ negativeWeights: Boolean): (java.util.List[Rating], Array[Double], Array[Double]) = {
val (sampledRatings, trueRatings, truePrefs) =
- generateRatings(users, products, features, samplingRate, implicitPrefs)
- (sampledRatings.asJava, trueRatings, truePrefs)
+ generateRatings(users, products, features, samplingRate, implicitPrefs, negativeWeights)
+ (sampledRatings.asJava, trueRatings.toArray, if (truePrefs == null) null else truePrefs.toArray)
}
def generateRatings(
@@ -48,35 +48,36 @@ object ALSSuite {
samplingRate: Double,
implicitPrefs: Boolean = false,
negativeWeights: Boolean = false,
- negativeFactors: Boolean = true): (Seq[Rating], DoubleMatrix, DoubleMatrix) = {
+ negativeFactors: Boolean = true): (Seq[Rating], BDM[Double], BDM[Double]) = {
val rand = new Random(42)
// Create a random matrix with uniform values from -1 to 1
def randomMatrix(m: Int, n: Int) = {
if (negativeFactors) {
- new DoubleMatrix(m, n, Array.fill(m * n)(rand.nextDouble() * 2 - 1): _*)
+ new BDM(m, n, Array.fill(m * n)(rand.nextDouble() * 2 - 1))
} else {
- new DoubleMatrix(m, n, Array.fill(m * n)(rand.nextDouble()): _*)
+ new BDM(m, n, Array.fill(m * n)(rand.nextDouble()))
}
}
val userMatrix = randomMatrix(users, features)
val productMatrix = randomMatrix(features, products)
- val (trueRatings, truePrefs) = implicitPrefs match {
- case true =>
+ val (trueRatings, truePrefs) =
+ if (implicitPrefs) {
// Generate raw values from [0,9], or if negativeWeights, from [-2,7]
- val raw = new DoubleMatrix(users, products,
+ val raw = new BDM(users, products,
Array.fill(users * products)(
- (if (negativeWeights) -2 else 0) + rand.nextInt(10).toDouble): _*)
+ (if (negativeWeights) -2 else 0) + rand.nextInt(10).toDouble))
val prefs =
- new DoubleMatrix(users, products, raw.data.map(v => if (v > 0) 1.0 else 0.0): _*)
+ new BDM(users, products, raw.data.map(v => if (v > 0) 1.0 else 0.0))
(raw, prefs)
- case false => (userMatrix.mmul(productMatrix), null)
- }
+ } else {
+ (userMatrix * productMatrix, null)
+ }
val sampledRatings = {
for (u <- 0 until users; p <- 0 until products if rand.nextDouble() < samplingRate)
- yield Rating(u, p, trueRatings.get(u, p))
+ yield Rating(u, p, trueRatings(u, p))
}
(sampledRatings, trueRatings, truePrefs)
@@ -149,8 +150,8 @@ class ALSSuite extends SparkFunSuite with MLlibTestSparkContext {
.setSeed(1)
.setFinalRDDStorageLevel(storageLevel)
.run(ratings)
- assert(model.productFeatures.getStorageLevel == storageLevel);
- assert(model.userFeatures.getStorageLevel == storageLevel);
+ assert(model.productFeatures.getStorageLevel == storageLevel)
+ assert(model.userFeatures.getStorageLevel == storageLevel)
storageLevel = StorageLevel.DISK_ONLY
model = new ALS()
.setRank(5)
@@ -160,8 +161,8 @@ class ALSSuite extends SparkFunSuite with MLlibTestSparkContext {
.setSeed(1)
.setFinalRDDStorageLevel(storageLevel)
.run(ratings)
- assert(model.productFeatures.getStorageLevel == storageLevel);
- assert(model.userFeatures.getStorageLevel == storageLevel);
+ assert(model.productFeatures.getStorageLevel == storageLevel)
+ assert(model.userFeatures.getStorageLevel == storageLevel)
}
test("negative ids") {
@@ -178,7 +179,7 @@ class ALSSuite extends SparkFunSuite with MLlibTestSparkContext {
val u = r.user + 25
val p = r.product + 25
val v = r.rating
- val error = v - correct.get(u, p)
+ val error = v - correct(u, p)
assert(math.abs(error) < 0.4)
}
}
@@ -197,7 +198,7 @@ class ALSSuite extends SparkFunSuite with MLlibTestSparkContext {
* @param samplingRate what fraction of the user-product pairs are known
* @param matchThreshold max difference allowed to consider a predicted rating correct
* @param implicitPrefs flag to test implicit feedback
- * @param bulkPredict flag to test bulk predicition
+ * @param bulkPredict flag to test bulk prediction
* @param negativeWeights whether the generated data can contain negative values
* @param numUserBlocks number of user blocks to partition users into
* @param numProductBlocks number of product blocks to partition products into
@@ -234,30 +235,31 @@ class ALSSuite extends SparkFunSuite with MLlibTestSparkContext {
.setNonnegative(!negativeFactors)
.run(sc.parallelize(sampledRatings))
- val predictedU = new DoubleMatrix(users, features)
+ val predictedU = new BDM[Double](users, features)
for ((u, vec) <- model.userFeatures.collect(); i <- 0 until features) {
- predictedU.put(u, i, vec(i))
+ predictedU(u, i) = vec(i)
}
- val predictedP = new DoubleMatrix(products, features)
+ val predictedP = new BDM[Double](products, features)
for ((p, vec) <- model.productFeatures.collect(); i <- 0 until features) {
- predictedP.put(p, i, vec(i))
+ predictedP(p, i) = vec(i)
}
- val predictedRatings = bulkPredict match {
- case false => predictedU.mmul(predictedP.transpose)
- case true =>
- val allRatings = new DoubleMatrix(users, products)
+ val predictedRatings =
+ if (bulkPredict) {
+ val allRatings = new BDM[Double](users, products)
val usersProducts = for (u <- 0 until users; p <- 0 until products) yield (u, p)
val userProductsRDD = sc.parallelize(usersProducts)
model.predict(userProductsRDD).collect().foreach { elem =>
- allRatings.put(elem.user, elem.product, elem.rating)
+ allRatings(elem.user, elem.product) = elem.rating
}
allRatings
- }
+ } else {
+ predictedU * predictedP.t
+ }
if (!implicitPrefs) {
for (u <- 0 until users; p <- 0 until products) {
- val prediction = predictedRatings.get(u, p)
- val correct = trueRatings.get(u, p)
+ val prediction = predictedRatings(u, p)
+ val correct = trueRatings(u, p)
if (math.abs(prediction - correct) > matchThreshold) {
fail(("Model failed to predict (%d, %d): %f vs %f\ncorr: %s\npred: %s\nU: %s\n P: %s")
.format(u, p, correct, prediction, trueRatings, predictedRatings, predictedU,
@@ -269,9 +271,9 @@ class ALSSuite extends SparkFunSuite with MLlibTestSparkContext {
var sqErr = 0.0
var denom = 0.0
for (u <- 0 until users; p <- 0 until products) {
- val prediction = predictedRatings.get(u, p)
- val truePref = truePrefs.get(u, p)
- val confidence = 1 + 1.0 * abs(trueRatings.get(u, p))
+ val prediction = predictedRatings(u, p)
+ val truePref = truePrefs(u, p)
+ val confidence = 1.0 + abs(trueRatings(u, p))
val err = confidence * (truePref - prediction) * (truePref - prediction)
sqErr += err
denom += confidence
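
In ALSSuite, jblas's in-place `put` and `mmul`/`transpose` calls map onto Breeze's update syntax and the `*`/`.t` operators. A hedged sketch of the bulk-prediction arithmetic above (the tiny dimensions and object name are illustrative):

```scala
import breeze.linalg.{DenseMatrix => BDM}

object RatingMatrixSketch {
  def main(args: Array[String]): Unit = {
    val users = 2; val products = 2; val features = 1
    val u = new BDM[Double](users, features)  // zero-filled, like jblas
    val p = new BDM[Double](products, features)
    u(0, 0) = 1.0; u(1, 0) = 2.0              // jblas: u.put(0, 0, 1.0)
    p(0, 0) = 3.0; p(1, 0) = 4.0
    val ratings = u * p.t                     // jblas: u.mmul(p.transpose)
    println(ratings(1, 1))                    // 2.0 * 4.0 = 8.0
  }
}
```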
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala
index a200e9454b..815be32d2e 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala
@@ -19,8 +19,6 @@ package org.apache.spark.mllib.regression
import scala.util.Random
-import org.jblas.DoubleMatrix
-
import org.apache.spark.SparkFunSuite
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.util.{LinearDataGenerator, LocalClusterSparkContext,
@@ -49,12 +47,12 @@ class RidgeRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
val numExamples = 50
val numFeatures = 20
- org.jblas.util.Random.seed(42)
// Pick weights as random values distributed uniformly in [-0.5, 0.5]
- val w = DoubleMatrix.rand(numFeatures, 1).subi(0.5)
+ val random = new Random(42)
+ val w = Array.fill(numFeatures)(random.nextDouble() - 0.5)
// Use half of data for training and other half for validation
- val data = LinearDataGenerator.generateLinearInput(3.0, w.toArray, 2 * numExamples, 42, 10.0)
+ val data = LinearDataGenerator.generateLinearInput(3.0, w, 2 * numExamples, 42, 10.0)
val testData = data.take(numExamples)
val validationData = data.takeRight(numExamples)
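
Here the removed org.jblas.util.Random.seed(42) plus DoubleMatrix.rand becomes a seeded scala.util.Random, preserving determinism across runs. A small demonstration (illustrative only) that the replacement yields repeatable weights in [-0.5, 0.5):

```scala
import scala.util.Random

object SeededWeightsSketch {
  def main(args: Array[String]): Unit = {
    def weights(n: Int): Array[Double] = {
      val random = new Random(42)
      Array.fill(n)(random.nextDouble() - 0.5)  // uniform in [-0.5, 0.5)
    }
    // Same seed, same weights: the property the old jblas seed call provided.
    assert(weights(5).sameElements(weights(5)))
    println(weights(5).mkString(", "))
  }
}
```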