author    Sean Owen <sowen@cloudera.com>  2016-03-08 17:47:55 +0000
committer Sean Owen <sowen@cloudera.com>  2016-03-08 17:47:55 +0000
commit    54040f8d350d2aad3078dcffef808c62b7c0b73d (patch)
tree      1eacb775f8426130a3152cf9afd30c47fd5f9143 /mllib/src/test
parent    ca1a7b9d6acf8e1f9b6ab6265f9001c2c7ff8489 (diff)
[SPARK-13715][MLLIB] Remove last usages of jblas in tests
## What changes were proposed in this pull request?

Remove last usage of jblas, in tests

## How was this patch tested?

Jenkins tests -- the same ones that are being modified.

Author: Sean Owen <sowen@cloudera.com>

Closes #11560 from srowen/SPARK-13715.
Diffstat (limited to 'mllib/src/test')
-rw-r--r-- mllib/src/test/java/org/apache/spark/mllib/recommendation/JavaALSSuite.java         | 39
-rw-r--r-- mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java | 17
-rw-r--r-- mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala           |  7
-rw-r--r-- mllib/src/test/scala/org/apache/spark/mllib/optimization/NNLSSuite.scala            | 71
-rw-r--r-- mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala           | 76
-rw-r--r-- mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala   |  8
6 files changed, 106 insertions(+), 112 deletions(-)
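
The patch swaps jblas for either Breeze (in the Scala tests) or plain arrays (in the Java tests). A minimal sketch of that mapping, assuming only Breeze on the classpath (the object name is illustrative, not part of the commit): jblas and Breeze both use column-major storage, `mmul` becomes `*`, and `get(i, j)` becomes `apply`.

```scala
// Illustrative sketch of the jblas -> Breeze mapping applied throughout
// this patch; not part of the commit itself.
import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV}

object JblasToBreezeSketch {
  def main(args: Array[String]): Unit = {
    // Both libraries store matrices column-major, so the same flat Array
    // backs the same 2 x 2 matrix: column 0 = (1, 2), column 1 = (3, 4).
    val m = new BDM(2, 2, Array(1.0, 2.0, 3.0, 4.0))
    val v = new BDV(Array(1.0, 1.0))
    val product = m * v  // jblas: m.mmul(v)
    println(m(0, 1))     // jblas: m.get(0, 1) -> 3.0
    println(product(0))  // row 0 of m dotted with v: 1.0 + 3.0 = 4.0
  }
}
```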
diff --git a/mllib/src/test/java/org/apache/spark/mllib/recommendation/JavaALSSuite.java b/mllib/src/test/java/org/apache/spark/mllib/recommendation/JavaALSSuite.java
index a6631ed7eb..d0bf7f556d 100644
--- a/mllib/src/test/java/org/apache/spark/mllib/recommendation/JavaALSSuite.java
+++ b/mllib/src/test/java/org/apache/spark/mllib/recommendation/JavaALSSuite.java
@@ -24,7 +24,6 @@ import java.util.List;
import scala.Tuple2;
import scala.Tuple3;
-import org.jblas.DoubleMatrix;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
@@ -48,14 +47,14 @@ public class JavaALSSuite implements Serializable {
sc = null;
}
- void validatePrediction(
+ private void validatePrediction(
MatrixFactorizationModel model,
int users,
int products,
- DoubleMatrix trueRatings,
+ double[] trueRatings,
double matchThreshold,
boolean implicitPrefs,
- DoubleMatrix truePrefs) {
+ double[] truePrefs) {
List<Tuple2<Integer, Integer>> localUsersProducts = new ArrayList<>(users * products);
for (int u=0; u < users; ++u) {
for (int p=0; p < products; ++p) {
@@ -68,7 +67,7 @@ public class JavaALSSuite implements Serializable {
if (!implicitPrefs) {
for (Rating r: predictedRatings) {
double prediction = r.rating();
- double correct = trueRatings.get(r.user(), r.product());
+ double correct = trueRatings[r.product() * users + r.user()];
Assert.assertTrue(String.format("Prediction=%2.4f not below match threshold of %2.2f",
prediction, matchThreshold), Math.abs(prediction - correct) < matchThreshold);
}
@@ -79,9 +78,9 @@ public class JavaALSSuite implements Serializable {
double denom = 0.0;
for (Rating r: predictedRatings) {
double prediction = r.rating();
- double truePref = truePrefs.get(r.user(), r.product());
+ double truePref = truePrefs[r.product() * users + r.user()];
double confidence = 1.0 +
- /* alpha = */ 1.0 * Math.abs(trueRatings.get(r.user(), r.product()));
+ /* alpha = 1.0 * ... */ Math.abs(trueRatings[r.product() * users + r.user()]);
double err = confidence * (truePref - prediction) * (truePref - prediction);
sqErr += err;
denom += confidence;
@@ -98,8 +97,8 @@ public class JavaALSSuite implements Serializable {
int iterations = 15;
int users = 50;
int products = 100;
- Tuple3<List<Rating>, DoubleMatrix, DoubleMatrix> testData = ALSSuite.generateRatingsAsJavaList(
- users, products, features, 0.7, false, false);
+ Tuple3<List<Rating>, double[], double[]> testData =
+ ALSSuite.generateRatingsAsJava(users, products, features, 0.7, false, false);
JavaRDD<Rating> data = sc.parallelize(testData._1());
MatrixFactorizationModel model = ALS.train(data.rdd(), features, iterations);
@@ -112,8 +111,8 @@ public class JavaALSSuite implements Serializable {
int iterations = 15;
int users = 100;
int products = 200;
- Tuple3<List<Rating>, DoubleMatrix, DoubleMatrix> testData = ALSSuite.generateRatingsAsJavaList(
- users, products, features, 0.7, false, false);
+ Tuple3<List<Rating>, double[], double[]> testData =
+ ALSSuite.generateRatingsAsJava(users, products, features, 0.7, false, false);
JavaRDD<Rating> data = sc.parallelize(testData._1());
@@ -129,8 +128,8 @@ public class JavaALSSuite implements Serializable {
int iterations = 15;
int users = 80;
int products = 160;
- Tuple3<List<Rating>, DoubleMatrix, DoubleMatrix> testData = ALSSuite.generateRatingsAsJavaList(
- users, products, features, 0.7, true, false);
+ Tuple3<List<Rating>, double[], double[]> testData =
+ ALSSuite.generateRatingsAsJava(users, products, features, 0.7, true, false);
JavaRDD<Rating> data = sc.parallelize(testData._1());
MatrixFactorizationModel model = ALS.trainImplicit(data.rdd(), features, iterations);
@@ -143,8 +142,8 @@ public class JavaALSSuite implements Serializable {
int iterations = 15;
int users = 100;
int products = 200;
- Tuple3<List<Rating>, DoubleMatrix, DoubleMatrix> testData = ALSSuite.generateRatingsAsJavaList(
- users, products, features, 0.7, true, false);
+ Tuple3<List<Rating>, double[], double[]> testData =
+ ALSSuite.generateRatingsAsJava(users, products, features, 0.7, true, false);
JavaRDD<Rating> data = sc.parallelize(testData._1());
@@ -161,8 +160,8 @@ public class JavaALSSuite implements Serializable {
int iterations = 15;
int users = 80;
int products = 160;
- Tuple3<List<Rating>, DoubleMatrix, DoubleMatrix> testData = ALSSuite.generateRatingsAsJavaList(
- users, products, features, 0.7, true, true);
+ Tuple3<List<Rating>, double[], double[]> testData =
+ ALSSuite.generateRatingsAsJava(users, products, features, 0.7, true, true);
JavaRDD<Rating> data = sc.parallelize(testData._1());
MatrixFactorizationModel model = new ALS().setRank(features)
@@ -179,9 +178,9 @@ public class JavaALSSuite implements Serializable {
int iterations = 10;
int users = 200;
int products = 50;
- Tuple3<List<Rating>, DoubleMatrix, DoubleMatrix> testData = ALSSuite.generateRatingsAsJavaList(
- users, products, features, 0.7, true, false);
- JavaRDD<Rating> data = sc.parallelize(testData._1());
+ List<Rating> testData = ALSSuite.generateRatingsAsJava(
+ users, products, features, 0.7, true, false)._1();
+ JavaRDD<Rating> data = sc.parallelize(testData);
MatrixFactorizationModel model = new ALS().setRank(features)
.setIterations(iterations)
.setImplicitPrefs(true)
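
The JavaALSSuite change above leans on jblas's column-major layout: with `users` rows and `products` columns, entry (u, p) of the old DoubleMatrix sits at flat index `p * users + u`, which is exactly the expression `trueRatings[r.product() * users + r.user()]` in the new code. A small standalone check of that identity (illustrative only; Breeze shares the column-major convention):

```scala
import breeze.linalg.{DenseMatrix => BDM}

object ColumnMajorIndexSketch {
  def main(args: Array[String]): Unit = {
    val users = 3
    val products = 2
    val data = Array.tabulate(users * products)(_.toDouble)
    // Breeze, like jblas, interprets the flat array column-major.
    val m = new BDM(users, products, data)
    for (u <- 0 until users; p <- 0 until products) {
      assert(m(u, p) == data(p * users + u))  // the index used in the Java test
    }
    println("column-major indexing matches")
  }
}
```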
diff --git a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java
index 7266eec235..c56db703ea 100644
--- a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java
+++ b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java
@@ -19,14 +19,13 @@ package org.apache.spark.mllib.regression;
import java.io.Serializable;
import java.util.List;
+import java.util.Random;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
-import org.jblas.DoubleMatrix;
-
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.mllib.util.LinearDataGenerator;
@@ -45,7 +44,8 @@ public class JavaRidgeRegressionSuite implements Serializable {
sc = null;
}
- double predictionError(List<LabeledPoint> validationData, RidgeRegressionModel model) {
+ private static double predictionError(List<LabeledPoint> validationData,
+ RidgeRegressionModel model) {
double errorSum = 0;
for (LabeledPoint point: validationData) {
Double prediction = model.predict(point.features());
@@ -54,11 +54,14 @@ public class JavaRidgeRegressionSuite implements Serializable {
return errorSum / validationData.size();
}
- List<LabeledPoint> generateRidgeData(int numPoints, int numFeatures, double std) {
- org.jblas.util.Random.seed(42);
+ private static List<LabeledPoint> generateRidgeData(int numPoints, int numFeatures, double std) {
// Pick weights as random values distributed uniformly in [-0.5, 0.5]
- DoubleMatrix w = DoubleMatrix.rand(numFeatures, 1).subi(0.5);
- return LinearDataGenerator.generateLinearInputAsList(0.0, w.data, numPoints, 42, std);
+ Random random = new Random(42);
+ double[] w = new double[numFeatures];
+ for (int i = 0; i < w.length; i++) {
+ w[i] = random.nextDouble() - 0.5;
+ }
+ return LinearDataGenerator.generateLinearInputAsList(0.0, w, numPoints, 42, std);
}
@Test
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala
index 1a47344b68..3676d9c5de 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala
@@ -20,7 +20,7 @@ package org.apache.spark.mllib.classification
import scala.collection.JavaConverters._
import scala.util.Random
-import org.jblas.DoubleMatrix
+import breeze.linalg.{DenseVector => BDV}
import org.apache.spark.{SparkException, SparkFunSuite}
import org.apache.spark.mllib.linalg.Vectors
@@ -45,12 +45,11 @@ object SVMSuite {
nPoints: Int,
seed: Int): Seq[LabeledPoint] = {
val rnd = new Random(seed)
- val weightsMat = new DoubleMatrix(1, weights.length, weights: _*)
+ val weightsMat = new BDV(weights)
val x = Array.fill[Array[Double]](nPoints)(
Array.fill[Double](weights.length)(rnd.nextDouble() * 2.0 - 1.0))
val y = x.map { xi =>
- val yD = new DoubleMatrix(1, xi.length, xi: _*).dot(weightsMat) +
- intercept + 0.01 * rnd.nextGaussian()
+ val yD = new BDV(xi).dot(weightsMat) + intercept + 0.01 * rnd.nextGaussian()
if (yD < 0) 0.0 else 1.0
}
y.zip(x).map(p => LabeledPoint(p._1, Vectors.dense(p._2)))
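
In SVMSuite the only jblas operation was a dot product, so the one-row DoubleMatrix becomes a Breeze DenseVector. A minimal sketch of the equivalence (values and object name are illustrative):

```scala
import breeze.linalg.{DenseVector => BDV}

object DotProductSketch {
  def main(args: Array[String]): Unit = {
    val weights = new BDV(Array(0.5, -1.0, 2.0))
    val xi = Array(1.0, 2.0, 3.0)
    // jblas: new DoubleMatrix(1, 3, xi: _*).dot(weights)
    val yD = new BDV(xi).dot(weights)  // 0.5 - 2.0 + 6.0 = 4.5
    println(yD)
  }
}
```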
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/optimization/NNLSSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/optimization/NNLSSuite.scala
index d8f9b8c339..4ec3dc0df0 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/optimization/NNLSSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/optimization/NNLSSuite.scala
@@ -19,28 +19,22 @@ package org.apache.spark.mllib.optimization
import scala.util.Random
-import org.jblas.{DoubleMatrix, SimpleBlas}
+import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV}
import org.apache.spark.SparkFunSuite
import org.apache.spark.mllib.util.TestingUtils._
class NNLSSuite extends SparkFunSuite {
/** Generate an NNLS problem whose optimal solution is the all-ones vector. */
- def genOnesData(n: Int, rand: Random): (DoubleMatrix, DoubleMatrix) = {
- val A = new DoubleMatrix(n, n, Array.fill(n*n)(rand.nextDouble()): _*)
- val b = A.mmul(DoubleMatrix.ones(n, 1))
-
- val ata = A.transpose.mmul(A)
- val atb = A.transpose.mmul(b)
-
- (ata, atb)
+ def genOnesData(n: Int, rand: Random): (BDM[Double], BDV[Double]) = {
+ val A = new BDM(n, n, Array.fill(n*n)(rand.nextDouble()))
+ val b = A * new BDV(Array.fill(n)(1.0))
+ (A.t * A, A.t * b)
}
/** Compute the objective value */
- def computeObjectiveValue(ata: DoubleMatrix, atb: DoubleMatrix, x: DoubleMatrix): Double = {
- val res = (x.transpose().mmul(ata).mmul(x)).mul(0.5).sub(atb.dot(x))
- res.get(0)
- }
+ def computeObjectiveValue(ata: BDM[Double], atb: BDV[Double], x: BDV[Double]): Double =
+ (x.t * ata * x) / 2.0 - atb.dot(x)
test("NNLS: exact solution cases") {
val n = 20
@@ -54,12 +48,15 @@ class NNLSSuite extends SparkFunSuite {
for (k <- 0 until 100) {
val (ata, atb) = genOnesData(n, rand)
- val x = new DoubleMatrix(NNLS.solve(ata.data, atb.data, ws))
+ val x = new BDV(NNLS.solve(ata.data, atb.data, ws))
assert(x.length === n)
- val answer = DoubleMatrix.ones(n, 1)
- SimpleBlas.axpy(-1.0, answer, x)
- val solved = (x.norm2 < 1e-2) && (x.normmax < 1e-3)
- if (solved) numSolved = numSolved + 1
+ val answer = new BDV(Array.fill(n)(1.0))
+ val solved =
+ (breeze.linalg.norm(x - answer) < 0.01) && // L2 norm
+ ((x - answer).toArray.map(_.abs).max < 0.001) // inf norm
+ if (solved) {
+ numSolved += 1
+ }
}
assert(numSolved > 50)
@@ -67,20 +64,18 @@ class NNLSSuite extends SparkFunSuite {
test("NNLS: nonnegativity constraint active") {
val n = 5
- // scalastyle:off
- val ata = new DoubleMatrix(Array(
- Array( 4.377, -3.531, -1.306, -0.139, 3.418),
- Array(-3.531, 4.344, 0.934, 0.305, -2.140),
- Array(-1.306, 0.934, 2.644, -0.203, -0.170),
- Array(-0.139, 0.305, -0.203, 5.883, 1.428),
- Array( 3.418, -2.140, -0.170, 1.428, 4.684)))
- // scalastyle:on
- val atb = new DoubleMatrix(Array(-1.632, 2.115, 1.094, -1.025, -0.636))
+ val ata = Array(
+ 4.377, -3.531, -1.306, -0.139, 3.418,
+ -3.531, 4.344, 0.934, 0.305, -2.140,
+ -1.306, 0.934, 2.644, -0.203, -0.170,
+ -0.139, 0.305, -0.203, 5.883, 1.428,
+ 3.418, -2.140, -0.170, 1.428, 4.684)
+ val atb = Array(-1.632, 2.115, 1.094, -1.025, -0.636)
val goodx = Array(0.13025, 0.54506, 0.2874, 0.0, 0.028628)
val ws = NNLS.createWorkspace(n)
- val x = NNLS.solve(ata.data, atb.data, ws)
+ val x = NNLS.solve(ata, atb, ws)
for (i <- 0 until n) {
assert(x(i) ~== goodx(i) absTol 1E-3)
assert(x(i) >= 0)
@@ -89,23 +84,21 @@ class NNLSSuite extends SparkFunSuite {
test("NNLS: objective value test") {
val n = 5
- val ata = new DoubleMatrix(5, 5
- , 517399.13534, 242529.67289, -153644.98976, 130802.84503, -798452.29283
- , 242529.67289, 126017.69765, -75944.21743, 81785.36128, -405290.60884
- , -153644.98976, -75944.21743, 46986.44577, -45401.12659, 247059.51049
- , 130802.84503, 81785.36128, -45401.12659, 67457.31310, -253747.03819
- , -798452.29283, -405290.60884, 247059.51049, -253747.03819, 1310939.40814
- )
- val atb = new DoubleMatrix(5, 1,
- -31755.05710, 13047.14813, -20191.24443, 25993.77580, 11963.55017)
+ val ata = new BDM(5, 5, Array(
+ 517399.13534, 242529.67289, -153644.98976, 130802.84503, -798452.29283,
+ 242529.67289, 126017.69765, -75944.21743, 81785.36128, -405290.60884,
+ -153644.98976, -75944.21743, 46986.44577, -45401.12659, 247059.51049,
+ 130802.84503, 81785.36128, -45401.12659, 67457.31310, -253747.03819,
+ -798452.29283, -405290.60884, 247059.51049, -253747.03819, 1310939.40814))
+ val atb = new BDV(Array(-31755.05710, 13047.14813, -20191.24443, 25993.77580, 11963.55017))
/** reference solution obtained from matlab function quadprog */
- val refx = new DoubleMatrix(Array(34.90751, 103.96254, 0.00000, 27.82094, 58.79627))
+ val refx = new BDV(Array(34.90751, 103.96254, 0.00000, 27.82094, 58.79627))
val refObj = computeObjectiveValue(ata, atb, refx)
val ws = NNLS.createWorkspace(n)
- val x = new DoubleMatrix(NNLS.solve(ata.data, atb.data, ws))
+ val x = new BDV(NNLS.solve(ata.data, atb.data, ws))
val obj = computeObjectiveValue(ata, atb, x)
assert(obj < refObj + 1E-5)
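
The NNLSSuite rewrite replaces jblas's norm2 and normmax with breeze.linalg.norm (Euclidean norm) and a max of absolute values (infinity norm), as the inline comments in the patch note. A standalone illustration, assuming nothing beyond Breeze:

```scala
import breeze.linalg.{norm, DenseVector => BDV}

object NormSketch {
  def main(args: Array[String]): Unit = {
    val diff = new BDV(Array(3.0, -4.0))
    val l2 = norm(diff)                     // jblas norm2: sqrt(9 + 16) = 5.0
    val linf = diff.toArray.map(_.abs).max  // jblas normmax: 4.0
    println(s"L2 = $l2, Linf = $linf")
  }
}
```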
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala
index 045135f7f8..d9dc557e3b 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala
@@ -21,7 +21,7 @@ import scala.collection.JavaConverters._
import scala.math.abs
import scala.util.Random
-import org.jblas.DoubleMatrix
+import breeze.linalg.{DenseMatrix => BDM}
import org.apache.spark.SparkFunSuite
import org.apache.spark.mllib.util.MLlibTestSparkContext
@@ -29,16 +29,16 @@ import org.apache.spark.storage.StorageLevel
object ALSSuite {
- def generateRatingsAsJavaList(
+ def generateRatingsAsJava(
users: Int,
products: Int,
features: Int,
samplingRate: Double,
implicitPrefs: Boolean,
- negativeWeights: Boolean): (java.util.List[Rating], DoubleMatrix, DoubleMatrix) = {
+ negativeWeights: Boolean): (java.util.List[Rating], Array[Double], Array[Double]) = {
val (sampledRatings, trueRatings, truePrefs) =
- generateRatings(users, products, features, samplingRate, implicitPrefs)
- (sampledRatings.asJava, trueRatings, truePrefs)
+ generateRatings(users, products, features, samplingRate, implicitPrefs, negativeWeights)
+ (sampledRatings.asJava, trueRatings.toArray, if (truePrefs == null) null else truePrefs.toArray)
}
def generateRatings(
@@ -48,35 +48,36 @@ object ALSSuite {
samplingRate: Double,
implicitPrefs: Boolean = false,
negativeWeights: Boolean = false,
- negativeFactors: Boolean = true): (Seq[Rating], DoubleMatrix, DoubleMatrix) = {
+ negativeFactors: Boolean = true): (Seq[Rating], BDM[Double], BDM[Double]) = {
val rand = new Random(42)
// Create a random matrix with uniform values from -1 to 1
def randomMatrix(m: Int, n: Int) = {
if (negativeFactors) {
- new DoubleMatrix(m, n, Array.fill(m * n)(rand.nextDouble() * 2 - 1): _*)
+ new BDM(m, n, Array.fill(m * n)(rand.nextDouble() * 2 - 1))
} else {
- new DoubleMatrix(m, n, Array.fill(m * n)(rand.nextDouble()): _*)
+ new BDM(m, n, Array.fill(m * n)(rand.nextDouble()))
}
}
val userMatrix = randomMatrix(users, features)
val productMatrix = randomMatrix(features, products)
- val (trueRatings, truePrefs) = implicitPrefs match {
- case true =>
+ val (trueRatings, truePrefs) =
+ if (implicitPrefs) {
// Generate raw values from [0,9], or if negativeWeights, from [-2,7]
- val raw = new DoubleMatrix(users, products,
+ val raw = new BDM(users, products,
Array.fill(users * products)(
- (if (negativeWeights) -2 else 0) + rand.nextInt(10).toDouble): _*)
+ (if (negativeWeights) -2 else 0) + rand.nextInt(10).toDouble))
val prefs =
- new DoubleMatrix(users, products, raw.data.map(v => if (v > 0) 1.0 else 0.0): _*)
+ new BDM(users, products, raw.data.map(v => if (v > 0) 1.0 else 0.0))
(raw, prefs)
- case false => (userMatrix.mmul(productMatrix), null)
- }
+ } else {
+ (userMatrix * productMatrix, null)
+ }
val sampledRatings = {
for (u <- 0 until users; p <- 0 until products if rand.nextDouble() < samplingRate)
- yield Rating(u, p, trueRatings.get(u, p))
+ yield Rating(u, p, trueRatings(u, p))
}
(sampledRatings, trueRatings, truePrefs)
@@ -149,8 +150,8 @@ class ALSSuite extends SparkFunSuite with MLlibTestSparkContext {
.setSeed(1)
.setFinalRDDStorageLevel(storageLevel)
.run(ratings)
- assert(model.productFeatures.getStorageLevel == storageLevel);
- assert(model.userFeatures.getStorageLevel == storageLevel);
+ assert(model.productFeatures.getStorageLevel == storageLevel)
+ assert(model.userFeatures.getStorageLevel == storageLevel)
storageLevel = StorageLevel.DISK_ONLY
model = new ALS()
.setRank(5)
@@ -160,8 +161,8 @@ class ALSSuite extends SparkFunSuite with MLlibTestSparkContext {
.setSeed(1)
.setFinalRDDStorageLevel(storageLevel)
.run(ratings)
- assert(model.productFeatures.getStorageLevel == storageLevel);
- assert(model.userFeatures.getStorageLevel == storageLevel);
+ assert(model.productFeatures.getStorageLevel == storageLevel)
+ assert(model.userFeatures.getStorageLevel == storageLevel)
}
test("negative ids") {
@@ -178,7 +179,7 @@ class ALSSuite extends SparkFunSuite with MLlibTestSparkContext {
val u = r.user + 25
val p = r.product + 25
val v = r.rating
- val error = v - correct.get(u, p)
+ val error = v - correct(u, p)
assert(math.abs(error) < 0.4)
}
}
@@ -197,7 +198,7 @@ class ALSSuite extends SparkFunSuite with MLlibTestSparkContext {
* @param samplingRate what fraction of the user-product pairs are known
* @param matchThreshold max difference allowed to consider a predicted rating correct
* @param implicitPrefs flag to test implicit feedback
- * @param bulkPredict flag to test bulk predicition
+ * @param bulkPredict flag to test bulk prediction
* @param negativeWeights whether the generated data can contain negative values
* @param numUserBlocks number of user blocks to partition users into
* @param numProductBlocks number of product blocks to partition products into
@@ -234,30 +235,31 @@ class ALSSuite extends SparkFunSuite with MLlibTestSparkContext {
.setNonnegative(!negativeFactors)
.run(sc.parallelize(sampledRatings))
- val predictedU = new DoubleMatrix(users, features)
+ val predictedU = new BDM[Double](users, features)
for ((u, vec) <- model.userFeatures.collect(); i <- 0 until features) {
- predictedU.put(u, i, vec(i))
+ predictedU(u, i) = vec(i)
}
- val predictedP = new DoubleMatrix(products, features)
+ val predictedP = new BDM[Double](products, features)
for ((p, vec) <- model.productFeatures.collect(); i <- 0 until features) {
- predictedP.put(p, i, vec(i))
+ predictedP(p, i) = vec(i)
}
- val predictedRatings = bulkPredict match {
- case false => predictedU.mmul(predictedP.transpose)
- case true =>
- val allRatings = new DoubleMatrix(users, products)
+ val predictedRatings =
+ if (bulkPredict) {
+ val allRatings = new BDM[Double](users, products)
val usersProducts = for (u <- 0 until users; p <- 0 until products) yield (u, p)
val userProductsRDD = sc.parallelize(usersProducts)
model.predict(userProductsRDD).collect().foreach { elem =>
- allRatings.put(elem.user, elem.product, elem.rating)
+ allRatings(elem.user, elem.product) = elem.rating
}
allRatings
- }
+ } else {
+ predictedU * predictedP.t
+ }
if (!implicitPrefs) {
for (u <- 0 until users; p <- 0 until products) {
- val prediction = predictedRatings.get(u, p)
- val correct = trueRatings.get(u, p)
+ val prediction = predictedRatings(u, p)
+ val correct = trueRatings(u, p)
if (math.abs(prediction - correct) > matchThreshold) {
fail(("Model failed to predict (%d, %d): %f vs %f\ncorr: %s\npred: %s\nU: %s\n P: %s")
.format(u, p, correct, prediction, trueRatings, predictedRatings, predictedU,
@@ -269,9 +271,9 @@ class ALSSuite extends SparkFunSuite with MLlibTestSparkContext {
var sqErr = 0.0
var denom = 0.0
for (u <- 0 until users; p <- 0 until products) {
- val prediction = predictedRatings.get(u, p)
- val truePref = truePrefs.get(u, p)
- val confidence = 1 + 1.0 * abs(trueRatings.get(u, p))
+ val prediction = predictedRatings(u, p)
+ val truePref = truePrefs(u, p)
+ val confidence = 1.0 + abs(trueRatings(u, p))
val err = confidence * (truePref - prediction) * (truePref - prediction)
sqErr += err
denom += confidence
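
In ALSSuite, jblas's in-place `put` and `mmul`/`transpose` calls map onto Breeze's update syntax and the `*`/`.t` operators. A hedged sketch of the bulk-prediction arithmetic above (the tiny dimensions and object name are illustrative):

```scala
import breeze.linalg.{DenseMatrix => BDM}

object RatingMatrixSketch {
  def main(args: Array[String]): Unit = {
    val users = 2; val products = 2; val features = 1
    val u = new BDM[Double](users, features)  // zero-filled, like jblas
    val p = new BDM[Double](products, features)
    u(0, 0) = 1.0; u(1, 0) = 2.0              // jblas: u.put(0, 0, 1.0)
    p(0, 0) = 3.0; p(1, 0) = 4.0
    val ratings = u * p.t                     // jblas: u.mmul(p.transpose)
    println(ratings(1, 1))                    // 2.0 * 4.0 = 8.0
  }
}
```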
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala
index a200e9454b..815be32d2e 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala
@@ -19,8 +19,6 @@ package org.apache.spark.mllib.regression
import scala.util.Random
-import org.jblas.DoubleMatrix
-
import org.apache.spark.SparkFunSuite
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.util.{LinearDataGenerator, LocalClusterSparkContext,
@@ -49,12 +47,12 @@ class RidgeRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
val numExamples = 50
val numFeatures = 20
- org.jblas.util.Random.seed(42)
// Pick weights as random values distributed uniformly in [-0.5, 0.5]
- val w = DoubleMatrix.rand(numFeatures, 1).subi(0.5)
+ val random = new Random(42)
+ val w = Array.fill(numFeatures)(random.nextDouble() - 0.5)
// Use half of data for training and other half for validation
- val data = LinearDataGenerator.generateLinearInput(3.0, w.toArray, 2 * numExamples, 42, 10.0)
+ val data = LinearDataGenerator.generateLinearInput(3.0, w, 2 * numExamples, 42, 10.0)
val testData = data.take(numExamples)
val validationData = data.takeRight(numExamples)
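
Here the removed org.jblas.util.Random.seed(42) plus DoubleMatrix.rand becomes a seeded scala.util.Random, preserving determinism across runs. A small demonstration (illustrative only) that the replacement yields repeatable weights in [-0.5, 0.5):

```scala
import scala.util.Random

object SeededWeightsSketch {
  def main(args: Array[String]): Unit = {
    def weights(n: Int): Array[Double] = {
      val random = new Random(42)
      Array.fill(n)(random.nextDouble() - 0.5)  // uniform in [-0.5, 0.5)
    }
    // Same seed, same weights: the property the old jblas seed call provided.
    assert(weights(5).sameElements(weights(5)))
    println(weights(5).mkString(", "))
  }
}
```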