about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- LICENSE 1
-rw-r--r-- docs/mllib-data-types.md 2
-rw-r--r-- mllib/pom.xml 6
-rw-r--r-- mllib/src/test/java/org/apache/spark/mllib/recommendation/JavaALSSuite.java 39
-rw-r--r-- mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java 17
-rw-r--r-- mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala 7
-rw-r--r-- mllib/src/test/scala/org/apache/spark/mllib/optimization/NNLSSuite.scala 71
-rw-r--r-- mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala 76
-rw-r--r-- mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala 8
-rw-r--r-- pom.xml 1
10 files changed, 107 insertions, 121 deletions
diff --git a/LICENSE b/LICENSE
index 9b78f3bbf8..3c6117f4aa 100644
--- a/LICENSE
+++ b/LICENSE
@@ -237,7 +237,6 @@ The text of each license is also included at licenses/LICENSE-[project].txt.
(BSD 3 Clause) netlib core (com.github.fommil.netlib:core:1.1.2 - https://github.com/fommil/netlib-java/core)
(BSD 3 Clause) JPMML-Model (org.jpmml:pmml-model:1.2.7 - https://github.com/jpmml/jpmml-model)
- (BSD 3-clause style license) jblas (org.jblas:jblas:1.2.4 - http://jblas.org/)
(BSD License) AntLR Parser Generator (antlr:antlr:2.7.7 - http://www.antlr.org/)
(BSD licence) ANTLR ST4 4.0.4 (org.antlr:ST4:4.0.4 - http://www.stringtemplate.org)
(BSD licence) ANTLR StringTemplate (org.antlr:stringtemplate:3.2.1 - http://www.stringtemplate.org)
diff --git a/docs/mllib-data-types.md b/docs/mllib-data-types.md
index 363dc7c13b..5e3ee472a7 100644
--- a/docs/mllib-data-types.md
+++ b/docs/mllib-data-types.md
@@ -11,7 +11,7 @@ MLlib supports local vectors and matrices stored on a single machine,
as well as distributed matrices backed by one or more RDDs.
Local vectors and local matrices are simple data models
that serve as public interfaces. The underlying linear algebra operations are provided by
-[Breeze](http://www.scalanlp.org/) and [jblas](http://jblas.org/).
+[Breeze](http://www.scalanlp.org/).
A training example used in supervised learning is called a "labeled point" in MLlib.
## Local vector
diff --git a/mllib/pom.xml b/mllib/pom.xml
index 816f3f6830..428176dcbf 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -63,12 +63,6 @@
<version>${project.version}</version>
</dependency>
<dependency>
- <groupId>org.jblas</groupId>
- <artifactId>jblas</artifactId>
- <version>${jblas.version}</version>
- <scope>test</scope>
- </dependency>
- <dependency>
<groupId>org.scalanlp</groupId>
<artifactId>breeze_${scala.binary.version}</artifactId>
<version>0.11.2</version>
diff --git a/mllib/src/test/java/org/apache/spark/mllib/recommendation/JavaALSSuite.java b/mllib/src/test/java/org/apache/spark/mllib/recommendation/JavaALSSuite.java
index a6631ed7eb..d0bf7f556d 100644
--- a/mllib/src/test/java/org/apache/spark/mllib/recommendation/JavaALSSuite.java
+++ b/mllib/src/test/java/org/apache/spark/mllib/recommendation/JavaALSSuite.java
@@ -24,7 +24,6 @@ import java.util.List;
import scala.Tuple2;
import scala.Tuple3;
-import org.jblas.DoubleMatrix;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
@@ -48,14 +47,14 @@ public class JavaALSSuite implements Serializable {
sc = null;
}
- void validatePrediction(
+ private void validatePrediction(
MatrixFactorizationModel model,
int users,
int products,
- DoubleMatrix trueRatings,
+ double[] trueRatings,
double matchThreshold,
boolean implicitPrefs,
- DoubleMatrix truePrefs) {
+ double[] truePrefs) {
List<Tuple2<Integer, Integer>> localUsersProducts = new ArrayList<>(users * products);
for (int u=0; u < users; ++u) {
for (int p=0; p < products; ++p) {
@@ -68,7 +67,7 @@ public class JavaALSSuite implements Serializable {
if (!implicitPrefs) {
for (Rating r: predictedRatings) {
double prediction = r.rating();
- double correct = trueRatings.get(r.user(), r.product());
+ double correct = trueRatings[r.product() * users + r.user()];
Assert.assertTrue(String.format("Prediction=%2.4f not below match threshold of %2.2f",
prediction, matchThreshold), Math.abs(prediction - correct) < matchThreshold);
}
@@ -79,9 +78,9 @@ public class JavaALSSuite implements Serializable {
double denom = 0.0;
for (Rating r: predictedRatings) {
double prediction = r.rating();
- double truePref = truePrefs.get(r.user(), r.product());
+ double truePref = truePrefs[r.product() * users + r.user()];
double confidence = 1.0 +
- /* alpha = */ 1.0 * Math.abs(trueRatings.get(r.user(), r.product()));
+ /* alpha = 1.0 * ... */ Math.abs(trueRatings[r.product() * users + r.user()]);
double err = confidence * (truePref - prediction) * (truePref - prediction);
sqErr += err;
denom += confidence;
@@ -98,8 +97,8 @@ public class JavaALSSuite implements Serializable {
int iterations = 15;
int users = 50;
int products = 100;
- Tuple3<List<Rating>, DoubleMatrix, DoubleMatrix> testData = ALSSuite.generateRatingsAsJavaList(
- users, products, features, 0.7, false, false);
+ Tuple3<List<Rating>, double[], double[]> testData =
+ ALSSuite.generateRatingsAsJava(users, products, features, 0.7, false, false);
JavaRDD<Rating> data = sc.parallelize(testData._1());
MatrixFactorizationModel model = ALS.train(data.rdd(), features, iterations);
@@ -112,8 +111,8 @@ public class JavaALSSuite implements Serializable {
int iterations = 15;
int users = 100;
int products = 200;
- Tuple3<List<Rating>, DoubleMatrix, DoubleMatrix> testData = ALSSuite.generateRatingsAsJavaList(
- users, products, features, 0.7, false, false);
+ Tuple3<List<Rating>, double[], double[]> testData =
+ ALSSuite.generateRatingsAsJava(users, products, features, 0.7, false, false);
JavaRDD<Rating> data = sc.parallelize(testData._1());
@@ -129,8 +128,8 @@ public class JavaALSSuite implements Serializable {
int iterations = 15;
int users = 80;
int products = 160;
- Tuple3<List<Rating>, DoubleMatrix, DoubleMatrix> testData = ALSSuite.generateRatingsAsJavaList(
- users, products, features, 0.7, true, false);
+ Tuple3<List<Rating>, double[], double[]> testData =
+ ALSSuite.generateRatingsAsJava(users, products, features, 0.7, true, false);
JavaRDD<Rating> data = sc.parallelize(testData._1());
MatrixFactorizationModel model = ALS.trainImplicit(data.rdd(), features, iterations);
@@ -143,8 +142,8 @@ public class JavaALSSuite implements Serializable {
int iterations = 15;
int users = 100;
int products = 200;
- Tuple3<List<Rating>, DoubleMatrix, DoubleMatrix> testData = ALSSuite.generateRatingsAsJavaList(
- users, products, features, 0.7, true, false);
+ Tuple3<List<Rating>, double[], double[]> testData =
+ ALSSuite.generateRatingsAsJava(users, products, features, 0.7, true, false);
JavaRDD<Rating> data = sc.parallelize(testData._1());
@@ -161,8 +160,8 @@ public class JavaALSSuite implements Serializable {
int iterations = 15;
int users = 80;
int products = 160;
- Tuple3<List<Rating>, DoubleMatrix, DoubleMatrix> testData = ALSSuite.generateRatingsAsJavaList(
- users, products, features, 0.7, true, true);
+ Tuple3<List<Rating>, double[], double[]> testData =
+ ALSSuite.generateRatingsAsJava(users, products, features, 0.7, true, true);
JavaRDD<Rating> data = sc.parallelize(testData._1());
MatrixFactorizationModel model = new ALS().setRank(features)
@@ -179,9 +178,9 @@ public class JavaALSSuite implements Serializable {
int iterations = 10;
int users = 200;
int products = 50;
- Tuple3<List<Rating>, DoubleMatrix, DoubleMatrix> testData = ALSSuite.generateRatingsAsJavaList(
- users, products, features, 0.7, true, false);
- JavaRDD<Rating> data = sc.parallelize(testData._1());
+ List<Rating> testData = ALSSuite.generateRatingsAsJava(
+ users, products, features, 0.7, true, false)._1();
+ JavaRDD<Rating> data = sc.parallelize(testData);
MatrixFactorizationModel model = new ALS().setRank(features)
.setIterations(iterations)
.setImplicitPrefs(true)
diff --git a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java
index 7266eec235..c56db703ea 100644
--- a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java
+++ b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java
@@ -19,14 +19,13 @@ package org.apache.spark.mllib.regression;
import java.io.Serializable;
import java.util.List;
+import java.util.Random;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
-import org.jblas.DoubleMatrix;
-
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.mllib.util.LinearDataGenerator;
@@ -45,7 +44,8 @@ public class JavaRidgeRegressionSuite implements Serializable {
sc = null;
}
- double predictionError(List<LabeledPoint> validationData, RidgeRegressionModel model) {
+ private static double predictionError(List<LabeledPoint> validationData,
+ RidgeRegressionModel model) {
double errorSum = 0;
for (LabeledPoint point: validationData) {
Double prediction = model.predict(point.features());
@@ -54,11 +54,14 @@ public class JavaRidgeRegressionSuite implements Serializable {
return errorSum / validationData.size();
}
- List<LabeledPoint> generateRidgeData(int numPoints, int numFeatures, double std) {
- org.jblas.util.Random.seed(42);
+ private static List<LabeledPoint> generateRidgeData(int numPoints, int numFeatures, double std) {
// Pick weights as random values distributed uniformly in [-0.5, 0.5]
- DoubleMatrix w = DoubleMatrix.rand(numFeatures, 1).subi(0.5);
- return LinearDataGenerator.generateLinearInputAsList(0.0, w.data, numPoints, 42, std);
+ Random random = new Random(42);
+ double[] w = new double[numFeatures];
+ for (int i = 0; i < w.length; i++) {
+ w[i] = random.nextDouble() - 0.5;
+ }
+ return LinearDataGenerator.generateLinearInputAsList(0.0, w, numPoints, 42, std);
}
@Test
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala
index 1a47344b68..3676d9c5de 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala
@@ -20,7 +20,7 @@ package org.apache.spark.mllib.classification
import scala.collection.JavaConverters._
import scala.util.Random
-import org.jblas.DoubleMatrix
+import breeze.linalg.{DenseVector => BDV}
import org.apache.spark.{SparkException, SparkFunSuite}
import org.apache.spark.mllib.linalg.Vectors
@@ -45,12 +45,11 @@ object SVMSuite {
nPoints: Int,
seed: Int): Seq[LabeledPoint] = {
val rnd = new Random(seed)
- val weightsMat = new DoubleMatrix(1, weights.length, weights: _*)
+ val weightsMat = new BDV(weights)
val x = Array.fill[Array[Double]](nPoints)(
Array.fill[Double](weights.length)(rnd.nextDouble() * 2.0 - 1.0))
val y = x.map { xi =>
- val yD = new DoubleMatrix(1, xi.length, xi: _*).dot(weightsMat) +
- intercept + 0.01 * rnd.nextGaussian()
+ val yD = new BDV(xi).dot(weightsMat) + intercept + 0.01 * rnd.nextGaussian()
if (yD < 0) 0.0 else 1.0
}
y.zip(x).map(p => LabeledPoint(p._1, Vectors.dense(p._2)))
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/optimization/NNLSSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/optimization/NNLSSuite.scala
index d8f9b8c339..4ec3dc0df0 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/optimization/NNLSSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/optimization/NNLSSuite.scala
@@ -19,28 +19,22 @@ package org.apache.spark.mllib.optimization
import scala.util.Random
-import org.jblas.{DoubleMatrix, SimpleBlas}
+import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV}
import org.apache.spark.SparkFunSuite
import org.apache.spark.mllib.util.TestingUtils._
class NNLSSuite extends SparkFunSuite {
/** Generate an NNLS problem whose optimal solution is the all-ones vector. */
- def genOnesData(n: Int, rand: Random): (DoubleMatrix, DoubleMatrix) = {
- val A = new DoubleMatrix(n, n, Array.fill(n*n)(rand.nextDouble()): _*)
- val b = A.mmul(DoubleMatrix.ones(n, 1))
-
- val ata = A.transpose.mmul(A)
- val atb = A.transpose.mmul(b)
-
- (ata, atb)
+ def genOnesData(n: Int, rand: Random): (BDM[Double], BDV[Double]) = {
+ val A = new BDM(n, n, Array.fill(n*n)(rand.nextDouble()))
+ val b = A * new BDV(Array.fill(n)(1.0))
+ (A.t * A, A.t * b)
}
/** Compute the objective value */
- def computeObjectiveValue(ata: DoubleMatrix, atb: DoubleMatrix, x: DoubleMatrix): Double = {
- val res = (x.transpose().mmul(ata).mmul(x)).mul(0.5).sub(atb.dot(x))
- res.get(0)
- }
+ def computeObjectiveValue(ata: BDM[Double], atb: BDV[Double], x: BDV[Double]): Double =
+ (x.t * ata * x) / 2.0 - atb.dot(x)
test("NNLS: exact solution cases") {
val n = 20
@@ -54,12 +48,15 @@ class NNLSSuite extends SparkFunSuite {
for (k <- 0 until 100) {
val (ata, atb) = genOnesData(n, rand)
- val x = new DoubleMatrix(NNLS.solve(ata.data, atb.data, ws))
+ val x = new BDV(NNLS.solve(ata.data, atb.data, ws))
assert(x.length === n)
- val answer = DoubleMatrix.ones(n, 1)
- SimpleBlas.axpy(-1.0, answer, x)
- val solved = (x.norm2 < 1e-2) && (x.normmax < 1e-3)
- if (solved) numSolved = numSolved + 1
+ val answer = new BDV(Array.fill(n)(1.0))
+ val solved =
+ (breeze.linalg.norm(x - answer) < 0.01) && // L2 norm
+ ((x - answer).toArray.map(_.abs).max < 0.001) // inf norm
+ if (solved) {
+ numSolved += 1
+ }
}
assert(numSolved > 50)
@@ -67,20 +64,18 @@ class NNLSSuite extends SparkFunSuite {
test("NNLS: nonnegativity constraint active") {
val n = 5
- // scalastyle:off
- val ata = new DoubleMatrix(Array(
- Array( 4.377, -3.531, -1.306, -0.139, 3.418),
- Array(-3.531, 4.344, 0.934, 0.305, -2.140),
- Array(-1.306, 0.934, 2.644, -0.203, -0.170),
- Array(-0.139, 0.305, -0.203, 5.883, 1.428),
- Array( 3.418, -2.140, -0.170, 1.428, 4.684)))
- // scalastyle:on
- val atb = new DoubleMatrix(Array(-1.632, 2.115, 1.094, -1.025, -0.636))
+ val ata = Array(
+ 4.377, -3.531, -1.306, -0.139, 3.418,
+ -3.531, 4.344, 0.934, 0.305, -2.140,
+ -1.306, 0.934, 2.644, -0.203, -0.170,
+ -0.139, 0.305, -0.203, 5.883, 1.428,
+ 3.418, -2.140, -0.170, 1.428, 4.684)
+ val atb = Array(-1.632, 2.115, 1.094, -1.025, -0.636)
val goodx = Array(0.13025, 0.54506, 0.2874, 0.0, 0.028628)
val ws = NNLS.createWorkspace(n)
- val x = NNLS.solve(ata.data, atb.data, ws)
+ val x = NNLS.solve(ata, atb, ws)
for (i <- 0 until n) {
assert(x(i) ~== goodx(i) absTol 1E-3)
assert(x(i) >= 0)
@@ -89,23 +84,21 @@ class NNLSSuite extends SparkFunSuite {
test("NNLS: objective value test") {
val n = 5
- val ata = new DoubleMatrix(5, 5
- , 517399.13534, 242529.67289, -153644.98976, 130802.84503, -798452.29283
- , 242529.67289, 126017.69765, -75944.21743, 81785.36128, -405290.60884
- , -153644.98976, -75944.21743, 46986.44577, -45401.12659, 247059.51049
- , 130802.84503, 81785.36128, -45401.12659, 67457.31310, -253747.03819
- , -798452.29283, -405290.60884, 247059.51049, -253747.03819, 1310939.40814
- )
- val atb = new DoubleMatrix(5, 1,
- -31755.05710, 13047.14813, -20191.24443, 25993.77580, 11963.55017)
+ val ata = new BDM(5, 5, Array(
+ 517399.13534, 242529.67289, -153644.98976, 130802.84503, -798452.29283,
+ 242529.67289, 126017.69765, -75944.21743, 81785.36128, -405290.60884,
+ -153644.98976, -75944.21743, 46986.44577, -45401.12659, 247059.51049,
+ 130802.84503, 81785.36128, -45401.12659, 67457.31310, -253747.03819,
+ -798452.29283, -405290.60884, 247059.51049, -253747.03819, 1310939.40814))
+ val atb = new BDV(Array(-31755.05710, 13047.14813, -20191.24443, 25993.77580, 11963.55017))
/** reference solution obtained from matlab function quadprog */
- val refx = new DoubleMatrix(Array(34.90751, 103.96254, 0.00000, 27.82094, 58.79627))
+ val refx = new BDV(Array(34.90751, 103.96254, 0.00000, 27.82094, 58.79627))
val refObj = computeObjectiveValue(ata, atb, refx)
val ws = NNLS.createWorkspace(n)
- val x = new DoubleMatrix(NNLS.solve(ata.data, atb.data, ws))
+ val x = new BDV(NNLS.solve(ata.data, atb.data, ws))
val obj = computeObjectiveValue(ata, atb, x)
assert(obj < refObj + 1E-5)
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala
index 045135f7f8..d9dc557e3b 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala
@@ -21,7 +21,7 @@ import scala.collection.JavaConverters._
import scala.math.abs
import scala.util.Random
-import org.jblas.DoubleMatrix
+import breeze.linalg.{DenseMatrix => BDM}
import org.apache.spark.SparkFunSuite
import org.apache.spark.mllib.util.MLlibTestSparkContext
@@ -29,16 +29,16 @@ import org.apache.spark.storage.StorageLevel
object ALSSuite {
- def generateRatingsAsJavaList(
+ def generateRatingsAsJava(
users: Int,
products: Int,
features: Int,
samplingRate: Double,
implicitPrefs: Boolean,
- negativeWeights: Boolean): (java.util.List[Rating], DoubleMatrix, DoubleMatrix) = {
+ negativeWeights: Boolean): (java.util.List[Rating], Array[Double], Array[Double]) = {
val (sampledRatings, trueRatings, truePrefs) =
- generateRatings(users, products, features, samplingRate, implicitPrefs)
- (sampledRatings.asJava, trueRatings, truePrefs)
+ generateRatings(users, products, features, samplingRate, implicitPrefs, negativeWeights)
+ (sampledRatings.asJava, trueRatings.toArray, if (truePrefs == null) null else truePrefs.toArray)
}
def generateRatings(
@@ -48,35 +48,36 @@ object ALSSuite {
samplingRate: Double,
implicitPrefs: Boolean = false,
negativeWeights: Boolean = false,
- negativeFactors: Boolean = true): (Seq[Rating], DoubleMatrix, DoubleMatrix) = {
+ negativeFactors: Boolean = true): (Seq[Rating], BDM[Double], BDM[Double]) = {
val rand = new Random(42)
// Create a random matrix with uniform values from -1 to 1
def randomMatrix(m: Int, n: Int) = {
if (negativeFactors) {
- new DoubleMatrix(m, n, Array.fill(m * n)(rand.nextDouble() * 2 - 1): _*)
+ new BDM(m, n, Array.fill(m * n)(rand.nextDouble() * 2 - 1))
} else {
- new DoubleMatrix(m, n, Array.fill(m * n)(rand.nextDouble()): _*)
+ new BDM(m, n, Array.fill(m * n)(rand.nextDouble()))
}
}
val userMatrix = randomMatrix(users, features)
val productMatrix = randomMatrix(features, products)
- val (trueRatings, truePrefs) = implicitPrefs match {
- case true =>
+ val (trueRatings, truePrefs) =
+ if (implicitPrefs) {
// Generate raw values from [0,9], or if negativeWeights, from [-2,7]
- val raw = new DoubleMatrix(users, products,
+ val raw = new BDM(users, products,
Array.fill(users * products)(
- (if (negativeWeights) -2 else 0) + rand.nextInt(10).toDouble): _*)
+ (if (negativeWeights) -2 else 0) + rand.nextInt(10).toDouble))
val prefs =
- new DoubleMatrix(users, products, raw.data.map(v => if (v > 0) 1.0 else 0.0): _*)
+ new BDM(users, products, raw.data.map(v => if (v > 0) 1.0 else 0.0))
(raw, prefs)
- case false => (userMatrix.mmul(productMatrix), null)
- }
+ } else {
+ (userMatrix * productMatrix, null)
+ }
val sampledRatings = {
for (u <- 0 until users; p <- 0 until products if rand.nextDouble() < samplingRate)
- yield Rating(u, p, trueRatings.get(u, p))
+ yield Rating(u, p, trueRatings(u, p))
}
(sampledRatings, trueRatings, truePrefs)
@@ -149,8 +150,8 @@ class ALSSuite extends SparkFunSuite with MLlibTestSparkContext {
.setSeed(1)
.setFinalRDDStorageLevel(storageLevel)
.run(ratings)
- assert(model.productFeatures.getStorageLevel == storageLevel);
- assert(model.userFeatures.getStorageLevel == storageLevel);
+ assert(model.productFeatures.getStorageLevel == storageLevel)
+ assert(model.userFeatures.getStorageLevel == storageLevel)
storageLevel = StorageLevel.DISK_ONLY
model = new ALS()
.setRank(5)
@@ -160,8 +161,8 @@ class ALSSuite extends SparkFunSuite with MLlibTestSparkContext {
.setSeed(1)
.setFinalRDDStorageLevel(storageLevel)
.run(ratings)
- assert(model.productFeatures.getStorageLevel == storageLevel);
- assert(model.userFeatures.getStorageLevel == storageLevel);
+ assert(model.productFeatures.getStorageLevel == storageLevel)
+ assert(model.userFeatures.getStorageLevel == storageLevel)
}
test("negative ids") {
@@ -178,7 +179,7 @@ class ALSSuite extends SparkFunSuite with MLlibTestSparkContext {
val u = r.user + 25
val p = r.product + 25
val v = r.rating
- val error = v - correct.get(u, p)
+ val error = v - correct(u, p)
assert(math.abs(error) < 0.4)
}
}
@@ -197,7 +198,7 @@ class ALSSuite extends SparkFunSuite with MLlibTestSparkContext {
* @param samplingRate what fraction of the user-product pairs are known
* @param matchThreshold max difference allowed to consider a predicted rating correct
* @param implicitPrefs flag to test implicit feedback
- * @param bulkPredict flag to test bulk predicition
+ * @param bulkPredict flag to test bulk prediction
* @param negativeWeights whether the generated data can contain negative values
* @param numUserBlocks number of user blocks to partition users into
* @param numProductBlocks number of product blocks to partition products into
@@ -234,30 +235,31 @@ class ALSSuite extends SparkFunSuite with MLlibTestSparkContext {
.setNonnegative(!negativeFactors)
.run(sc.parallelize(sampledRatings))
- val predictedU = new DoubleMatrix(users, features)
+ val predictedU = new BDM[Double](users, features)
for ((u, vec) <- model.userFeatures.collect(); i <- 0 until features) {
- predictedU.put(u, i, vec(i))
+ predictedU(u, i) = vec(i)
}
- val predictedP = new DoubleMatrix(products, features)
+ val predictedP = new BDM[Double](products, features)
for ((p, vec) <- model.productFeatures.collect(); i <- 0 until features) {
- predictedP.put(p, i, vec(i))
+ predictedP(p, i) = vec(i)
}
- val predictedRatings = bulkPredict match {
- case false => predictedU.mmul(predictedP.transpose)
- case true =>
- val allRatings = new DoubleMatrix(users, products)
+ val predictedRatings =
+ if (bulkPredict) {
+ val allRatings = new BDM[Double](users, products)
val usersProducts = for (u <- 0 until users; p <- 0 until products) yield (u, p)
val userProductsRDD = sc.parallelize(usersProducts)
model.predict(userProductsRDD).collect().foreach { elem =>
- allRatings.put(elem.user, elem.product, elem.rating)
+ allRatings(elem.user, elem.product) = elem.rating
}
allRatings
- }
+ } else {
+ predictedU * predictedP.t
+ }
if (!implicitPrefs) {
for (u <- 0 until users; p <- 0 until products) {
- val prediction = predictedRatings.get(u, p)
- val correct = trueRatings.get(u, p)
+ val prediction = predictedRatings(u, p)
+ val correct = trueRatings(u, p)
if (math.abs(prediction - correct) > matchThreshold) {
fail(("Model failed to predict (%d, %d): %f vs %f\ncorr: %s\npred: %s\nU: %s\n P: %s")
.format(u, p, correct, prediction, trueRatings, predictedRatings, predictedU,
@@ -269,9 +271,9 @@ class ALSSuite extends SparkFunSuite with MLlibTestSparkContext {
var sqErr = 0.0
var denom = 0.0
for (u <- 0 until users; p <- 0 until products) {
- val prediction = predictedRatings.get(u, p)
- val truePref = truePrefs.get(u, p)
- val confidence = 1 + 1.0 * abs(trueRatings.get(u, p))
+ val prediction = predictedRatings(u, p)
+ val truePref = truePrefs(u, p)
+ val confidence = 1.0 + abs(trueRatings(u, p))
val err = confidence * (truePref - prediction) * (truePref - prediction)
sqErr += err
denom += confidence
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala
index a200e9454b..815be32d2e 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala
@@ -19,8 +19,6 @@ package org.apache.spark.mllib.regression
import scala.util.Random
-import org.jblas.DoubleMatrix
-
import org.apache.spark.SparkFunSuite
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.util.{LinearDataGenerator, LocalClusterSparkContext,
@@ -49,12 +47,12 @@ class RidgeRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
val numExamples = 50
val numFeatures = 20
- org.jblas.util.Random.seed(42)
// Pick weights as random values distributed uniformly in [-0.5, 0.5]
- val w = DoubleMatrix.rand(numFeatures, 1).subi(0.5)
+ val random = new Random(42)
+ val w = Array.fill(numFeatures)(random.nextDouble() - 0.5)
// Use half of data for training and other half for validation
- val data = LinearDataGenerator.generateLinearInput(3.0, w.toArray, 2 * numExamples, 42, 10.0)
+ val data = LinearDataGenerator.generateLinearInput(3.0, w, 2 * numExamples, 42, 10.0)
val testData = data.take(numExamples)
val validationData = data.takeRight(numExamples)
diff --git a/pom.xml b/pom.xml
index dccfd104cd..90f46724e1 100644
--- a/pom.xml
+++ b/pom.xml
@@ -144,7 +144,6 @@
<derby.version>10.10.1.1</derby.version>
<parquet.version>1.7.0</parquet.version>
<hive.parquet.version>1.6.0</hive.parquet.version>
- <jblas.version>1.2.4</jblas.version>
<jetty.version>8.1.14.v20131031</jetty.version>
<orbit.version>3.0.0.v201112011016</orbit.version>
<chill.version>0.7.4</chill.version>