1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
|
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package spark.mllib.recommendation
import scala.util.Random
import org.scalatest.BeforeAndAfterAll
import org.scalatest.FunSuite
import spark.SparkContext
import spark.SparkContext._
import org.jblas._
class ALSSuite extends FunSuite with BeforeAndAfterAll {
val sc = new SparkContext("local", "test")
override def afterAll() {
sc.stop()
System.clearProperty("spark.driver.port")
}
test("rank-1 matrices") {
testALS(10, 20, 1, 15, 0.7, 0.3)
}
test("rank-2 matrices") {
testALS(20, 30, 2, 15, 0.7, 0.3)
}
/**
* Test if we can correctly factorize R = U * P where U and P are of known rank.
*
* @param users number of users
* @param products number of products
* @param features number of features (rank of problem)
* @param iterations number of iterations to run
* @param samplingRate what fraction of the user-product pairs are known
* @param matchThreshold max difference allowed to consider a predicted rating correct
*/
def testALS(users: Int, products: Int, features: Int, iterations: Int,
samplingRate: Double, matchThreshold: Double)
{
val rand = new Random(42)
// Create a random matrix with uniform values from -1 to 1
def randomMatrix(m: Int, n: Int) =
new DoubleMatrix(m, n, Array.fill(m * n)(rand.nextDouble() * 2 - 1): _*)
val userMatrix = randomMatrix(users, features)
val productMatrix = randomMatrix(features, products)
val trueRatings = userMatrix.mmul(productMatrix)
val sampledRatings = {
for (u <- 0 until users; p <- 0 until products if rand.nextDouble() < samplingRate)
yield (u, p, trueRatings.get(u, p))
}
val model = ALS.train(sc.parallelize(sampledRatings), features, iterations)
val predictedU = new DoubleMatrix(users, features)
for ((u, vec) <- model.userFeatures.collect(); i <- 0 until features) {
predictedU.put(u, i, vec(i))
}
val predictedP = new DoubleMatrix(products, features)
for ((p, vec) <- model.productFeatures.collect(); i <- 0 until features) {
predictedP.put(p, i, vec(i))
}
val predictedRatings = predictedU.mmul(predictedP.transpose)
for (u <- 0 until users; p <- 0 until products) {
val prediction = predictedRatings.get(u, p)
val correct = trueRatings.get(u, p)
if (math.abs(prediction - correct) > matchThreshold) {
fail("Model failed to predict (%d, %d): %f vs %f\ncorr: %s\npred: %s\nU: %s\n P: %s".format(
u, p, correct, prediction, trueRatings, predictedRatings, predictedU, predictedP))
}
}
}
}
|