aboutsummaryrefslogtreecommitdiff
path: root/mllib/src/test
diff options
context:
space:
mode:
authorXiangrui Meng <meng@databricks.com>2015-02-25 23:43:29 -0800
committerXiangrui Meng <meng@databricks.com>2015-02-25 23:43:29 -0800
commite43139f40309995b1133c7ef2936ab858b7b44fc (patch)
tree03f58ac6d3ed3ff771c8fda5df9f03b6de1f9f2e /mllib/src/test
parentd20559b157743981b9c09e286f2aaff8cbefab59 (diff)
downloadspark-e43139f40309995b1133c7ef2936ab858b7b44fc.tar.gz
spark-e43139f40309995b1133c7ef2936ab858b7b44fc.tar.bz2
spark-e43139f40309995b1133c7ef2936ab858b7b44fc.zip
[SPARK-5976][MLLIB] Add partitioner to factors returned by ALS
The model trained by ALS requires partitioning information to do quick lookup of a user/item factor for making recommendation on individual requests. In the new implementation, we didn't set partitioners in the factors returned by ALS, which would cause performance regression. srowen coderxiang Author: Xiangrui Meng <meng@databricks.com> Closes #4748 from mengxr/SPARK-5976 and squashes the following commits: 9373a09 [Xiangrui Meng] add partitioner to factors returned by ALS 260f183 [Xiangrui Meng] add a test for partitioner
Diffstat (limited to 'mllib/src/test')
-rw-r--r--mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala32
1 files changed, 31 insertions, 1 deletions
diff --git a/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala
index 376c3626f9..bb86bafc0e 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala
@@ -25,7 +25,7 @@ import scala.collection.mutable.ArrayBuffer
import com.github.fommil.netlib.BLAS.{getInstance => blas}
import org.scalatest.FunSuite
-import org.apache.spark.Logging
+import org.apache.spark.{Logging, SparkException}
import org.apache.spark.ml.recommendation.ALS._
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.util.MLlibTestSparkContext
@@ -455,4 +455,34 @@ class ALSSuite extends FunSuite with MLlibTestSparkContext with Logging {
assert(isNonnegative(itemFactors))
// TODO: Validate the solution.
}
+
+ test("als partitioner is a projection") {
+ for (p <- Seq(1, 10, 100, 1000)) {
+ val part = new ALSPartitioner(p)
+ var k = 0
+ while (k < p) {
+ assert(k === part.getPartition(k))
+ assert(k === part.getPartition(k.toLong))
+ k += 1
+ }
+ }
+ }
+
+ test("partitioner in returned factors") {
+ val (ratings, _) = genImplicitTestData(numUsers = 20, numItems = 40, rank = 2, noiseStd = 0.01)
+ val (userFactors, itemFactors) = ALS.train(
+ ratings, rank = 2, maxIter = 4, numUserBlocks = 3, numItemBlocks = 4)
+ for ((tpe, factors) <- Seq(("User", userFactors), ("Item", itemFactors))) {
+ assert(userFactors.partitioner.isDefined, s"$tpe factors should have partitioner.")
+ val part = userFactors.partitioner.get
+ userFactors.mapPartitionsWithIndex { (idx, items) =>
+ items.foreach { case (id, _) =>
+ if (part.getPartition(id) != idx) {
+ throw new SparkException(s"$tpe with ID $id should not be in partition $idx.")
+ }
+ }
+ Iterator.empty
+ }.count()
+ }
+ }
}