From d9203350b06a9c737421ba244162b1365402c01b Mon Sep 17 00:00:00 2001 From: Tor Myklebust Date: Wed, 11 Jun 2014 18:16:33 -0700 Subject: [SPARK-1672][MLLIB] Separate user and product partitioning in ALS Some clean up work following #593. 1. Allow to set different number user blocks and number product blocks in `ALS`. 2. Update `MovieLensALS` to reflect the change. Author: Tor Myklebust Author: Xiangrui Meng Closes #1014 from mengxr/SPARK-1672 and squashes the following commits: 0e910dd [Xiangrui Meng] change private[this] to private[recommendation] 36420c7 [Xiangrui Meng] set exclusion rules for ALS 9128b77 [Xiangrui Meng] Merge remote-tracking branch 'apache/master' into SPARK-1672 294efe9 [Xiangrui Meng] Merge remote-tracking branch 'apache/master' into SPARK-1672 9bab77b [Xiangrui Meng] clean up add numUserBlocks and numProductBlocks to MovieLensALS 84c8e8c [Xiangrui Meng] Merge branch 'master' into SPARK-1672 d17a8bf [Xiangrui Meng] merge master a4925fd [Tor Myklebust] Style. bd8a75c [Tor Myklebust] Merge branch 'master' of github.com:apache/spark into alsseppar 021f54b [Tor Myklebust] Separate user and product blocks. dcf583a [Tor Myklebust] Remove the partitioner member variable; instead, thread that needle everywhere it needs to go. 23d6f91 [Tor Myklebust] Stop making the partitioner configurable. 495784f [Tor Myklebust] Merge branch 'master' of https://github.com/apache/spark 674933a [Tor Myklebust] Fix style. 40edc23 [Tor Myklebust] Fix missing space. f841345 [Tor Myklebust] Fix daft bug creating 'pairs', also for -> foreach. 5ec9e6c [Tor Myklebust] Clean a couple of things up using 'map'. 36a0f43 [Tor Myklebust] Make the partitioner private. d872b09 [Tor Myklebust] Add negative id ALS test. df27697 [Tor Myklebust] Support custom partitioners. Currently we use the same partitioner for users and products. c90b6d8 [Tor Myklebust] Scramble user and product ids before bucketing. c774d7d [Tor Myklebust] Make the partitioner a member variable and use it instead of modding directly. --- .../scala/org/apache/spark/examples/mllib/MovieLensALS.scala | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'examples/src') diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/MovieLensALS.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/MovieLensALS.scala index 6eb41e7ba3..28e201d279 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/MovieLensALS.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/MovieLensALS.scala @@ -50,6 +50,8 @@ object MovieLensALS { numIterations: Int = 20, lambda: Double = 1.0, rank: Int = 10, + numUserBlocks: Int = -1, + numProductBlocks: Int = -1, implicitPrefs: Boolean = false) def main(args: Array[String]) { @@ -67,8 +69,14 @@ object MovieLensALS { .text(s"lambda (smoothing constant), default: ${defaultParams.lambda}") .action((x, c) => c.copy(lambda = x)) opt[Unit]("kryo") - .text(s"use Kryo serialization") + .text("use Kryo serialization") .action((_, c) => c.copy(kryo = true)) + opt[Int]("numUserBlocks") + .text(s"number of user blocks, default: ${defaultParams.numUserBlocks} (auto)") + .action((x, c) => c.copy(numUserBlocks = x)) + opt[Int]("numProductBlocks") + .text(s"number of product blocks, default: ${defaultParams.numProductBlocks} (auto)") + .action((x, c) => c.copy(numProductBlocks = x)) opt[Unit]("implicitPrefs") .text("use implicit preference") .action((_, c) => c.copy(implicitPrefs = true)) @@ -160,6 +168,8 @@ object MovieLensALS { .setIterations(params.numIterations) .setLambda(params.lambda) .setImplicitPrefs(params.implicitPrefs) + .setUserBlocks(params.numUserBlocks) + .setProductBlocks(params.numProductBlocks) .run(training) val rmse = computeRmse(model, test, params.implicitPrefs) -- cgit v1.2.3