diff options
author | Xiangrui Meng <meng@databricks.com> | 2015-03-20 15:02:57 -0400 |
---|---|---|
committer | Xiangrui Meng <meng@databricks.com> | 2015-03-20 15:02:57 -0400 |
commit | 6b36470c66bd6140c45e45d3f1d51b0082c3fd97 (patch) | |
tree | 2887dd51713f682e1a0e40bfef6b359a3a3e4609 /mllib/src/test | |
parent | 25636d9867c6bc901463b6b227cb444d701cfdd1 (diff) | |
download | spark-6b36470c66bd6140c45e45d3f1d51b0082c3fd97.tar.gz spark-6b36470c66bd6140c45e45d3f1d51b0082c3fd97.tar.bz2 spark-6b36470c66bd6140c45e45d3f1d51b0082c3fd97.zip |
[SPARK-5955][MLLIB] add checkpointInterval to ALS
Add checkpiontInterval to ALS to prevent:
1. StackOverflow exceptions caused by long lineage,
2. large shuffle files generated during iterations,
3. slow recovery when some node fail.
srowen coderxiang
Author: Xiangrui Meng <meng@databricks.com>
Closes #5076 from mengxr/SPARK-5955 and squashes the following commits:
df56791 [Xiangrui Meng] update impl to reuse code
29affcb [Xiangrui Meng] do not materialize factors in implicit
20d3f7f [Xiangrui Meng] add checkpointInterval to ALS
Diffstat (limited to 'mllib/src/test')
-rw-r--r-- | mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala | 17 |
1 files changed, 17 insertions, 0 deletions
diff --git a/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala index bb86bafc0e..0bb06e9e8a 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.ml.recommendation +import java.io.File import java.util.Random import scala.collection.mutable @@ -32,16 +33,25 @@ import org.apache.spark.mllib.util.MLlibTestSparkContext import org.apache.spark.mllib.util.TestingUtils._ import org.apache.spark.rdd.RDD import org.apache.spark.sql.{Row, SQLContext} +import org.apache.spark.util.Utils class ALSSuite extends FunSuite with MLlibTestSparkContext with Logging { private var sqlContext: SQLContext = _ + private var tempDir: File = _ override def beforeAll(): Unit = { super.beforeAll() + tempDir = Utils.createTempDir() + sc.setCheckpointDir(tempDir.getAbsolutePath) sqlContext = new SQLContext(sc) } + override def afterAll(): Unit = { + Utils.deleteRecursively(tempDir) + super.afterAll() + } + test("LocalIndexEncoder") { val random = new Random for (numBlocks <- Seq(1, 2, 5, 10, 20, 50, 100)) { @@ -485,4 +495,11 @@ class ALSSuite extends FunSuite with MLlibTestSparkContext with Logging { }.count() } } + + test("als with large number of iterations") { + val (ratings, _) = genExplicitTestData(numUsers = 4, numItems = 4, rank = 1) + ALS.train(ratings, rank = 1, maxIter = 50, numUserBlocks = 2, numItemBlocks = 2) + ALS.train( + ratings, rank = 1, maxIter = 50, numUserBlocks = 2, numItemBlocks = 2, implicitPrefs = true) + } } |