aboutsummaryrefslogtreecommitdiff
path: root/mllib/src/test
diff options
context:
space:
mode:
authorXiangrui Meng <meng@databricks.com>2015-03-20 15:02:57 -0400
committerXiangrui Meng <meng@databricks.com>2015-03-20 15:02:57 -0400
commit6b36470c66bd6140c45e45d3f1d51b0082c3fd97 (patch)
tree2887dd51713f682e1a0e40bfef6b359a3a3e4609 /mllib/src/test
parent25636d9867c6bc901463b6b227cb444d701cfdd1 (diff)
downloadspark-6b36470c66bd6140c45e45d3f1d51b0082c3fd97.tar.gz
spark-6b36470c66bd6140c45e45d3f1d51b0082c3fd97.tar.bz2
spark-6b36470c66bd6140c45e45d3f1d51b0082c3fd97.zip
[SPARK-5955][MLLIB] add checkpointInterval to ALS
Add checkpointInterval to ALS to prevent: 1. StackOverflow exceptions caused by long lineage, 2. large shuffle files generated during iterations, 3. slow recovery when some nodes fail. srowen coderxiang Author: Xiangrui Meng <meng@databricks.com> Closes #5076 from mengxr/SPARK-5955 and squashes the following commits: df56791 [Xiangrui Meng] update impl to reuse code 29affcb [Xiangrui Meng] do not materialize factors in implicit 20d3f7f [Xiangrui Meng] add checkpointInterval to ALS
Diffstat (limited to 'mllib/src/test')
-rw-r--r--mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala17
1 file changed, 17 insertions, 0 deletions
diff --git a/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala
index bb86bafc0e..0bb06e9e8a 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala
@@ -17,6 +17,7 @@
package org.apache.spark.ml.recommendation
+import java.io.File
import java.util.Random
import scala.collection.mutable
@@ -32,16 +33,25 @@ import org.apache.spark.mllib.util.MLlibTestSparkContext
import org.apache.spark.mllib.util.TestingUtils._
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{Row, SQLContext}
+import org.apache.spark.util.Utils
class ALSSuite extends FunSuite with MLlibTestSparkContext with Logging {
private var sqlContext: SQLContext = _
+ private var tempDir: File = _
override def beforeAll(): Unit = {
super.beforeAll()
+ tempDir = Utils.createTempDir()
+ sc.setCheckpointDir(tempDir.getAbsolutePath)
sqlContext = new SQLContext(sc)
}
+ override def afterAll(): Unit = {
+ Utils.deleteRecursively(tempDir)
+ super.afterAll()
+ }
+
test("LocalIndexEncoder") {
val random = new Random
for (numBlocks <- Seq(1, 2, 5, 10, 20, 50, 100)) {
@@ -485,4 +495,11 @@ class ALSSuite extends FunSuite with MLlibTestSparkContext with Logging {
}.count()
}
}
+
+ test("als with large number of iterations") {
+ val (ratings, _) = genExplicitTestData(numUsers = 4, numItems = 4, rank = 1)
+ ALS.train(ratings, rank = 1, maxIter = 50, numUserBlocks = 2, numItemBlocks = 2)
+ ALS.train(
+ ratings, rank = 1, maxIter = 50, numUserBlocks = 2, numItemBlocks = 2, implicitPrefs = true)
+ }
}