about | summary | refs | log | tree | commit | diff
path: root/mllib/src/test
diff options
context:
space:
mode:
author: Joseph K. Bradley <joseph@databricks.com> 2015-05-19 10:57:47 -0700
committer: Xiangrui Meng <meng@databricks.com> 2015-05-19 10:57:54 -0700
commit: cd3093e705b184df1291cd8f03331a9618993693 (patch)
tree: c872ab98da643177be34a2a05a9a68047d0b7fbe /mllib/src/test
parent: 24cb323e767a342496cf24e0d06398b5af38ac80 (diff)
download: spark-cd3093e705b184df1291cd8f03331a9618993693.tar.gz
          spark-cd3093e705b184df1291cd8f03331a9618993693.tar.bz2
          spark-cd3093e705b184df1291cd8f03331a9618993693.zip
[SPARK-7678] [ML] Fix default random seed in HasSeed
Changed shared param HasSeed to have default based on hashCode of class name, instead of random number. Also, removed fixed random seeds from Word2Vec and ALS.

CC: mengxr

Author: Joseph K. Bradley <joseph@databricks.com>

Closes #6251 from jkbradley/scala-fixed-seed and squashes the following commits:

0e37184 [Joseph K. Bradley] Fixed Word2VecSuite, ALSSuite in spark.ml to use original fixed random seeds
678ec3a [Joseph K. Bradley] Removed fixed random seeds from Word2Vec and ALS. Changed shared param HasSeed to have default based on hashCode of class name, instead of random number.

(cherry picked from commit 7b16e9f2118fbfbb1c0ba957161fe500c9aff82a)
Signed-off-by: Xiangrui Meng <meng@databricks.com>
Diffstat (limited to 'mllib/src/test')
-rw-r--r-- mllib/src/test/scala/org/apache/spark/ml/feature/Word2VecSuite.scala | 1
-rw-r--r-- mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala | 16
2 files changed, 10 insertions, 7 deletions
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/Word2VecSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/Word2VecSuite.scala
index 03ba86670d..43a09cc418 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/Word2VecSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/Word2VecSuite.scala
@@ -52,6 +52,7 @@ class Word2VecSuite extends FunSuite with MLlibTestSparkContext {
.setVectorSize(3)
.setInputCol("text")
.setOutputCol("result")
+ .setSeed(42L)
.fit(docDF)
model.transform(docDF).select("result", "expected").collect().foreach {
diff --git a/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala
index fc7349330c..6cc6ec94eb 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala
@@ -345,6 +345,7 @@ class ALSSuite extends FunSuite with MLlibTestSparkContext with Logging {
.setImplicitPrefs(implicitPrefs)
.setNumUserBlocks(numUserBlocks)
.setNumItemBlocks(numItemBlocks)
+ .setSeed(0)
val alpha = als.getAlpha
val model = als.fit(training.toDF())
val predictions = model.transform(test.toDF())
@@ -425,17 +426,18 @@ class ALSSuite extends FunSuite with MLlibTestSparkContext with Logging {
val (ratings, _) = genImplicitTestData(numUsers = 20, numItems = 40, rank = 2, noiseStd = 0.01)
val longRatings = ratings.map(r => Rating(r.user.toLong, r.item.toLong, r.rating))
- val (longUserFactors, _) = ALS.train(longRatings, rank = 2, maxIter = 4)
+ val (longUserFactors, _) = ALS.train(longRatings, rank = 2, maxIter = 4, seed = 0)
assert(longUserFactors.first()._1.getClass === classOf[Long])
val strRatings = ratings.map(r => Rating(r.user.toString, r.item.toString, r.rating))
- val (strUserFactors, _) = ALS.train(strRatings, rank = 2, maxIter = 4)
+ val (strUserFactors, _) = ALS.train(strRatings, rank = 2, maxIter = 4, seed = 0)
assert(strUserFactors.first()._1.getClass === classOf[String])
}
test("nonnegative constraint") {
val (ratings, _) = genImplicitTestData(numUsers = 20, numItems = 40, rank = 2, noiseStd = 0.01)
- val (userFactors, itemFactors) = ALS.train(ratings, rank = 2, maxIter = 4, nonnegative = true)
+ val (userFactors, itemFactors) =
+ ALS.train(ratings, rank = 2, maxIter = 4, nonnegative = true, seed = 0)
def isNonnegative(factors: RDD[(Int, Array[Float])]): Boolean = {
factors.values.map { _.forall(_ >= 0.0) }.reduce(_ && _)
}
@@ -459,7 +461,7 @@ class ALSSuite extends FunSuite with MLlibTestSparkContext with Logging {
test("partitioner in returned factors") {
val (ratings, _) = genImplicitTestData(numUsers = 20, numItems = 40, rank = 2, noiseStd = 0.01)
val (userFactors, itemFactors) = ALS.train(
- ratings, rank = 2, maxIter = 4, numUserBlocks = 3, numItemBlocks = 4)
+ ratings, rank = 2, maxIter = 4, numUserBlocks = 3, numItemBlocks = 4, seed = 0)
for ((tpe, factors) <- Seq(("User", userFactors), ("Item", itemFactors))) {
assert(userFactors.partitioner.isDefined, s"$tpe factors should have partitioner.")
val part = userFactors.partitioner.get
@@ -476,8 +478,8 @@ class ALSSuite extends FunSuite with MLlibTestSparkContext with Logging {
test("als with large number of iterations") {
val (ratings, _) = genExplicitTestData(numUsers = 4, numItems = 4, rank = 1)
- ALS.train(ratings, rank = 1, maxIter = 50, numUserBlocks = 2, numItemBlocks = 2)
- ALS.train(
- ratings, rank = 1, maxIter = 50, numUserBlocks = 2, numItemBlocks = 2, implicitPrefs = true)
+ ALS.train(ratings, rank = 1, maxIter = 50, numUserBlocks = 2, numItemBlocks = 2, seed = 0)
+ ALS.train(ratings, rank = 1, maxIter = 50, numUserBlocks = 2, numItemBlocks = 2,
+ implicitPrefs = true, seed = 0)
}
}