aboutsummaryrefslogtreecommitdiff
path: root/core
diff options
context:
space:
mode:
author Holden Karau <holden@pigscanfly.ca> 2014-04-16 09:33:27 -0700
committer Patrick Wendell <pwendell@gmail.com> 2014-04-16 09:33:27 -0700
commit c3527a333a0877f4b49614f3fd1f041b01749651 (patch)
tree ba2c97de461cabdfe2cc0d786b1ff65f5a5b557e /core
parent 9edd88782e0268439c5ab57400d6a7ab432fc269 (diff)
download spark-c3527a333a0877f4b49614f3fd1f041b01749651.tar.gz
spark-c3527a333a0877f4b49614f3fd1f041b01749651.tar.bz2
spark-c3527a333a0877f4b49614f3fd1f041b01749651.zip
SPARK-1310: Start adding k-fold cross validation to MLLib [adds kFold to MLUtils & fixes bug in BernoulliSampler]
Author: Holden Karau <holden@pigscanfly.ca> Closes #18 from holdenk/addkfoldcrossvalidation and squashes the following commits: 208db9b [Holden Karau] Fix a bad space e84f2fc [Holden Karau] Fix the test, we should be looking at the second element instead 6ddbf05 [Holden Karau] swap training and validation order 7157ae9 [Holden Karau] CR feedback 90896c7 [Holden Karau] New line 150889c [Holden Karau] Fix up error messages in the MLUtilsSuite 2cb90b3 [Holden Karau] Fix the names in kFold c702a96 [Holden Karau] Fix imports in MLUtils e187e35 [Holden Karau] Move { up to same line as whenExecuting(random) in RandomSamplerSuite.scala c5b723f [Holden Karau] clean up 7ebe4d5 [Holden Karau] CR feedback, remove unecessary learners (came back during merge mistake) and insert an empty line bb5fa56 [Holden Karau] extra line sadness 163c5b1 [Holden Karau] code review feedback 1.to -> 1 to and folds -> numFolds 5a33f1d [Holden Karau] Code review follow up. e8741a7 [Holden Karau] CR feedback b78804e [Holden Karau] Remove cross validation [TODO in another pull request] 91eae64 [Holden Karau] Consolidate things in mlutils 264502a [Holden Karau] Add a test for the bug that was found with BernoulliSampler not copying the complement param dd0b737 [Holden Karau] Wrap long lines (oops) c0b7fa4 [Holden Karau] Switch FoldedRDD to use BernoulliSampler and PartitionwiseSampledRDD 08f8e4d [Holden Karau] Fix BernoulliSampler to respect complement a751ec6 [Holden Karau] Add k-fold cross validation to MLLib
Diffstat (limited to 'core')
-rw-r--r-- core/src/main/scala/org/apache/spark/util/random/RandomSampler.scala 7
-rw-r--r-- core/src/test/scala/org/apache/spark/util/random/RandomSamplerSuite.scala 24
2 files changed, 22 insertions, 9 deletions
diff --git a/core/src/main/scala/org/apache/spark/util/random/RandomSampler.scala b/core/src/main/scala/org/apache/spark/util/random/RandomSampler.scala
index 37a6b04f52..4dc8ada00a 100644
--- a/core/src/main/scala/org/apache/spark/util/random/RandomSampler.scala
+++ b/core/src/main/scala/org/apache/spark/util/random/RandomSampler.scala
@@ -69,7 +69,12 @@ class BernoulliSampler[T](lb: Double, ub: Double, complement: Boolean = false)
}
}
- override def clone = new BernoulliSampler[T](lb, ub)
+ /**
+ * Return a sampler which is the complement of the range specified by the current sampler.
+ */
+ def cloneComplement(): BernoulliSampler[T] = new BernoulliSampler[T](lb, ub, !complement)
+
+ override def clone = new BernoulliSampler[T](lb, ub, complement)
}
/**
diff --git a/core/src/test/scala/org/apache/spark/util/random/RandomSamplerSuite.scala b/core/src/test/scala/org/apache/spark/util/random/RandomSamplerSuite.scala
index 7576c9a51f..e166787f17 100644
--- a/core/src/test/scala/org/apache/spark/util/random/RandomSamplerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/random/RandomSamplerSuite.scala
@@ -41,21 +41,31 @@ class RandomSamplerSuite extends FunSuite with BeforeAndAfter with EasyMockSugar
random.nextDouble().andReturn(x)
}
}
- whenExecuting(random)
- {
+ whenExecuting(random) {
val sampler = new BernoulliSampler[Int](0.25, 0.55)(random)
assert(sampler.sample(a.iterator).toList == List(3, 4, 5))
}
}
+ test("BernoulliSamplerWithRangeInverse") {
+ expecting {
+ for(x <- Seq(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)) {
+ random.nextDouble().andReturn(x)
+ }
+ }
+ whenExecuting(random) {
+ val sampler = new BernoulliSampler[Int](0.25, 0.55, true)(random)
+ assert(sampler.sample(a.iterator).toList === List(1, 2, 6, 7, 8, 9))
+ }
+ }
+
test("BernoulliSamplerWithRatio") {
expecting {
for(x <- Seq(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)) {
random.nextDouble().andReturn(x)
}
}
- whenExecuting(random)
- {
+ whenExecuting(random) {
val sampler = new BernoulliSampler[Int](0.35)(random)
assert(sampler.sample(a.iterator).toList == List(1, 2, 3))
}
@@ -67,8 +77,7 @@ class RandomSamplerSuite extends FunSuite with BeforeAndAfter with EasyMockSugar
random.nextDouble().andReturn(x)
}
}
- whenExecuting(random)
- {
+ whenExecuting(random) {
val sampler = new BernoulliSampler[Int](0.25, 0.55, true)(random)
assert(sampler.sample(a.iterator).toList == List(1, 2, 6, 7, 8, 9))
}
@@ -78,8 +87,7 @@ class RandomSamplerSuite extends FunSuite with BeforeAndAfter with EasyMockSugar
expecting {
random.setSeed(10L)
}
- whenExecuting(random)
- {
+ whenExecuting(random) {
val sampler = new BernoulliSampler[Int](0.2)(random)
sampler.setSeed(10L)
}