diff options
author | Doris Xin <doris.s.xin@gmail.com> | 2014-07-24 23:42:08 -0700 |
---|---|---|
committer | Xiangrui Meng <meng@databricks.com> | 2014-07-24 23:42:08 -0700 |
commit | 2f75a4a30e1a3fdf384475b9660c6c43f093f68c (patch) | |
tree | eb50e720cae6842bcb242d030adb27fba92c3f62 /core/src | |
parent | 14174abd421318e71c16edd24224fd5094bdfed4 (diff) | |
download | spark-2f75a4a30e1a3fdf384475b9660c6c43f093f68c.tar.gz spark-2f75a4a30e1a3fdf384475b9660c6c43f093f68c.tar.bz2 spark-2f75a4a30e1a3fdf384475b9660c6c43f093f68c.zip |
[SPARK-2656] Python version of stratified sampling
Exact sample size is not supported for now.
Author: Doris Xin <doris.s.xin@gmail.com>
Closes #1554 from dorx/pystratified and squashes the following commits:
4ba927a [Doris Xin] use rel diff (+- 50%) instead of abs diff (+- 50)
bdc3f8b [Doris Xin] updated unit to check sample holistically
7713c7b [Doris Xin] Python version of stratified sampling
Diffstat (limited to 'core/src')
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/RDD.scala | 2 |
1 file changed, 1 insertion, 1 deletion
diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
index c1bafab3e7..edbf7eace9 100644
--- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -354,7 +354,7 @@ abstract class RDD[T: ClassTag](
   def sample(withReplacement: Boolean,
       fraction: Double,
       seed: Long = Utils.random.nextLong): RDD[T] = {
-    require(fraction >= 0.0, "Invalid fraction value: " + fraction)
+    require(fraction >= 0.0, "Negative fraction value: " + fraction)
     if (withReplacement) {
       new PartitionwiseSampledRDD[T, T](this, new PoissonSampler[T](fraction), true, seed)
     } else {