diff options
author | mbonaci <mbonaci@gmail.com> | 2015-03-20 18:30:45 +0000 |
---|---|---|
committer | Sean Owen <sowen@cloudera.com> | 2015-03-20 18:33:53 +0000 |
commit | 28bcb9e9e86a4b643fbf96b2b7e03928ebcfc060 (patch) | |
tree | 24985af7a3e26e1c852e3e9615d4eb188ff78f08 /core/src | |
parent | db4d317ccfdd9bd1dc7e8beac54ebcc35966b7d5 (diff) | |
download | spark-28bcb9e9e86a4b643fbf96b2b7e03928ebcfc060.tar.gz spark-28bcb9e9e86a4b643fbf96b2b7e03928ebcfc060.tar.bz2 spark-28bcb9e9e86a4b643fbf96b2b7e03928ebcfc060.zip |
[SPARK-6370][core] Documentation: Improve all 3 docs for RDD.sample
The docs for the `sample` method were insufficient, now less so.
Author: mbonaci <mbonaci@gmail.com>
Closes #5097 from mbonaci/master and squashes the following commits:
a6a9d97 [mbonaci] [SPARK-6370][core] Documentation: Improve all 3 docs for RDD.sample method
Diffstat (limited to 'core/src')
-rw-r--r-- | core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala | 11 | ||||
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/RDD.scala | 6 |
2 files changed, 17 insertions, 0 deletions
diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala index 645dc3bfb6..3e9beb670f 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala @@ -101,12 +101,23 @@ class JavaRDD[T](val rdd: RDD[T])(implicit val classTag: ClassTag[T]) /** * Return a sampled subset of this RDD. + * + * @param withReplacement can elements be sampled multiple times (replaced when sampled out) + * @param fraction expected size of the sample as a fraction of this RDD's size + * without replacement: probability that each element is chosen; fraction must be [0, 1] + * with replacement: expected number of times each element is chosen; fraction must be >= 0 */ def sample(withReplacement: Boolean, fraction: Double): JavaRDD[T] = sample(withReplacement, fraction, Utils.random.nextLong) /** * Return a sampled subset of this RDD. + * + * @param withReplacement can elements be sampled multiple times (replaced when sampled out) + * @param fraction expected size of the sample as a fraction of this RDD's size + * without replacement: probability that each element is chosen; fraction must be [0, 1] + * with replacement: expected number of times each element is chosen; fraction must be >= 0 + * @param seed seed for the random number generator */ def sample(withReplacement: Boolean, fraction: Double, seed: Long): JavaRDD[T] = wrapRDD(rdd.sample(withReplacement, fraction, seed)) diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala index a139780d96..a4c74ed03e 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala @@ -377,6 +377,12 @@ abstract class RDD[T: ClassTag]( /** * Return a sampled subset of this RDD. + * + * @param withReplacement can elements be sampled multiple times (replaced when sampled out) + * @param fraction expected size of the sample as a fraction of this RDD's size + * without replacement: probability that each element is chosen; fraction must be [0, 1] + * with replacement: expected number of times each element is chosen; fraction must be >= 0 + * @param seed seed for the random number generator */ def sample(withReplacement: Boolean, fraction: Double, |