From 28bcb9e9e86a4b643fbf96b2b7e03928ebcfc060 Mon Sep 17 00:00:00 2001 From: mbonaci Date: Fri, 20 Mar 2015 18:30:45 +0000 Subject: [SPARK-6370][core] Documentation: Improve all 3 docs for RDD.sample The docs for the `sample` method were insufficient, now less so. Author: mbonaci Closes #5097 from mbonaci/master and squashes the following commits: a6a9d97 [mbonaci] [SPARK-6370][core] Documentation: Improve all 3 docs for RDD.sample method --- python/pyspark/rdd.py | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'python/pyspark') diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index bf17f513c0..c337a43c8a 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -346,6 +346,12 @@ class RDD(object): """ Return a sampled subset of this RDD. + :param withReplacement: whether elements can be sampled multiple times (replaced when sampled out) + :param fraction: expected size of the sample as a fraction of this RDD's size + without replacement: probability that each element is chosen; fraction must be in [0, 1] + with replacement: expected number of times each element is chosen; fraction must be >= 0 + :param seed: seed for the random number generator + >>> rdd = sc.parallelize(range(100), 4) >>> rdd.sample(False, 0.1, 81).count() 10 -- cgit v1.2.3