diff options
Diffstat (limited to 'python/pyspark/rdd.py')
-rw-r--r-- | python/pyspark/rdd.py | 5 |
1 files changed, 5 insertions, 0 deletions
diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index 2de2c2fd1a..a163ceafe9 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -386,6 +386,11 @@ class RDD(object): with replacement: expected number of times each element is chosen; fraction must be >= 0 :param seed: seed for the random number generator + .. note:: + + This is not guaranteed to provide exactly the fraction specified of the total count + of the given :class:`DataFrame`. + >>> rdd = sc.parallelize(range(100), 4) >>> 6 <= rdd.sample(False, 0.1, 81).count() <= 14 True |