about | summary | refs | log | tree | commit | diff
path: root/python/pyspark
diff options
context:
space:
mode:
Diffstat (limited to 'python/pyspark')
-rw-r--r-- python/pyspark/rdd.py 4
1 files changed, 4 insertions, 0 deletions
diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py
index d3148de6f4..cb12fed98c 100644
--- a/python/pyspark/rdd.py
+++ b/python/pyspark/rdd.py
@@ -2111,6 +2111,7 @@ class RDD(object):
def countApprox(self, timeout, confidence=0.95):
"""
.. note:: Experimental
+
Approximate version of count() that returns a potentially incomplete
result within a timeout, even if not all tasks have finished.
@@ -2124,6 +2125,7 @@ class RDD(object):
def sumApprox(self, timeout, confidence=0.95):
"""
.. note:: Experimental
+
Approximate operation to return the sum within a timeout
or meet the confidence.
@@ -2140,6 +2142,7 @@ class RDD(object):
def meanApprox(self, timeout, confidence=0.95):
"""
.. note:: Experimental
+
Approximate operation to return the mean within a timeout
or meet the confidence.
@@ -2156,6 +2159,7 @@ class RDD(object):
def countApproxDistinct(self, relativeSD=0.05):
"""
.. note:: Experimental
+
Return approximate number of distinct elements in the RDD.
The algorithm used is based on streamlib's implementation of