author    Sean Owen <sowen@cloudera.com>    2015-01-19 22:50:44 -0800
committer Patrick Wendell <patrick@databricks.com>    2015-01-19 22:50:45 -0800
commit    306ff187af0c49f61f4bc1850021561397b4f8f1 (patch)
tree      3b6fc4dbaaf50e8437c4e9a83ce85826380237d2 /python
parent    e69fb8c75aab7b95abf03785c3b2f1384373003a (diff)
SPARK-5270 [CORE] Provide isEmpty() function in RDD API
Pretty minor, but submitted for consideration -- this would at least help people make this check in the most efficient way I know.

Author: Sean Owen <sowen@cloudera.com>

Closes #4074 from srowen/SPARK-5270 and squashes the following commits:

66885b8 [Sean Owen] Add note that JavaRDDLike should not be implemented by user code
2e9b490 [Sean Owen] More tests, and Mima-exclude the new isEmpty method in JavaRDDLike
28395ff [Sean Owen] Add isEmpty to Java, Python
7dd04b7 [Sean Owen] Add efficient RDD.isEmpty()
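For orientation, a minimal sketch of how the new method is used from PySpark once this patch is applied. The SparkContext setup and master setting below are illustrative, not part of the patch; the two isEmpty() calls mirror the doctests added in rdd.py:

    from pyspark import SparkContext

    # Illustrative local context; any existing SparkContext works the same way.
    sc = SparkContext("local[2]", "isEmpty-example")

    print(sc.parallelize([]).isEmpty())         # True
    print(sc.parallelize([1, 2, 3]).isEmpty())  # False

    sc.stop()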
Diffstat (limited to 'python')
-rw-r--r--    python/pyspark/rdd.py    12
1 file changed, 12 insertions, 0 deletions
diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py
index c1120cf781..4977400ac1 100644
--- a/python/pyspark/rdd.py
+++ b/python/pyspark/rdd.py
@@ -1130,6 +1130,18 @@ class RDD(object):
             return rs[0]
         raise ValueError("RDD is empty")
 
+    def isEmpty(self):
+        """
+        Returns true if and only if the RDD contains no elements at all. Note that an RDD
+        may be empty even when it has at least 1 partition.
+
+        >>> sc.parallelize([]).isEmpty()
+        True
+        >>> sc.parallelize([1]).isEmpty()
+        False
+        """
+        return self._jrdd.partitions().size() == 0 or len(self.take(1)) == 0
+
     def saveAsNewAPIHadoopDataset(self, conf, keyConverter=None, valueConverter=None):
         """
         Output a Python RDD of key-value pairs (of form C{RDD[(K, V)]}) to any Hadoop file
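A closing note on the design of the added method: it first checks the partition count on the driver, so an RDD with zero partitions is reported empty without running any job, and otherwise falls back to take(1), which stops scanning as soon as an element is found. A common pre-patch workaround is something like the sketch below (the helper name is illustrative, not part of Spark); it is correct but computes a full count() and therefore scans every partition:

    # Pre-patch workaround: correct, but counts every element in every partition,
    # whereas the new isEmpty() stops at the first element it finds.
    def is_empty_via_count(rdd):
        return rdd.count() == 0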