aboutsummaryrefslogtreecommitdiff
path: root/python
diff options
context:
space:
mode:
author Reynold Xin <rxin@apache.org> 2014-05-25 01:44:49 -0700
committer Reynold Xin <rxin@apache.org> 2014-05-25 01:44:49 -0700
commit d66642e3978a76977414c2fdaedebaad35662667 (patch)
tree 863c953cda9729a99b0749f452483e42822dd1bb /python
parent 5c7faecd75ea59454ad3209390ac078e6cf6e4a6 (diff)
download spark-d66642e3978a76977414c2fdaedebaad35662667.tar.gz
download spark-d66642e3978a76977414c2fdaedebaad35662667.tar.bz2
download spark-d66642e3978a76977414c2fdaedebaad35662667.zip
SPARK-1822: Some minor cleanup work on SchemaRDD.count()
Minor cleanup following #841.

Author: Reynold Xin <rxin@apache.org>

Closes #868 from rxin/schema-count and squashes the following commits:

5442651 [Reynold Xin] SPARK-1822: Some minor cleanup work on SchemaRDD.count()
Diffstat (limited to 'python')
-rw-r--r-- python/pyspark/sql.py 5
1 files changed, 4 insertions, 1 deletions
diff --git a/python/pyspark/sql.py b/python/pyspark/sql.py
index f2001afae4..fa4b9c7b68 100644
--- a/python/pyspark/sql.py
+++ b/python/pyspark/sql.py
@@ -323,7 +323,10 @@ class SchemaRDD(RDD):
def count(self):
"""
- Return the number of elements in this RDD.
+ Return the number of elements in this RDD. Unlike the base RDD
+ implementation of count, this implementation leverages the query
+ optimizer to compute the count on the SchemaRDD, which supports
+ features such as filter pushdown.
>>> srdd = sqlCtx.inferSchema(rdd)
>>> srdd.count()