[SPARK-1822] SchemaRDD.count() should use query optimizer

Author: Kan Zhang <kzhang@apache.org> Closes #841 from kanzhang/SPARK-1822 and squashes the following commits: 2f8072a [Kan Zhang] [SPARK-1822] Minor style update cf4baa4 [Kan Zhang] [SPARK-1822] Adding Scaladoc e67c910 [Kan Zhang] [SPARK-1822] SchemaRDD.count() should use optimizer
author: Kan Zhang <kzhang@apache.org> 2014-05-25 00:06:42 -0700
committer: Reynold Xin <rxin@apache.org> 2014-05-25 00:06:42 -0700
commit: 6052db9dc10c996215658485e805200e4f0cf549 (patch)
tree: c5575603da300b1dbdcce95f7bdff9457bc26094 /python
parent: 6e9fb6320bec3371bc9c010ccbc1b915f500486b (diff)
download: spark-6052db9dc10c996215658485e805200e4f0cf549.tar.gz
spark-6052db9dc10c996215658485e805200e4f0cf549.tar.bz2
spark-6052db9dc10c996215658485e805200e4f0cf549.zip
1 files changed, 13 insertions, 1 deletions
diff --git a/python/pyspark/sql.py b/python/pyspark/sql.py
index bbe69e7d8f..f2001afae4 100644
--- a/python/pyspark/sql.py
+++ b/python/pyspark/sql.py
@@ -268,7 +268,7 @@ class SchemaRDD(RDD):
     def _jrdd(self):
         """
         Lazy evaluation of PythonRDD object. Only done when a user calls methods defined by the
-        L{pyspark.rdd.RDD} super class (map, count, etc.).
+        L{pyspark.rdd.RDD} super class (map, filter, etc.).
         """
         if not hasattr(self, '_lazy_jrdd'):
             self._lazy_jrdd = self._toPython()._jrdd
@@ -321,6 +321,18 @@ class SchemaRDD(RDD):
         """
         self._jschema_rdd.saveAsTable(tableName)
 
+    def count(self):
+        """
+        Return the number of elements in this RDD.
+
+        >>> srdd = sqlCtx.inferSchema(rdd)
+        >>> srdd.count()
+        3L
+        >>> srdd.count() == srdd.map(lambda x: x).count()
+        True
+        """
+        return self._jschema_rdd.count()
+
     def _toPython(self):
         # We have to import the Row class explicitly, so that the reference Pickler has is
         # pyspark.sql.Row instead of __main__.Row
author	Kan Zhang <kzhang@apache.org>	2014-05-25 00:06:42 -0700
committer	Reynold Xin <rxin@apache.org>	2014-05-25 00:06:42 -0700
commit	6052db9dc10c996215658485e805200e4f0cf549 (patch)
tree	c5575603da300b1dbdcce95f7bdff9457bc26094 /python
parent	6e9fb6320bec3371bc9c010ccbc1b915f500486b (diff)
download	spark-6052db9dc10c996215658485e805200e4f0cf549.tar.gz spark-6052db9dc10c996215658485e805200e4f0cf549.tar.bz2 spark-6052db9dc10c996215658485e805200e4f0cf549.zip