diff options
Diffstat (limited to 'python/pyspark/sql/dataframe.py')
-rw-r--r-- | python/pyspark/sql/dataframe.py | 16 |
1 files changed, 15 insertions, 1 deletions
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 7a69c4c70c..b4fa836893 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -60,7 +60,7 @@ class DataFrame(object): people = sqlContext.read.parquet("...") department = sqlContext.read.parquet("...") - people.filter(people.age > 30).join(department, people.deptId == department.id)) \ + people.filter(people.age > 30).join(department, people.deptId == department.id)\ .groupBy(department.name, "gender").agg({"salary": "avg", "age": "max"}) .. note:: Experimental @@ -241,6 +241,20 @@ class DataFrame(object): return list(_load_from_socket(port, BatchedSerializer(PickleSerializer()))) @ignore_unicode_prefix + @since(2.0) + def toLocalIterator(self): + """ + Returns an iterator that contains all of the rows in this :class:`DataFrame`. + The iterator will consume as much memory as the largest partition in this DataFrame. + + >>> list(df.toLocalIterator()) + [Row(age=2, name=u'Alice'), Row(age=5, name=u'Bob')] + """ + with SCCallSiteSync(self._sc) as css: + port = self._jdf.toPythonIterator() + return _load_from_socket(port, BatchedSerializer(PickleSerializer())) + + @ignore_unicode_prefix @since(1.3) def limit(self, num): """Limits the result count to the number specified. |