aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark/sql/dataframe.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/pyspark/sql/dataframe.py')
-rw-r--r--python/pyspark/sql/dataframe.py14
1 files changed, 14 insertions, 0 deletions
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 7a69c4c70c..d473d6b534 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -241,6 +241,20 @@ class DataFrame(object):
return list(_load_from_socket(port, BatchedSerializer(PickleSerializer())))
@ignore_unicode_prefix
+ @since(2.0)
+ def toLocalIterator(self):
+ """
+ Returns an iterator that contains all of the rows in this :class:`DataFrame`.
+ The iterator will consume as much memory as the largest partition in this DataFrame.
+
+ >>> list(df.toLocalIterator())
+ [Row(age=2, name=u'Alice'), Row(age=5, name=u'Bob')]
+ """
+ with SCCallSiteSync(self._sc) as css:
+ port = self._jdf.toPythonIterator()
+ return _load_from_socket(port, BatchedSerializer(PickleSerializer()))
+
+ @ignore_unicode_prefix
@since(1.3)
def limit(self, num):
"""Limits the result count to the number specified.