aboutsummaryrefslogtreecommitdiff
path: root/core/src/main/scala/org
diff options
context:
space:
mode:
author Holden Karau <holden@pigscanfly.ca> 2015-07-01 23:05:45 -0700
committer Andrew Or <andrew@databricks.com> 2015-07-01 23:05:45 -0700
commit 15d41cc501f5fa7ac82c4a6741e416bb557f610a (patch)
tree e054eccafda151ad2ac1edc795301aa300150a99 /core/src/main/scala/org
parent d14338eafc5d633f766bd52ba610fd7c4fe90581 (diff)
download spark-15d41cc501f5fa7ac82c4a6741e416bb557f610a.tar.gz
spark-15d41cc501f5fa7ac82c4a6741e416bb557f610a.tar.bz2
spark-15d41cc501f5fa7ac82c4a6741e416bb557f610a.zip
[SPARK-8769] [TRIVIAL] [DOCS] toLocalIterator should mention it results in many jobs
Author: Holden Karau <holden@pigscanfly.ca> Closes #7171 from holdenk/SPARK-8769-toLocalIterator-documentation-improvement and squashes the following commits: 97ddd99 [Holden Karau] Add note
Diffstat (limited to 'core/src/main/scala/org')
-rw-r--r-- core/src/main/scala/org/apache/spark/rdd/RDD.scala 4
1 files changed, 4 insertions, 0 deletions
diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
index 10610f4b6f..cac6e3b477 100644
--- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -890,6 +890,10 @@ abstract class RDD[T: ClassTag](
* Return an iterator that contains all of the elements in this RDD.
*
* The iterator will consume as much memory as the largest partition in this RDD.
+ *
+ * Note: this results in multiple Spark jobs. If the input RDD is the result
+ * of a wide transformation (e.g. join with different partitioners), it should
+ * be cached first to avoid recomputation.
*/
def toLocalIterator: Iterator[T] = withScope {
def collectPartition(p: Int): Array[T] = {