From 15d41cc501f5fa7ac82c4a6741e416bb557f610a Mon Sep 17 00:00:00 2001
From: Holden Karau
Date: Wed, 1 Jul 2015 23:05:45 -0700
Subject: [SPARK-8769] [TRIVIAL] [DOCS] toLocalIterator should mention it results in many jobs

Author: Holden Karau

Closes #7171 from holdenk/SPARK-8769-toLocalIterator-documentation-improvement and squashes the following commits:

97ddd99 [Holden Karau] Add note
---
 core/src/main/scala/org/apache/spark/rdd/RDD.scala | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
index 10610f4b6f..cac6e3b477 100644
--- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -890,6 +890,10 @@ abstract class RDD[T: ClassTag](
    * Return an iterator that contains all of the elements in this RDD.
    *
    * The iterator will consume as much memory as the largest partition in this RDD.
+   *
+   * Note: this results in multiple Spark jobs, and if the input RDD is the result
+   * of a wide transformation (e.g. join with different partitioners), to avoid
+   * recomputing the input RDD should be cached first.
    */
   def toLocalIterator: Iterator[T] = withScope {
     def collectPartition(p: Int): Array[T] = {
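The note added by this patch recommends caching the input RDD before calling toLocalIterator, since toLocalIterator launches one Spark job per partition. The following is a minimal, illustrative Scala sketch of that recommendation, not part of the patch itself; the RDD contents, app name, and local master are assumptions made for the example.

    import org.apache.spark.{HashPartitioner, SparkConf, SparkContext}

    object ToLocalIteratorExample {
      def main(args: Array[String]): Unit = {
        val sc = new SparkContext(
          new SparkConf().setAppName("toLocalIterator-example").setMaster("local[*]"))

        // Hypothetical input: a wide transformation (a join of two RDDs with
        // different partitioners), as mentioned in the added note.
        val left  = sc.parallelize(1 to 1000).map(i => (i % 10, i))
        val right = sc.parallelize(1 to 1000).map(i => (i % 10, i.toString))
          .partitionBy(new HashPartitioner(4))
        val joined = left.join(right)

        // Cache first: toLocalIterator runs one job per partition, so without
        // caching the join (and its shuffle) would be recomputed for every
        // partition that the iterator fetches.
        joined.cache()

        // Iterate on the driver; only one partition's worth of data is held
        // in driver memory at a time.
        joined.toLocalIterator.take(5).foreach(println)

        sc.stop()
      }
    }

Without the cache() call, each partition fetched by toLocalIterator would trigger a recomputation of the upstream join, which is exactly the cost the added documentation warns about.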