about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- core/src/main/scala/org/apache/spark/SparkContext.scala 13
1 files changed, 11 insertions, 2 deletions
diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala
index 8fbda2c667..35970c2f50 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -434,12 +434,21 @@ class SparkContext(config: SparkConf) extends Logging {
// Methods for creating RDDs
- /** Distribute a local Scala collection to form an RDD. */
+ /** Distribute a local Scala collection to form an RDD.
+ *
+ * @note Parallelize acts lazily. If `seq` is a mutable collection and is
+ * altered after the call to parallelize and before the first action on the
+ * RDD, the resultant RDD will reflect the modified collection. Pass a copy of
+ * the argument to avoid this.
+ */
def parallelize[T: ClassTag](seq: Seq[T], numSlices: Int = defaultParallelism): RDD[T] = {
new ParallelCollectionRDD[T](this, seq, numSlices, Map[Int, Seq[String]]())
}
- /** Distribute a local Scala collection to form an RDD. */
+ /** Distribute a local Scala collection to form an RDD.
+ *
+ * This method is identical to `parallelize`.
+ */
def makeRDD[T: ClassTag](seq: Seq[T], numSlices: Int = defaultParallelism): RDD[T] = {
parallelize(seq, numSlices)
}