From 9cc3a6d3c0a64b80af77ae358c58d4b29b18c534 Mon Sep 17 00:00:00 2001 From: Shivaram Venkataraman Date: Thu, 19 Dec 2013 11:49:17 -0800 Subject: Add comment explaining collectPartitions's use --- core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala index 458d9dcbc3..f344804b4c 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala @@ -248,6 +248,8 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { * Return an array that contains all of the elements in a specific partition of this RDD. */ def collectPartitions(partitionIds: Array[Int]): Array[JList[T]] = { + // This is useful for implementing `take` from other language frontends + // like Python where the data is serialized. import scala.collection.JavaConversions._ val res = context.runJob(rdd, (it: Iterator[T]) => it.toArray, partitionIds, true) res.map(x => new java.util.ArrayList(x.toSeq)).toArray -- cgit v1.2.3