aboutsummaryrefslogtreecommitdiff
path: root/core/src/main
diff options
context:
space:
mode:
authorTommy YU <tummyyu@163.com>2016-02-06 17:29:09 +0000
committerSean Owen <sowen@cloudera.com>2016-02-06 17:29:09 +0000
commit81da3bee669aaeb79ec68baaf7c99bff6e5d14fe (patch)
tree269905def89ac9cd4cf438d1b45e2d261b0531b4 /core/src/main
parent4f28291f851b9062da3941e63de4eabb0c77f5d0 (diff)
downloadspark-81da3bee669aaeb79ec68baaf7c99bff6e5d14fe.tar.gz
spark-81da3bee669aaeb79ec68baaf7c99bff6e5d14fe.tar.bz2
spark-81da3bee669aaeb79ec68baaf7c99bff6e5d14fe.zip
[SPARK-5865][API DOC] Add doc warnings for methods that return local data structures
rxin srowen I worked out a note message for the rdd.take function; please help to review. If it's fine, I can apply it to all the other functions later. Author: Tommy YU <tummyyu@163.com> Closes #10874 from Wenpei/spark-5865-add-warning-for-localdatastructure.
Diffstat (limited to 'core/src/main')
-rw-r--r--core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala3
-rw-r--r--core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala24
-rw-r--r--core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala3
-rw-r--r--core/src/main/scala/org/apache/spark/rdd/RDD.scala15
4 files changed, 45 insertions, 0 deletions
diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala
index fb04472ee7..94d103588b 100644
--- a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala
+++ b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala
@@ -636,6 +636,9 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
/**
* Return the key-value pairs in this RDD to the master as a Map.
+ *
+ * @note this method should only be used if the resulting data is expected to be small, as
+ * all the data is loaded into the driver's memory.
*/
def collectAsMap(): java.util.Map[K, V] = mapAsSerializableJavaMap(rdd.collectAsMap())
diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala
index 7340defabf..37c211fe70 100644
--- a/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala
+++ b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala
@@ -327,6 +327,9 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable {
/**
* Return an array that contains all of the elements in this RDD.
+ *
+ * @note this method should only be used if the resulting array is expected to be small, as
+ * all the data is loaded into the driver's memory.
*/
def collect(): JList[T] =
rdd.collect().toSeq.asJava
@@ -465,6 +468,9 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable {
* Take the first num elements of the RDD. This currently scans the partitions *one by one*, so
* it will be slow if a lot of partitions are required. In that case, use collect() to get the
* whole RDD instead.
+ *
+ * @note this method should only be used if the resulting array is expected to be small, as
+ * all the data is loaded into the driver's memory.
*/
def take(num: Int): JList[T] =
rdd.take(num).toSeq.asJava
@@ -548,6 +554,9 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable {
/**
* Returns the top k (largest) elements from this RDD as defined by
* the specified Comparator[T] and maintains the order.
+ *
+ * @note this method should only be used if the resulting array is expected to be small, as
+ * all the data is loaded into the driver's memory.
* @param num k, the number of top elements to return
* @param comp the comparator that defines the order
* @return an array of top elements
@@ -559,6 +568,9 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable {
/**
* Returns the top k (largest) elements from this RDD using the
* natural ordering for T and maintains the order.
+ *
+ * @note this method should only be used if the resulting array is expected to be small, as
+ * all the data is loaded into the driver's memory.
* @param num k, the number of top elements to return
* @return an array of top elements
*/
@@ -570,6 +582,9 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable {
/**
* Returns the first k (smallest) elements from this RDD as defined by
* the specified Comparator[T] and maintains the order.
+ *
+ * @note this method should only be used if the resulting array is expected to be small, as
+ * all the data is loaded into the driver's memory.
* @param num k, the number of elements to return
* @param comp the comparator that defines the order
* @return an array of top elements
@@ -601,6 +616,9 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable {
/**
* Returns the first k (smallest) elements from this RDD using the
 * natural ordering for T while maintaining the order.
+ *
+ * @note this method should only be used if the resulting array is expected to be small, as
+ * all the data is loaded into the driver's memory.
* @param num k, the number of top elements to return
* @return an array of top elements
*/
@@ -634,6 +652,9 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable {
/**
* The asynchronous version of `collect`, which returns a future for
* retrieving an array containing all of the elements in this RDD.
+ *
+ * @note this method should only be used if the resulting array is expected to be small, as
+ * all the data is loaded into the driver's memory.
*/
def collectAsync(): JavaFutureAction[JList[T]] = {
new JavaFutureActionWrapper(rdd.collectAsync(), (x: Seq[T]) => x.asJava)
@@ -642,6 +663,9 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable {
/**
* The asynchronous version of the `take` action, which returns a
* future for retrieving the first `num` elements of this RDD.
+ *
+ * @note this method should only be used if the resulting array is expected to be small, as
+ * all the data is loaded into the driver's memory.
*/
def takeAsync(num: Int): JavaFutureAction[JList[T]] = {
new JavaFutureActionWrapper(rdd.takeAsync(num), (x: Seq[T]) => x.asJava)
diff --git a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
index 33f2f0b44f..61905a8421 100644
--- a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
@@ -726,6 +726,9 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
*
 * Warning: this doesn't return a multimap (so if you have multiple values for the same key, only
 * one value per key is preserved in the map returned)
+ *
+ * @note this method should only be used if the resulting data is expected to be small, as
+ * all the data is loaded into the driver's memory.
*/
def collectAsMap(): Map[K, V] = self.withScope {
val data = self.collect()
diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
index e8157cf4eb..a81a98b526 100644
--- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -481,6 +481,9 @@ abstract class RDD[T: ClassTag](
/**
* Return a fixed-size sampled subset of this RDD in an array
*
+ * @note this method should only be used if the resulting array is expected to be small, as
+ * all the data is loaded into the driver's memory.
+ *
* @param withReplacement whether sampling is done with replacement
* @param num size of the returned sample
* @param seed seed for the random number generator
@@ -836,6 +839,9 @@ abstract class RDD[T: ClassTag](
/**
* Return an array that contains all of the elements in this RDD.
+ *
+ * @note this method should only be used if the resulting array is expected to be small, as
+ * all the data is loaded into the driver's memory.
*/
def collect(): Array[T] = withScope {
val results = sc.runJob(this, (iter: Iterator[T]) => iter.toArray)
@@ -1202,6 +1208,9 @@ abstract class RDD[T: ClassTag](
* results from that partition to estimate the number of additional partitions needed to satisfy
* the limit.
*
+ * @note this method should only be used if the resulting array is expected to be small, as
+ * all the data is loaded into the driver's memory.
+ *
* @note due to complications in the internal implementation, this method will raise
* an exception if called on an RDD of `Nothing` or `Null`.
*/
@@ -1263,6 +1272,9 @@ abstract class RDD[T: ClassTag](
* // returns Array(6, 5)
* }}}
*
+ * @note this method should only be used if the resulting array is expected to be small, as
+ * all the data is loaded into the driver's memory.
+ *
* @param num k, the number of top elements to return
* @param ord the implicit ordering for T
* @return an array of top elements
@@ -1283,6 +1295,9 @@ abstract class RDD[T: ClassTag](
* // returns Array(2, 3)
* }}}
*
+ * @note this method should only be used if the resulting array is expected to be small, as
+ * all the data is loaded into the driver's memory.
+ *
* @param num k, the number of elements to return
* @param ord the implicit ordering for T
* @return an array of top elements