Diffstat (limited to 'core/src/main/scala/org')
-rw-r--r--  core/src/main/scala/org/apache/spark/Accumulator.scala           |  6
-rw-r--r--  core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala  | 32
-rw-r--r--  core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala  |  2
-rw-r--r--  core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala  | 24
-rw-r--r--  core/src/main/scala/org/apache/spark/rdd/RDD.scala               |  2
5 files changed, 33 insertions, 33 deletions
diff --git a/core/src/main/scala/org/apache/spark/Accumulator.scala b/core/src/main/scala/org/apache/spark/Accumulator.scala
index 5e8f1d4a70..0e4bcc33cb 100644
--- a/core/src/main/scala/org/apache/spark/Accumulator.scala
+++ b/core/src/main/scala/org/apache/spark/Accumulator.scala
@@ -29,9 +29,9 @@ import org.apache.spark.storage.{BlockId, BlockStatus}
/**
* A simpler value of [[Accumulable]] where the result type being accumulated is the same
* as the types of elements being merged, i.e. variables that are only "added" to through an
- * associative operation and can therefore be efficiently supported in parallel. They can be used
- * to implement counters (as in MapReduce) or sums. Spark natively supports accumulators of numeric
- * value types, and programmers can add support for new types.
+ * associative and commutative operation and can therefore be efficiently supported in parallel.
+ * They can be used to implement counters (as in MapReduce) or sums. Spark natively supports
+ * accumulators of numeric value types, and programmers can add support for new types.
*
* An accumulator is created from an initial value `v` by calling [[SparkContext#accumulator]].
* Tasks running on the cluster can then add to it using the [[Accumulable#+=]] operator.
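A minimal usage sketch of the accumulator pattern this Scaladoc describes, assuming an existing SparkContext named `sc` and the Spark 1.x-era API defined in this file; the variable names are only illustrative:

    val errors = sc.accumulator(0)       // created from an initial value via SparkContext#accumulator
    sc.parallelize(1 to 100).foreach { x =>
      if (x % 10 == 0) errors += 1       // tasks may only add, using the associative, commutative +=
    }
    println(errors.value)                // only the driver program reads the merged value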
diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala
index 94d103588b..e080f91f50 100644
--- a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala
+++ b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala
@@ -278,17 +278,17 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
combineByKey(createCombiner, mergeValue, mergeCombiners, new HashPartitioner(numPartitions))
/**
- * Merge the values for each key using an associative reduce function. This will also perform
- * the merging locally on each mapper before sending results to a reducer, similarly to a
- * "combiner" in MapReduce.
+ * Merge the values for each key using an associative and commutative reduce function. This will
+ * also perform the merging locally on each mapper before sending results to a reducer, similarly
+ * to a "combiner" in MapReduce.
*/
def reduceByKey(partitioner: Partitioner, func: JFunction2[V, V, V]): JavaPairRDD[K, V] =
fromRDD(rdd.reduceByKey(partitioner, func))
/**
- * Merge the values for each key using an associative reduce function, but return the results
- * immediately to the master as a Map. This will also perform the merging locally on each mapper
- * before sending results to a reducer, similarly to a "combiner" in MapReduce.
+ * Merge the values for each key using an associative and commutative reduce function, but return
+ * the result immediately to the master as a Map. This will also perform the merging locally on
+ * each mapper before sending results to a reducer, similarly to a "combiner" in MapReduce.
*/
def reduceByKeyLocally(func: JFunction2[V, V, V]): java.util.Map[K, V] =
mapAsSerializableJavaMap(rdd.reduceByKeyLocally(func))
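The wording change above adds "commutative" alongside "associative"; a minimal sketch of why that matters, shown with the Scala API for brevity and assuming an existing SparkContext `sc` with made-up data:

    val pairs = sc.parallelize(Seq(("k", 1), ("k", 2), ("k", 3)), 3)

    // Safe: + is associative and commutative, so the map-side "combiner" can
    // merge partial results in any order and still yield ("k", 6).
    val sums = pairs.reduceByKey(_ + _)

    // Unsafe: subtraction is neither associative nor commutative, so the value
    // for "k" depends on partitioning and merge order, which is exactly the
    // hazard the new wording calls out.
    val unstable = pairs.reduceByKey(_ - _)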
@@ -381,9 +381,9 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
fromRDD(rdd.foldByKey(zeroValue)(func))
/**
- * Merge the values for each key using an associative reduce function. This will also perform
- * the merging locally on each mapper before sending results to a reducer, similarly to a
- * "combiner" in MapReduce. Output will be hash-partitioned with numPartitions partitions.
+ * Merge the values for each key using an associative and commutative reduce function. This will
+ * also perform the merging locally on each mapper before sending results to a reducer, similarly
+ * to a "combiner" in MapReduce. Output will be hash-partitioned with numPartitions partitions.
*/
def reduceByKey(func: JFunction2[V, V, V], numPartitions: Int): JavaPairRDD[K, V] =
fromRDD(rdd.reduceByKey(func, numPartitions))
@@ -461,10 +461,10 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
fromRDD(rdd.partitionBy(partitioner))
/**
- * Merge the values for each key using an associative reduce function. This will also perform
- * the merging locally on each mapper before sending results to a reducer, similarly to a
- * "combiner" in MapReduce.
- */
+ * Return an RDD containing all pairs of elements with matching keys in `this` and `other`. Each
+ * pair of elements will be returned as a (k, (v1, v2)) tuple, where (k, v1) is in `this` and
+ * (k, v2) is in `other`. Uses the given Partitioner to partition the output RDD.
+ */
def join[W](other: JavaPairRDD[K, W], partitioner: Partitioner): JavaPairRDD[K, (V, W)] =
fromRDD(rdd.join(other, partitioner))
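For reference, a small sketch of the join semantics the corrected comment now describes, again using the Scala API and assuming an existing SparkContext `sc`; the sample data is made up:

    import org.apache.spark.HashPartitioner

    val left  = sc.parallelize(Seq((1, "a"), (2, "b")))
    val right = sc.parallelize(Seq((1, "x"), (1, "y")))

    // Matching keys become (k, (v1, v2)) tuples; the given partitioner decides
    // how the joined output RDD is partitioned.
    val joined = left.join(right, new HashPartitioner(4))
    // joined contains (1, ("a", "x")) and (1, ("a", "y")); key 2 has no match and is dropped.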
@@ -520,9 +520,9 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
}
/**
- * Merge the values for each key using an associative reduce function. This will also perform
- * the merging locally on each mapper before sending results to a reducer, similarly to a
- * "combiner" in MapReduce. Output will be hash-partitioned with the existing partitioner/
+ * Merge the values for each key using an associative and commutative reduce function. This will
+ * also perform the merging locally on each mapper before sending results to a reducer, similarly
+ * to a "combiner" in MapReduce. Output will be hash-partitioned with the existing partitioner/
* parallelism level.
*/
def reduceByKey(func: JFunction2[V, V, V]): JavaPairRDD[K, V] = {
diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala
index 37c211fe70..4212027122 100644
--- a/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala
+++ b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala
@@ -373,7 +373,7 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable {
/**
* Aggregate the elements of each partition, and then the results for all the partitions, using a
- * given associative and commutative function and a neutral "zero value". The function
+ * given associative function and a neutral "zero value". The function
* op(t1, t2) is allowed to modify t1 and return it as its result value to avoid object
* allocation; however, it should not modify t2.
*
diff --git a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
index 61905a8421..e00b9f6cfd 100644
--- a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
@@ -300,27 +300,27 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
}
/**
- * Merge the values for each key using an associative reduce function. This will also perform
- * the merging locally on each mapper before sending results to a reducer, similarly to a
- * "combiner" in MapReduce.
+ * Merge the values for each key using an associative and commutative reduce function. This will
+ * also perform the merging locally on each mapper before sending results to a reducer, similarly
+ * to a "combiner" in MapReduce.
*/
def reduceByKey(partitioner: Partitioner, func: (V, V) => V): RDD[(K, V)] = self.withScope {
combineByKeyWithClassTag[V]((v: V) => v, func, func, partitioner)
}
/**
- * Merge the values for each key using an associative reduce function. This will also perform
- * the merging locally on each mapper before sending results to a reducer, similarly to a
- * "combiner" in MapReduce. Output will be hash-partitioned with numPartitions partitions.
+ * Merge the values for each key using an associative and commutative reduce function. This will
+ * also perform the merging locally on each mapper before sending results to a reducer, similarly
+ * to a "combiner" in MapReduce. Output will be hash-partitioned with numPartitions partitions.
*/
def reduceByKey(func: (V, V) => V, numPartitions: Int): RDD[(K, V)] = self.withScope {
reduceByKey(new HashPartitioner(numPartitions), func)
}
/**
- * Merge the values for each key using an associative reduce function. This will also perform
- * the merging locally on each mapper before sending results to a reducer, similarly to a
- * "combiner" in MapReduce. Output will be hash-partitioned with the existing partitioner/
+ * Merge the values for each key using an associative and commutative reduce function. This will
+ * also perform the merging locally on each mapper before sending results to a reducer, similarly
+ * to a "combiner" in MapReduce. Output will be hash-partitioned with the existing partitioner/
* parallelism level.
*/
def reduceByKey(func: (V, V) => V): RDD[(K, V)] = self.withScope {
@@ -328,9 +328,9 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
}
/**
- * Merge the values for each key using an associative reduce function, but return the results
- * immediately to the master as a Map. This will also perform the merging locally on each mapper
- * before sending results to a reducer, similarly to a "combiner" in MapReduce.
+ * Merge the values for each key using an associative and commutative reduce function, but return
+ * the results immediately to the master as a Map. This will also perform the merging locally on
+ * each mapper before sending results to a reducer, similarly to a "combiner" in MapReduce.
*/
def reduceByKeyLocally(func: (V, V) => V): Map[K, V] = self.withScope {
val cleanedF = self.sparkContext.clean(func)
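To make the distinction between these two Scala-API methods concrete, a minimal sketch assuming an existing SparkContext `sc` with made-up data:

    val pairs = sc.parallelize(Seq(("a", 1), ("b", 2), ("a", 3)))

    // reduceByKey keeps the result distributed as an RDD[(String, Int)],
    // pre-aggregating on each mapper before the shuffle.
    val distributed = pairs.reduceByKey(_ + _)

    // reduceByKeyLocally performs the same per-key merge but returns the result
    // to the driver as an ordinary Scala Map: Map("a" -> 4, "b" -> 2).
    val local: scala.collection.Map[String, Int] = pairs.reduceByKeyLocally(_ + _)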
diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
index a81a98b526..6a6ad2d75a 100644
--- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -973,7 +973,7 @@ abstract class RDD[T: ClassTag](
/**
* Aggregate the elements of each partition, and then the results for all the partitions, using a
- * given associative and commutative function and a neutral "zero value". The function
+ * given associative function and a neutral "zero value". The function
* op(t1, t2) is allowed to modify t1 and return it as its result value to avoid object
* allocation; however, it should not modify t2.
*
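Finally, a small sketch of the fold contract described above, in the Scala API and assuming an existing SparkContext `sc`:

    val nums = sc.parallelize(1 to 10, 4)

    // The zero value must be neutral for the operator (0 for +): it is used
    // once per partition and once more when the partition results are combined.
    val total = nums.fold(0)(_ + _)   // 55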