aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBinh Nguyen <ngbinh@gmail.com>2013-12-10 00:38:16 -0800
committerBinh Nguyen <ngbinh@gmail.com>2013-12-10 00:38:16 -0800
commit5013fb64b27e46bbc5daf4f06fdc70938c06cf29 (patch)
tree0758aca944f9e185e56a202c019aa67ca9827372
parent6169fe14a140146602fb07cfcd13eee6efad98f9 (diff)
downloadspark-5013fb64b27e46bbc5daf4f06fdc70938c06cf29.tar.gz
spark-5013fb64b27e46bbc5daf4f06fdc70938c06cf29.tar.bz2
spark-5013fb64b27e46bbc5daf4f06fdc70938c06cf29.zip
Expose numPartitions parameter in JavaPairRDD.sortByKey()
This change make Java and Scala API on sortByKey() the same.
-rw-r--r--core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala12
1 files changed, 10 insertions, 2 deletions
diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala
index 2142fd7327..a191dfd1dc 100644
--- a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala
+++ b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala
@@ -583,12 +583,20 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])(implicit val kManifest: ClassManif
* (in the `save` case, they will be written to multiple `part-X` files in the filesystem, in
* order of the keys).
*/
- def sortByKey(comp: Comparator[K], ascending: Boolean): JavaPairRDD[K, V] = {
+ def sortByKey(comp: Comparator[K], ascending: Boolean): JavaPairRDD[K, V] = sortByKey(comp, ascending, rdd.partitions.size)
+
+ /**
+ * Sort the RDD by key, so that each partition contains a sorted range of the elements. Calling
+ * `collect` or `save` on the resulting RDD will return or output an ordered list of records
+ * (in the `save` case, they will be written to multiple `part-X` files in the filesystem, in
+ * order of the keys).
+ */
+ def sortByKey(comp: Comparator[K], ascending: Boolean, numPartitions: Int): JavaPairRDD[K, V] = {
class KeyOrdering(val a: K) extends Ordered[K] {
override def compare(b: K) = comp.compare(a, b)
}
implicit def toOrdered(x: K): Ordered[K] = new KeyOrdering(x)
- fromRDD(new OrderedRDDFunctions[K, V, (K, V)](rdd).sortByKey(ascending))
+ fromRDD(new OrderedRDDFunctions[K, V, (K, V)](rdd).sortByKey(ascending, numPartitions))
}
/**