diff options
author | Rui Li <rui.li@intel.com> | 2014-07-31 15:07:26 -0700 |
---|---|---|
committer | Josh Rosen <joshrosen@apache.org> | 2014-07-31 15:07:26 -0700 |
commit | 492a195c5c4d68c85b8b1b48e3aa85165bbb5dc3 (patch) | |
tree | 288ffa2980a66acc3bf1818e64af87c0fb8d3485 /core/src | |
parent | cc820502fb08f71b03237103153c34487b2600b4 (diff) | |
download | spark-492a195c5c4d68c85b8b1b48e3aa85165bbb5dc3.tar.gz spark-492a195c5c4d68c85b8b1b48e3aa85165bbb5dc3.tar.bz2 spark-492a195c5c4d68c85b8b1b48e3aa85165bbb5dc3.zip |
SPARK-2740: allow user to specify ascending and numPartitions for sortByKey
It would be more convenient if users could specify ascending and numPartitions when calling sortByKey.
Author: Rui Li <rui.li@intel.com>
Closes #1645 from lirui-intel/spark-2740 and squashes the following commits:
fb5d52e [Rui Li] SPARK-2740: allow user to specify ascending and numPartitions for sortByKey
Diffstat (limited to 'core/src')
-rw-r--r-- | core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala | 11 |
1 files changed, 11 insertions, 0 deletions
diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala
index 47708cb2e7..76d4193e96 100644
--- a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala
+++ b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala
@@ -789,6 +789,17 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
    * (in the `save` case, they will be written to multiple `part-X` files in the filesystem, in
    * order of the keys).
    */
+  def sortByKey(ascending: Boolean, numPartitions: Int): JavaPairRDD[K, V] = {
+    val comp = com.google.common.collect.Ordering.natural().asInstanceOf[Comparator[K]]
+    sortByKey(comp, ascending, numPartitions)
+  }
+
+  /**
+   * Sort the RDD by key, so that each partition contains a sorted range of the elements. Calling
+   * `collect` or `save` on the resulting RDD will return or output an ordered list of records
+   * (in the `save` case, they will be written to multiple `part-X` files in the filesystem, in
+   * order of the keys).
+   */
   def sortByKey(comp: Comparator[K]): JavaPairRDD[K, V] = sortByKey(comp, true)
 
   /**