diff options
author | Burak Yavuz <brkyvz@gmail.com> | 2015-05-29 22:19:15 -0700 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2015-05-29 22:19:15 -0700 |
commit | 7ed06c39922ac90acab3a78ce0f2f21184ed68a5 (patch) | |
tree | a9f80b3faa0304ad9983a2b73e0be45f5f213624 | |
parent | 3792d25836e1e521da64c5a62ca1b6cca1bcb6b9 (diff) | |
download | spark-7ed06c39922ac90acab3a78ce0f2f21184ed68a5.tar.gz spark-7ed06c39922ac90acab3a78ce0f2f21184ed68a5.tar.bz2 spark-7ed06c39922ac90acab3a78ce0f2f21184ed68a5.zip |
[SPARK-7957] Preserve partitioning when using randomSplit
cc JoshRosen
Thanks for noticing this!
Author: Burak Yavuz <brkyvz@gmail.com>
Closes #6509 from brkyvz/sample-perf-reg and squashes the following commits:
497465d [Burak Yavuz] addressed code review
293f95f [Burak Yavuz] [SPARK-7957] Preserve partitioning when using randomSplit
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/RDD.scala | 4 |
1 file changed, 2 insertions, 2 deletions
```diff
diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
index 5fcef255e1..10610f4b6f 100644
--- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -434,11 +434,11 @@ abstract class RDD[T: ClassTag](
    * @return A random sub-sample of the RDD without replacement.
    */
   private[spark] def randomSampleWithRange(lb: Double, ub: Double, seed: Long): RDD[T] = {
-    this.mapPartitionsWithIndex { case (index, partition) =>
+    this.mapPartitionsWithIndex( { (index, partition) =>
       val sampler = new BernoulliCellSampler[T](lb, ub)
       sampler.setSeed(seed + index)
       sampler.sample(partition)
-    }
+    }, preservesPartitioning = true)
   }
 
   /**
```