aboutsummaryrefslogtreecommitdiff
path: root/core
diff options
context:
space:
mode:
author: Burak Yavuz <brkyvz@gmail.com>, 2015-05-29 22:19:15 -0700
committer: Reynold Xin <rxin@databricks.com>, 2015-05-29 22:19:15 -0700
commit: 7ed06c39922ac90acab3a78ce0f2f21184ed68a5 (patch)
tree: a9f80b3faa0304ad9983a2b73e0be45f5f213624 /core
parent: 3792d25836e1e521da64c5a62ca1b6cca1bcb6b9 (diff)
download: spark-7ed06c39922ac90acab3a78ce0f2f21184ed68a5.tar.gz
spark-7ed06c39922ac90acab3a78ce0f2f21184ed68a5.tar.bz2
spark-7ed06c39922ac90acab3a78ce0f2f21184ed68a5.zip
[SPARK-7957] Preserve partitioning when using randomSplit
cc JoshRosen. Thanks for noticing this!

Author: Burak Yavuz <brkyvz@gmail.com>

Closes #6509 from brkyvz/sample-perf-reg and squashes the following commits:

497465d [Burak Yavuz] addressed code review
293f95f [Burak Yavuz] [SPARK-7957] Preserve partitioning when using randomSplit
Diffstat (limited to 'core')
-rw-r--r-- core/src/main/scala/org/apache/spark/rdd/RDD.scala | 4
1 file changed, 2 insertions, 2 deletions
diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
index 5fcef255e1..10610f4b6f 100644
--- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -434,11 +434,11 @@ abstract class RDD[T: ClassTag](
* @return A random sub-sample of the RDD without replacement.
*/
private[spark] def randomSampleWithRange(lb: Double, ub: Double, seed: Long): RDD[T] = {
- this.mapPartitionsWithIndex { case (index, partition) =>
+ this.mapPartitionsWithIndex( { (index, partition) =>
val sampler = new BernoulliCellSampler[T](lb, ub)
sampler.setSeed(seed + index)
sampler.sample(partition)
- }
+ }, preservesPartitioning = true)
}
/**