aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHolden Karau <holden@pigscanfly.ca>2015-05-29 14:59:18 -0700
committerJosh Rosen <joshrosen@databricks.com>2015-05-29 14:59:18 -0700
commit82a396c2f594bade276606dcd0c0545a650fb838 (patch)
treede098342982d30d272f925bf61b291b8ee0b3f2a
parent1c5b19827a091b5aba69a967600e7ca35ed3bcfd (diff)
downloadspark-82a396c2f594bade276606dcd0c0545a650fb838.tar.gz
spark-82a396c2f594bade276606dcd0c0545a650fb838.tar.bz2
spark-82a396c2f594bade276606dcd0c0545a650fb838.zip
[SPARK-7910] [TINY] [JAVAAPI] expose partitioner information in javardd
Author: Holden Karau <holden@pigscanfly.ca> Closes #6464 from holdenk/SPARK-7910-expose-partitioner-information-in-javardd and squashes the following commits: de1e644 [Holden Karau] Fix the test to get the partitioner bdb31cc [Holden Karau] Add Mima exclude for the new method 347ef4c [Holden Karau] Add a quick little test for the partitioner JavaAPI f49dca9 [Holden Karau] Add partitioner information to JavaRDDLike and fix some whitespace
-rw-r--r--core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala9
-rw-r--r--core/src/test/java/org/apache/spark/JavaAPISuite.java2
-rw-r--r--project/MimaExcludes.scala2
3 files changed, 10 insertions, 3 deletions
diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala
index b8e15f38a2..c95615a5a9 100644
--- a/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala
+++ b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala
@@ -60,10 +60,13 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable {
@deprecated("Use partitions() instead.", "1.1.0")
def splits: JList[Partition] = new java.util.ArrayList(rdd.partitions.toSeq)
-
+
/** Set of partitions in this RDD. */
def partitions: JList[Partition] = new java.util.ArrayList(rdd.partitions.toSeq)
+ /** The partitioner of this RDD. */
+ def partitioner: Optional[Partitioner] = JavaUtils.optionToOptional(rdd.partitioner)
+
/** The [[org.apache.spark.SparkContext]] that this RDD was created on. */
def context: SparkContext = rdd.context
@@ -492,9 +495,9 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable {
new java.util.ArrayList(arr)
}
- def takeSample(withReplacement: Boolean, num: Int): JList[T] =
+ def takeSample(withReplacement: Boolean, num: Int): JList[T] =
takeSample(withReplacement, num, Utils.random.nextLong)
-
+
def takeSample(withReplacement: Boolean, num: Int, seed: Long): JList[T] = {
import scala.collection.JavaConversions._
val arr: java.util.Collection[T] = rdd.takeSample(withReplacement, num, seed).toSeq
diff --git a/core/src/test/java/org/apache/spark/JavaAPISuite.java b/core/src/test/java/org/apache/spark/JavaAPISuite.java
index c2089b0e56..dfd86d3e51 100644
--- a/core/src/test/java/org/apache/spark/JavaAPISuite.java
+++ b/core/src/test/java/org/apache/spark/JavaAPISuite.java
@@ -212,6 +212,8 @@ public class JavaAPISuite implements Serializable {
JavaPairRDD<Integer, Integer> repartitioned =
rdd.repartitionAndSortWithinPartitions(partitioner);
+ Assert.assertTrue(repartitioned.partitioner().isPresent());
+ Assert.assertEquals(repartitioned.partitioner().get(), partitioner);
List<List<Tuple2<Integer, Integer>>> partitions = repartitioned.glom().collect();
Assert.assertEquals(partitions.get(0), Arrays.asList(new Tuple2<Integer, Integer>(0, 5),
new Tuple2<Integer, Integer>(0, 8), new Tuple2<Integer, Integer>(2, 6)));
diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala
index 11b439e787..8da72b3fa7 100644
--- a/project/MimaExcludes.scala
+++ b/project/MimaExcludes.scala
@@ -38,6 +38,8 @@ object MimaExcludes {
Seq(
MimaBuild.excludeSparkPackage("deploy"),
MimaBuild.excludeSparkPackage("ml"),
+ // SPARK-7910 Adding a method to get the partitioner to JavaRDD
+ ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.partitioner"),
// SPARK-5922 Adding a generalized diff(other: RDD[(VertexId, VD)]) to VertexRDD
ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.graphx.VertexRDD.diff"),
// These are needed if checking against the sbt build, since they are part of