diff options
author | Punya Biswal <pbiswal@palantir.com> | 2014-02-16 18:55:59 -0800 |
---|---|---|
committer | Aaron Davidson <aaron@databricks.com> | 2014-02-16 18:55:59 -0800 |
commit | 5af4477c2b191f1ffd9814192d7017e85cf95191 (patch) | |
tree | 9592c9dc004a263939e663e5d65ff2c4154179a6 /core | |
parent | 73cfdcfe71c3fdd4a9c5e71c8568f25371dab9bf (diff) | |
download | spark-5af4477c2b191f1ffd9814192d7017e85cf95191.tar.gz spark-5af4477c2b191f1ffd9814192d7017e85cf95191.tar.bz2 spark-5af4477c2b191f1ffd9814192d7017e85cf95191.zip |
Add subtractByKey to the JavaPairRDD wrapper
Author: Punya Biswal <pbiswal@palantir.com>
Closes #600 from punya/subtractByKey-java and squashes the following commits:
e961913 [Punya Biswal] Hide implicit ClassTags from Java API
c5d317b [Punya Biswal] Add subtractByKey to the JavaPairRDD wrapper
Diffstat (limited to 'core')
-rw-r--r-- | core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala | 23 |
1 files changed, 23 insertions, 0 deletions
diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala index 5b1bf9476e..cd0aea0cb3 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala @@ -278,6 +278,29 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])(implicit val kClassTag: ClassTag[K fromRDD(rdd.subtract(other, p)) /** + * Return an RDD with the pairs from `this` whose keys are not in `other`. + * + * Uses `this` partitioner/partition size, because even if `other` is huge, the resulting + * RDD will be <= us. + */ + def subtractByKey[W](other: JavaPairRDD[K, W]): JavaPairRDD[K, V] = { + implicit val cmw: ClassTag[W] = implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[W]] + fromRDD(rdd.subtractByKey(other)) + } + + /** Return an RDD with the pairs from `this` whose keys are not in `other`. */ + def subtractByKey[W](other: JavaPairRDD[K, W], numPartitions: Int): JavaPairRDD[K, V] = { + implicit val cmw: ClassTag[W] = implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[W]] + fromRDD(rdd.subtractByKey(other, numPartitions)) + } + + /** Return an RDD with the pairs from `this` whose keys are not in `other`. */ + def subtractByKey[W](other: JavaPairRDD[K, W], p: Partitioner): JavaPairRDD[K, V] = { + implicit val cmw: ClassTag[W] = implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[W]] + fromRDD(rdd.subtractByKey(other, p)) + } + + /** * Return a copy of the RDD partitioned using the specified partitioner. */ def partitionBy(partitioner: Partitioner): JavaPairRDD[K, V] = |