aboutsummaryrefslogtreecommitdiff
path: root/core/src/main
diff options
context:
space:
mode:
author Josh Rosen <joshrosen@eecs.berkeley.edu> 2012-09-28 23:55:17 -0700
committer Josh Rosen <joshrosen@eecs.berkeley.edu> 2012-09-28 23:55:17 -0700
commit 8654165e692d881c38e7d7e342974ba766452741 (patch)
tree 3056160008ebc280c34813b2f051333a0df27d80 /core/src/main
parent 37c199bbb098c68efecb4f8bd10b5cb8dfd9da3b (diff)
download spark-8654165e692d881c38e7d7e342974ba766452741.tar.gz
spark-8654165e692d881c38e7d7e342974ba766452741.tar.bz2
spark-8654165e692d881c38e7d7e342974ba766452741.zip
Use null as dummy value in distinct().
Diffstat (limited to 'core/src/main')
-rw-r--r-- core/src/main/scala/spark/RDD.scala 2
1 files changed, 1 insertions, 1 deletions
diff --git a/core/src/main/scala/spark/RDD.scala b/core/src/main/scala/spark/RDD.scala
index 3cf2ff5ea4..10cf25bd62 100644
--- a/core/src/main/scala/spark/RDD.scala
+++ b/core/src/main/scala/spark/RDD.scala
@@ -169,7 +169,7 @@ abstract class RDD[T: ClassManifest](@transient sc: SparkContext) extends Serial
def filter(f: T => Boolean): RDD[T] = new FilteredRDD(this, sc.clean(f))
def distinct(numSplits: Int = splits.size): RDD[T] =
- map(x => (x, "")).reduceByKey((x, y) => x, numSplits).map(_._1)
+ map(x => (x, null)).reduceByKey((x, y) => x, numSplits).map(_._1)
def sample(withReplacement: Boolean, fraction: Double, seed: Int): RDD[T] =
new SampledRDD(this, withReplacement, fraction, seed)