diff options
author | Josh Rosen <joshrosen@eecs.berkeley.edu> | 2012-09-28 23:55:17 -0700 |
---|---|---|
committer | Josh Rosen <joshrosen@eecs.berkeley.edu> | 2012-09-28 23:55:17 -0700 |
commit | 8654165e692d881c38e7d7e342974ba766452741 (patch) | |
tree | 3056160008ebc280c34813b2f051333a0df27d80 | |
parent | 37c199bbb098c68efecb4f8bd10b5cb8dfd9da3b (diff) | |
download | spark-8654165e692d881c38e7d7e342974ba766452741.tar.gz spark-8654165e692d881c38e7d7e342974ba766452741.tar.bz2 spark-8654165e692d881c38e7d7e342974ba766452741.zip |
Use null as dummy value in distinct().
-rw-r--r-- | core/src/main/scala/spark/RDD.scala | 2 |
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/core/src/main/scala/spark/RDD.scala b/core/src/main/scala/spark/RDD.scala
index 3cf2ff5ea4..10cf25bd62 100644
--- a/core/src/main/scala/spark/RDD.scala
+++ b/core/src/main/scala/spark/RDD.scala
@@ -169,7 +169,7 @@ abstract class RDD[T: ClassManifest](@transient sc: SparkContext) extends Serial
   def filter(f: T => Boolean): RDD[T] = new FilteredRDD(this, sc.clean(f))

   def distinct(numSplits: Int = splits.size): RDD[T] =
-    map(x => (x, "")).reduceByKey((x, y) => x, numSplits).map(_._1)
+    map(x => (x, null)).reduceByKey((x, y) => x, numSplits).map(_._1)

   def sample(withReplacement: Boolean, fraction: Double, seed: Int): RDD[T] =
     new SampledRDD(this, withReplacement, fraction, seed)