diff options
author | zsxwing <zsxwing@gmail.com> | 2014-12-22 11:20:00 -0800 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2014-12-22 11:20:00 -0800 |
commit | 93b2f3a8826e189f6398c9b30bc00de205a3c64a (patch) | |
tree | 1f73ead69c154e214a689c72e322f3a2b0271005 | |
parent | 6ee6aa70b7d52408cc66bd1434cbeae3212e3f01 (diff) | |
download | spark-93b2f3a8826e189f6398c9b30bc00de205a3c64a.tar.gz spark-93b2f3a8826e189f6398c9b30bc00de205a3c64a.tar.bz2 spark-93b2f3a8826e189f6398c9b30bc00de205a3c64a.zip |
[SPARK-4918][Core] Reuse Text in saveAsTextFile
Reuse Text in saveAsTextFile to reduce GC.
/cc rxin
Author: zsxwing <zsxwing@gmail.com>
Closes #3762 from zsxwing/SPARK-4918 and squashes the following commits:
59f03eb [zsxwing] Reuse Text in saveAsTextFile
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/RDD.scala | 16 |
1 files changed, 14 insertions, 2 deletions
diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala index a94206963b..f47c2d1fcd 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala @@ -1186,7 +1186,13 @@ abstract class RDD[T: ClassTag]( // same bytecodes for `saveAsTextFile`. val nullWritableClassTag = implicitly[ClassTag[NullWritable]] val textClassTag = implicitly[ClassTag[Text]] - val r = this.map(x => (NullWritable.get(), new Text(x.toString))) + val r = this.mapPartitions { iter => + val text = new Text() + iter.map { x => + text.set(x.toString) + (NullWritable.get(), text) + } + } RDD.rddToPairRDDFunctions(r)(nullWritableClassTag, textClassTag, null) .saveAsHadoopFile[TextOutputFormat[NullWritable, Text]](path) } @@ -1198,7 +1204,13 @@ abstract class RDD[T: ClassTag]( // https://issues.apache.org/jira/browse/SPARK-2075 val nullWritableClassTag = implicitly[ClassTag[NullWritable]] val textClassTag = implicitly[ClassTag[Text]] - val r = this.map(x => (NullWritable.get(), new Text(x.toString))) + val r = this.mapPartitions { iter => + val text = new Text() + iter.map { x => + text.set(x.toString) + (NullWritable.get(), text) + } + } RDD.rddToPairRDDFunctions(r)(nullWritableClassTag, textClassTag, null) .saveAsHadoopFile[TextOutputFormat[NullWritable, Text]](path, codec) } |