aboutsummaryrefslogtreecommitdiff
path: root/core
diff options
context:
space:
mode:
authorzsxwing <zsxwing@gmail.com>2014-12-22 11:20:00 -0800
committerReynold Xin <rxin@databricks.com>2014-12-22 11:20:00 -0800
commit93b2f3a8826e189f6398c9b30bc00de205a3c64a (patch)
tree1f73ead69c154e214a689c72e322f3a2b0271005 /core
parent6ee6aa70b7d52408cc66bd1434cbeae3212e3f01 (diff)
downloadspark-93b2f3a8826e189f6398c9b30bc00de205a3c64a.tar.gz
spark-93b2f3a8826e189f6398c9b30bc00de205a3c64a.tar.bz2
spark-93b2f3a8826e189f6398c9b30bc00de205a3c64a.zip
[SPARK-4918][Core] Reuse Text in saveAsTextFile
Reuse Text in saveAsTextFile to reduce GC. /cc rxin Author: zsxwing <zsxwing@gmail.com> Closes #3762 from zsxwing/SPARK-4918 and squashes the following commits: 59f03eb [zsxwing] Reuse Text in saveAsTextFile
Diffstat (limited to 'core')
-rw-r--r--core/src/main/scala/org/apache/spark/rdd/RDD.scala16
1 files changed, 14 insertions, 2 deletions
diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
index a94206963b..f47c2d1fcd 100644
--- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -1186,7 +1186,13 @@ abstract class RDD[T: ClassTag](
// same bytecodes for `saveAsTextFile`.
val nullWritableClassTag = implicitly[ClassTag[NullWritable]]
val textClassTag = implicitly[ClassTag[Text]]
- val r = this.map(x => (NullWritable.get(), new Text(x.toString)))
+ val r = this.mapPartitions { iter =>
+ val text = new Text()
+ iter.map { x =>
+ text.set(x.toString)
+ (NullWritable.get(), text)
+ }
+ }
RDD.rddToPairRDDFunctions(r)(nullWritableClassTag, textClassTag, null)
.saveAsHadoopFile[TextOutputFormat[NullWritable, Text]](path)
}
@@ -1198,7 +1204,13 @@ abstract class RDD[T: ClassTag](
// https://issues.apache.org/jira/browse/SPARK-2075
val nullWritableClassTag = implicitly[ClassTag[NullWritable]]
val textClassTag = implicitly[ClassTag[Text]]
- val r = this.map(x => (NullWritable.get(), new Text(x.toString)))
+ val r = this.mapPartitions { iter =>
+ val text = new Text()
+ iter.map { x =>
+ text.set(x.toString)
+ (NullWritable.get(), text)
+ }
+ }
RDD.rddToPairRDDFunctions(r)(nullWritableClassTag, textClassTag, null)
.saveAsHadoopFile[TextOutputFormat[NullWritable, Text]](path, codec)
}