aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Davies Liu <davies.liu@gmail.com> 2014-10-07 12:20:12 -0700
committer: Josh Rosen <joshrosen@apache.org> 2014-10-07 12:20:12 -0700
commit: bc87cc410fae59660c13b6ae1c14204df77237b8 (patch)
tree: 70b2d9b7dc7488570045e8a99585ba9021094621
parent: 655032965fc7e2368dff9947fc024ac720ffd19c (diff)
download: spark-bc87cc410fae59660c13b6ae1c14204df77237b8.tar.gz
spark-bc87cc410fae59660c13b6ae1c14204df77237b8.tar.bz2
spark-bc87cc410fae59660c13b6ae1c14204df77237b8.zip
[SPARK-3731] [PySpark] fix memory leak in PythonRDD
The parent.getOrCompute() of PythonRDD is executed in a separate thread, so that thread should release the memory reserved for shuffle and unrolling in a finally block.

Author: Davies Liu <davies.liu@gmail.com>

Closes #2668 from davies/leak and squashes the following commits:

ae98be2 [Davies Liu] fix memory leak in PythonRDD
-rw-r--r-- core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala 5
1 files changed, 5 insertions, 0 deletions
diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
index ad6eb9ef50..c74f86548e 100644
--- a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
+++ b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
@@ -247,6 +247,11 @@ private[spark] class PythonRDD(
// will kill the whole executor (see org.apache.spark.executor.Executor).
_exception = e
worker.shutdownOutput()
+ } finally {
+ // Release memory used by this thread for shuffles
+ env.shuffleMemoryManager.releaseMemoryForThisThread()
+ // Release memory used by this thread for unrolling blocks
+ env.blockManager.memoryStore.releaseUnrollMemoryForThisThread()
}
}
}