aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala21
-rw-r--r--python/pyspark/worker.py1
2 files changed, 18 insertions, 4 deletions
diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
index 0d508d624f..ba085c5974 100644
--- a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
+++ b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
@@ -145,11 +145,24 @@ private[spark] class PythonRDD(
stream.readFully(update)
accumulator += Collections.singletonList(update)
}
+
// Check whether the worker is ready to be re-used.
- if (stream.readInt() == SpecialLengths.END_OF_STREAM) {
- if (reuse_worker) {
- env.releasePythonWorker(pythonExec, envVars.toMap, worker)
- released = true
+ if (reuse_worker) {
+      // It is very likely that the ending mark is already available,
+      // and the current task should not be blocked by waiting for it.
+
+ if (stream.available() >= 4) {
+ val ending = stream.readInt()
+ if (ending == SpecialLengths.END_OF_STREAM) {
+ env.releasePythonWorker(pythonExec, envVars.toMap, worker)
+ released = true
+ logInfo(s"Communication with worker ended cleanly, re-use it: $worker")
+ } else {
+ logInfo(s"Communication with worker did not end cleanly (ending with $ending), " +
+ s"close it: $worker")
+ }
+ } else {
+ logInfo(s"The ending mark from worker is not available, close it: $worker")
}
}
null
diff --git a/python/pyspark/worker.py b/python/pyspark/worker.py
index 7e5343c973..c2ddd4d4a5 100644
--- a/python/pyspark/worker.py
+++ b/python/pyspark/worker.py
@@ -127,6 +127,7 @@ def main(infile, outfile):
write_int(len(_accumulatorRegistry), outfile)
for (aid, accum) in _accumulatorRegistry.items():
pickleSer._write_with_length((aid, accum._value), outfile)
+ outfile.flush()
# check end of stream
if read_int(infile) == SpecialLengths.END_OF_STREAM: