From 5741078c46828f124bb8286919398a6c346b109c Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Sun, 12 Jan 2014 00:10:41 -0800 Subject: Log Python exceptions to stderr as well This helps in case the exception happened while serializing a record to be sent to Java, leaving the stream to Java in an inconsistent state where PythonRDD won't be able to read the error. --- python/pyspark/worker.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/python/pyspark/worker.py b/python/pyspark/worker.py index f2b3f3c142..d77981f61f 100644 --- a/python/pyspark/worker.py +++ b/python/pyspark/worker.py @@ -76,6 +76,10 @@ def main(infile, outfile): iterator = deserializer.load_stream(infile) serializer.dump_stream(func(split_index, iterator), outfile) except Exception as e: + # Write the error to stderr in addition to trying to pass it back to + # Java, in case it happened while serializing a record + print >> sys.stderr, "PySpark worker failed with exception:" + print >> sys.stderr, traceback.format_exc() write_int(SpecialLengths.PYTHON_EXCEPTION_THROWN, outfile) write_with_length(traceback.format_exc(), outfile) sys.exit(-1) -- cgit v1.2.3