aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark
diff options
context:
space:
mode:
authorMatei Zaharia <matei@databricks.com>2014-01-12 00:10:41 -0800
committerMatei Zaharia <matei@databricks.com>2014-01-12 00:10:41 -0800
commit5741078c46828f124bb8286919398a6c346b109c (patch)
treeaaea88ae21672604bab29de62f567bfbbf701f4a /python/pyspark
parentf00e949f84df949fbe32c254b592a580b4623811 (diff)
downloadspark-5741078c46828f124bb8286919398a6c346b109c.tar.gz
spark-5741078c46828f124bb8286919398a6c346b109c.tar.bz2
spark-5741078c46828f124bb8286919398a6c346b109c.zip
Log Python exceptions to stderr as well
This helps in case the exception happened while serializing a record to be sent to Java, leaving the stream to Java in an inconsistent state where PythonRDD won't be able to read the error.
Diffstat (limited to 'python/pyspark')
-rw-r--r--python/pyspark/worker.py4
1 file changed, 4 insertions, 0 deletions
diff --git a/python/pyspark/worker.py b/python/pyspark/worker.py
index f2b3f3c142..d77981f61f 100644
--- a/python/pyspark/worker.py
+++ b/python/pyspark/worker.py
@@ -76,6 +76,10 @@ def main(infile, outfile):
iterator = deserializer.load_stream(infile)
serializer.dump_stream(func(split_index, iterator), outfile)
except Exception as e:
+ # Write the error to stderr in addition to trying to pass it back to
+ # Java, in case it happened while serializing a record
+ print >> sys.stderr, "PySpark worker failed with exception:"
+ print >> sys.stderr, traceback.format_exc()
write_int(SpecialLengths.PYTHON_EXCEPTION_THROWN, outfile)
write_with_length(traceback.format_exc(), outfile)
sys.exit(-1)