diff options
author | Michael Armbrust <michael@databricks.com> | 2014-04-16 18:12:56 -0700 |
---|---|---|
committer | Reynold Xin <rxin@apache.org> | 2014-04-16 18:13:06 -0700 |
commit | aef8a4a51c6f7f982e31d7636f66eab595d34f2e (patch) | |
tree | 416fa75a8bc4f7c8c59f5b277b7b8b2ab2e88b2c /core | |
parent | 822353dc59ce6eb410e9f17f78be24afaa199679 (diff) | |
download | spark-aef8a4a51c6f7f982e31d7636f66eab595d34f2e.tar.gz spark-aef8a4a51c6f7f982e31d7636f66eab595d34f2e.tar.bz2 spark-aef8a4a51c6f7f982e31d7636f66eab595d34f2e.zip |
Include stack trace for exceptions thrown by user code.
It is very confusing when your code throws an exception, but the only stack trace show is in the DAGScheduler. This is a simple patch to include the stack trace for the actual failure in the error message. Suggestions on formatting welcome.
Before:
```
scala> sc.parallelize(1 :: Nil).map(_ => sys.error("Ahh!")).collect()
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0.0:3 failed 1 times (most recent failure: Exception failure in TID 3 on host localhost: java.lang.RuntimeException: Ahh!)
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1055)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$abortStage$1.apply(DAGScheduler.scala:1039)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$abortStage$1.apply(DAGScheduler.scala:1037)
...
```
After:
```
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0.0:3 failed 1 times, most recent failure: Exception failure in TID 3 on host localhost: java.lang.RuntimeException: Ahh!
scala.sys.package$.error(package.scala:27)
$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:13)
$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:13)
scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
scala.collection.Iterator$class.foreach(Iterator.scala:727)
scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:48)
scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:103)
scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:47)
scala.collection.TraversableOnce$class.to(TraversableOnce.scala:273)
scala.collection.AbstractIterator.to(Iterator.scala:1157)
scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:265)
scala.collection.AbstractIterator.toBuffer(Iterator.scala:1157)
scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:252)
scala.collection.AbstractIterator.toArray(Iterator.scala:1157)
org.apache.spark.rdd.RDD$$anonfun$6.apply(RDD.scala:676)
org.apache.spark.rdd.RDD$$anonfun$6.apply(RDD.scala:676)
org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1048)
org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1048)
org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:110)
org.apache.spark.scheduler.Task.run(Task.scala:50)
org.apache.spark.executor.Executor$TaskRunner$$anonfun$run$1.apply$mcV$sp(Executor.scala:211)
org.apache.spark.deploy.SparkHadoopUtil.runAsUser(SparkHadoopUtil.scala:46)
org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:176)
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
java.lang.Thread.run(Thread.java:744)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1055)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$abortStage$1.apply(DAGScheduler.scala:1039)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$abortStage$1.apply(DAGScheduler.scala:1037)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$abortStage(DAGScheduler.scala:1037)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$processEvent$10.apply(DAGScheduler.scala:614)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$processEvent$10.apply(DAGScheduler.scala:614)
at scala.Option.foreach(Option.scala:236)
at org.apache.spark.scheduler.DAGScheduler.processEvent(DAGScheduler.scala:614)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$start$1$$anon$2$$anonfun$receive$1.applyOrElse(DAGScheduler.scala:143)
at akka.actor.ActorCell.receiveMessage(ActorCell.scala:498)
at akka.actor.ActorCell.invoke(ActorCell.scala:456)
at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:237)
at akka.dispatch.Mailbox.run(Mailbox.scala:219)
at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386)
at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
```
Author: Michael Armbrust <michael@databricks.com>
Closes #409 from marmbrus/stacktraces and squashes the following commits:
3e4eb65 [Michael Armbrust] indent. include header for driver stack trace.
018b06b [Michael Armbrust] Include stack trace for exceptions in user code.
(cherry picked from commit d4916a8eeb747e748b9fba380e9c9503ed11faed)
Signed-off-by: Reynold Xin <rxin@apache.org>
Diffstat (limited to 'core')
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala | 6 |
1 files changed, 3 insertions, 3 deletions
diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala index 86d2050a03..a81b834267 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala @@ -538,8 +538,8 @@ private[spark] class TaskSetManager( return } val key = ef.description - failureReason = "Exception failure in TID %s on host %s: %s".format( - tid, info.host, ef.description) + failureReason = "Exception failure in TID %s on host %s: %s\n%s".format( + tid, info.host, ef.description, ef.stackTrace.map(" " + _).mkString("\n")) val now = clock.getTime() val (printFull, dupCount) = { if (recentExceptions.contains(key)) { @@ -582,7 +582,7 @@ private[spark] class TaskSetManager( if (numFailures(index) >= maxTaskFailures) { logError("Task %s:%d failed %d times; aborting job".format( taskSet.id, index, maxTaskFailures)) - abort("Task %s:%d failed %d times (most recent failure: %s)".format( + abort("Task %s:%d failed %d times, most recent failure: %s\nDriver stacktrace:".format( taskSet.id, index, maxTaskFailures, failureReason)) return } |