diff options
author | Michael Armbrust <michael@databricks.com> | 2014-04-16 18:12:56 -0700 |
---|---|---|
committer | Reynold Xin <rxin@apache.org> | 2014-04-16 18:12:56 -0700 |
commit | d4916a8eeb747e748b9fba380e9c9503ed11faed (patch) | |
tree | 8667f3133eb1806e97a51d077e7faa3841b60494 | |
parent | 07b7ad30808ea73a375be8a86e96199d704d1422 (diff) | |
download | spark-d4916a8eeb747e748b9fba380e9c9503ed11faed.tar.gz spark-d4916a8eeb747e748b9fba380e9c9503ed11faed.tar.bz2 spark-d4916a8eeb747e748b9fba380e9c9503ed11faed.zip |
Include stack trace for exceptions thrown by user code.
It is very confusing when your code throws an exception, but the only stack trace show is in the DAGScheduler. This is a simple patch to include the stack trace for the actual failure in the error message. Suggestions on formatting welcome.
Before:
```
scala> sc.parallelize(1 :: Nil).map(_ => sys.error("Ahh!")).collect()
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0.0:3 failed 1 times (most recent failure: Exception failure in TID 3 on host localhost: java.lang.RuntimeException: Ahh!)
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1055)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$abortStage$1.apply(DAGScheduler.scala:1039)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$abortStage$1.apply(DAGScheduler.scala:1037)
...
```
After:
```
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0.0:3 failed 1 times, most recent failure: Exception failure in TID 3 on host localhost: java.lang.RuntimeException: Ahh!
scala.sys.package$.error(package.scala:27)
$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:13)
$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:13)
scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
scala.collection.Iterator$class.foreach(Iterator.scala:727)
scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:48)
scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:103)
scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:47)
scala.collection.TraversableOnce$class.to(TraversableOnce.scala:273)
scala.collection.AbstractIterator.to(Iterator.scala:1157)
scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:265)
scala.collection.AbstractIterator.toBuffer(Iterator.scala:1157)
scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:252)
scala.collection.AbstractIterator.toArray(Iterator.scala:1157)
org.apache.spark.rdd.RDD$$anonfun$6.apply(RDD.scala:676)
org.apache.spark.rdd.RDD$$anonfun$6.apply(RDD.scala:676)
org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1048)
org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1048)
org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:110)
org.apache.spark.scheduler.Task.run(Task.scala:50)
org.apache.spark.executor.Executor$TaskRunner$$anonfun$run$1.apply$mcV$sp(Executor.scala:211)
org.apache.spark.deploy.SparkHadoopUtil.runAsUser(SparkHadoopUtil.scala:46)
org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:176)
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
java.lang.Thread.run(Thread.java:744)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1055)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$abortStage$1.apply(DAGScheduler.scala:1039)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$abortStage$1.apply(DAGScheduler.scala:1037)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$abortStage(DAGScheduler.scala:1037)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$processEvent$10.apply(DAGScheduler.scala:614)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$processEvent$10.apply(DAGScheduler.scala:614)
at scala.Option.foreach(Option.scala:236)
at org.apache.spark.scheduler.DAGScheduler.processEvent(DAGScheduler.scala:614)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$start$1$$anon$2$$anonfun$receive$1.applyOrElse(DAGScheduler.scala:143)
at akka.actor.ActorCell.receiveMessage(ActorCell.scala:498)
at akka.actor.ActorCell.invoke(ActorCell.scala:456)
at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:237)
at akka.dispatch.Mailbox.run(Mailbox.scala:219)
at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386)
at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
```
Author: Michael Armbrust <michael@databricks.com>
Closes #409 from marmbrus/stacktraces and squashes the following commits:
3e4eb65 [Michael Armbrust] indent. include header for driver stack trace.
018b06b [Michael Armbrust] Include stack trace for exceptions in user code.
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala | 6 |
1 files changed, 3 insertions, 3 deletions
diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala index 86d2050a03..a81b834267 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala @@ -538,8 +538,8 @@ private[spark] class TaskSetManager( return } val key = ef.description - failureReason = "Exception failure in TID %s on host %s: %s".format( - tid, info.host, ef.description) + failureReason = "Exception failure in TID %s on host %s: %s\n%s".format( + tid, info.host, ef.description, ef.stackTrace.map(" " + _).mkString("\n")) val now = clock.getTime() val (printFull, dupCount) = { if (recentExceptions.contains(key)) { @@ -582,7 +582,7 @@ private[spark] class TaskSetManager( if (numFailures(index) >= maxTaskFailures) { logError("Task %s:%d failed %d times; aborting job".format( taskSet.id, index, maxTaskFailures)) - abort("Task %s:%d failed %d times (most recent failure: %s)".format( + abort("Task %s:%d failed %d times, most recent failure: %s\nDriver stacktrace:".format( taskSet.id, index, maxTaskFailures, failureReason)) return } |