aboutsummaryrefslogtreecommitdiff
path: root/core
diff options
context:
space:
mode:
author Michael Armbrust <michael@databricks.com> 2014-04-16 18:12:56 -0700
committer Reynold Xin <rxin@apache.org> 2014-04-16 18:13:06 -0700
commit aef8a4a51c6f7f982e31d7636f66eab595d34f2e (patch)
tree 416fa75a8bc4f7c8c59f5b277b7b8b2ab2e88b2c /core
parent 822353dc59ce6eb410e9f17f78be24afaa199679 (diff)
download spark-aef8a4a51c6f7f982e31d7636f66eab595d34f2e.tar.gz
spark-aef8a4a51c6f7f982e31d7636f66eab595d34f2e.tar.bz2
spark-aef8a4a51c6f7f982e31d7636f66eab595d34f2e.zip
Include stack trace for exceptions thrown by user code.
It is very confusing when your code throws an exception, but the only stack trace shown is in the DAGScheduler. This is a simple patch to include the stack trace for the actual failure in the error message. Suggestions on formatting welcome. Before: ``` scala> sc.parallelize(1 :: Nil).map(_ => sys.error("Ahh!")).collect() org.apache.spark.SparkException: Job aborted due to stage failure: Task 0.0:3 failed 1 times (most recent failure: Exception failure in TID 3 on host localhost: java.lang.RuntimeException: Ahh!) at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1055) at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$abortStage$1.apply(DAGScheduler.scala:1039) at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$abortStage$1.apply(DAGScheduler.scala:1037) ... ``` After: ``` org.apache.spark.SparkException: Job aborted due to stage failure: Task 0.0:3 failed 1 times, most recent failure: Exception failure in TID 3 on host localhost: java.lang.RuntimeException: Ahh! 
scala.sys.package$.error(package.scala:27) $iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:13) $iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:13) scala.collection.Iterator$$anon$11.next(Iterator.scala:328) scala.collection.Iterator$class.foreach(Iterator.scala:727) scala.collection.AbstractIterator.foreach(Iterator.scala:1157) scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:48) scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:103) scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:47) scala.collection.TraversableOnce$class.to(TraversableOnce.scala:273) scala.collection.AbstractIterator.to(Iterator.scala:1157) scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:265) scala.collection.AbstractIterator.toBuffer(Iterator.scala:1157) scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:252) scala.collection.AbstractIterator.toArray(Iterator.scala:1157) org.apache.spark.rdd.RDD$$anonfun$6.apply(RDD.scala:676) org.apache.spark.rdd.RDD$$anonfun$6.apply(RDD.scala:676) org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1048) org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1048) org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:110) org.apache.spark.scheduler.Task.run(Task.scala:50) org.apache.spark.executor.Executor$TaskRunner$$anonfun$run$1.apply$mcV$sp(Executor.scala:211) org.apache.spark.deploy.SparkHadoopUtil.runAsUser(SparkHadoopUtil.scala:46) org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:176) java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) java.lang.Thread.run(Thread.java:744) Driver stacktrace: at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1055) at 
org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$abortStage$1.apply(DAGScheduler.scala:1039) at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$abortStage$1.apply(DAGScheduler.scala:1037) at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47) at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$abortStage(DAGScheduler.scala:1037) at org.apache.spark.scheduler.DAGScheduler$$anonfun$processEvent$10.apply(DAGScheduler.scala:614) at org.apache.spark.scheduler.DAGScheduler$$anonfun$processEvent$10.apply(DAGScheduler.scala:614) at scala.Option.foreach(Option.scala:236) at org.apache.spark.scheduler.DAGScheduler.processEvent(DAGScheduler.scala:614) at org.apache.spark.scheduler.DAGScheduler$$anonfun$start$1$$anon$2$$anonfun$receive$1.applyOrElse(DAGScheduler.scala:143) at akka.actor.ActorCell.receiveMessage(ActorCell.scala:498) at akka.actor.ActorCell.invoke(ActorCell.scala:456) at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:237) at akka.dispatch.Mailbox.run(Mailbox.scala:219) at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386) at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107) ``` Author: Michael Armbrust <michael@databricks.com> Closes #409 from marmbrus/stacktraces and squashes the following commits: 3e4eb65 [Michael Armbrust] indent. include header for driver stack trace. 018b06b [Michael Armbrust] Include stack trace for exceptions in user code. 
(cherry picked from commit d4916a8eeb747e748b9fba380e9c9503ed11faed) Signed-off-by: Reynold Xin <rxin@apache.org>
Diffstat (limited to 'core')
-rw-r--r-- core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala 6
1 files changed, 3 insertions, 3 deletions
diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala
index 86d2050a03..a81b834267 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala
@@ -538,8 +538,8 @@ private[spark] class TaskSetManager(
return
}
val key = ef.description
- failureReason = "Exception failure in TID %s on host %s: %s".format(
- tid, info.host, ef.description)
+ failureReason = "Exception failure in TID %s on host %s: %s\n%s".format(
+ tid, info.host, ef.description, ef.stackTrace.map(" " + _).mkString("\n"))
val now = clock.getTime()
val (printFull, dupCount) = {
if (recentExceptions.contains(key)) {
@@ -582,7 +582,7 @@ private[spark] class TaskSetManager(
if (numFailures(index) >= maxTaskFailures) {
logError("Task %s:%d failed %d times; aborting job".format(
taskSet.id, index, maxTaskFailures))
- abort("Task %s:%d failed %d times (most recent failure: %s)".format(
+ abort("Task %s:%d failed %d times, most recent failure: %s\nDriver stacktrace:".format(
taskSet.id, index, maxTaskFailures, failureReason))
return
}