Refactor DAGScheduler to aid testing

author: Charles Reiss <charles@eecs.berkeley.edu> 2013-01-28 22:41:08 -0800
committer: Charles Reiss <charles@eecs.berkeley.edu> 2013-01-29 18:55:42 -0800
commit: a3d14c0404d6b28433784f84086a29ecc0045a12 (patch)
tree: 5296faa7a9e550342497d90c4f600b97587d99fd
parent: 0f81025ecadbfd21edb64602658ae8ba26e5bf66 (diff)
download: spark-a3d14c0404d6b28433784f84086a29ecc0045a12.tar.gz
spark-a3d14c0404d6b28433784f84086a29ecc0045a12.tar.bz2
spark-a3d14c0404d6b28433784f84086a29ecc0045a12.zip
2 files changed, 18 insertions, 12 deletions
diff --git a/core/src/main/scala/spark/SparkContext.scala b/core/src/main/scala/spark/SparkContext.scala
index dc9b8688b3..6ae04f4a44 100644
--- a/core/src/main/scala/spark/SparkContext.scala
+++ b/core/src/main/scala/spark/SparkContext.scala
@@ -187,6 +187,7 @@ class SparkContext(
   taskScheduler.start()
 
   private var dagScheduler = new DAGScheduler(taskScheduler)
+  dagScheduler.start()
 
   /** A default Hadoop Configuration for the Hadoop code (e.g. file systems) that we reuse. */
   val hadoopConfiguration = {
diff --git a/core/src/main/scala/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/spark/scheduler/DAGScheduler.scala
index b130be6a38..9655961162 100644
--- a/core/src/main/scala/spark/scheduler/DAGScheduler.scala
+++ b/core/src/main/scala/spark/scheduler/DAGScheduler.scala
@@ -23,7 +23,14 @@ import util.{MetadataCleaner, TimeStampedHashMap}
  * and to report fetch failures (the submitTasks method, and code to add CompletionEvents).
  */
 private[spark]
-class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with Logging {
+class DAGScheduler(taskSched: TaskScheduler, 
+                   mapOutputTracker: MapOutputTracker,
+                   blockManagerMaster: BlockManagerMaster,
+                   env: SparkEnv)
+                      extends TaskSchedulerListener with Logging {
+  def this(taskSched: TaskScheduler) {
+    this(taskSched, SparkEnv.get.mapOutputTracker, SparkEnv.get.blockManager.master, SparkEnv.get)
+  }
   taskSched.setListener(this)
 
   // Called by TaskScheduler to report task completions or failures.
@@ -66,10 +73,6 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with
 
   var cacheLocs = new HashMap[Int, Array[List[String]]]
 
-  val env = SparkEnv.get
-  val mapOutputTracker = env.mapOutputTracker
-  val blockManagerMaster = env.blockManager.master
-
   // For tracking failed nodes, we use the MapOutputTracker's generation number, which is
   // sent with every task. When we detect a node failing, we note the current generation number
   // and failed executor, increment it for new tasks, and use this to ignore stray ShuffleMapTask
@@ -90,12 +93,14 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with
   val metadataCleaner = new MetadataCleaner("DAGScheduler", this.cleanup)
 
   // Start a thread to run the DAGScheduler event loop
-  new Thread("DAGScheduler") {
-    setDaemon(true)
-    override def run() {
-      DAGScheduler.this.run()
-    }
-  }.start()
+  def start() {
+    new Thread("DAGScheduler") {
+      setDaemon(true)
+      override def run() {
+        DAGScheduler.this.run()
+      }
+    }.start()
+  }
 
   def getCacheLocs(rdd: RDD[_]): Array[List[String]] = {
     if (!cacheLocs.contains(rdd.id)) {
@@ -546,7 +551,7 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with
     if (!failedGeneration.contains(execId) || failedGeneration(execId) < currentGeneration) {
       failedGeneration(execId) = currentGeneration
       logInfo("Executor lost: %s (generation %d)".format(execId, currentGeneration))
-      env.blockManager.master.removeExecutor(execId)
+      blockManagerMaster.removeExecutor(execId)
       // TODO: This will be really slow if we keep accumulating shuffle map stages
       for ((shuffleId, stage) <- shuffleToMapStage) {
         stage.removeOutputsOnExecutor(execId)
author	Charles Reiss <charles@eecs.berkeley.edu>	2013-01-28 22:41:08 -0800
committer	Charles Reiss <charles@eecs.berkeley.edu>	2013-01-29 18:55:42 -0800
commit	a3d14c0404d6b28433784f84086a29ecc0045a12 (patch)
tree	5296faa7a9e550342497d90c4f600b97587d99fd
parent	0f81025ecadbfd21edb64602658ae8ba26e5bf66 (diff)
download	spark-a3d14c0404d6b28433784f84086a29ecc0045a12.tar.gz spark-a3d14c0404d6b28433784f84086a29ecc0045a12.tar.bz2 spark-a3d14c0404d6b28433784f84086a29ecc0045a12.zip