streaming/src/main/scala/spark/streaming/Scheduler.scala


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package spark.streaming

import util.{ManualClock, RecurringTimer, Clock}
import spark.SparkEnv
import spark.Logging

/**
 * Generates and submits the jobs of a streaming computation on a recurring timer,
 * and periodically checkpoints the graph of the owning [[StreamingContext]].
 *
 * On `start()` the scheduler either begins from scratch or, when `ssc` reports that a
 * checkpoint is present, reschedules the batches recorded as pending in the checkpoint
 * together with the batch times that elapsed since the checkpoint was taken.
 *
 * @param ssc the streaming context supplying the graph, the Spark environment and the
 *            checkpoint settings used by this scheduler
 */
private[streaming]
class Scheduler(ssc: StreamingContext) extends Logging {

  initLogging()

  // Read from the `spark.streaming.concurrentJobs` system property (default "1") and
  // passed straight to the JobManager.
  val concurrentJobs = System.getProperty("spark.streaming.concurrentJobs", "1").toInt
  val jobManager = new JobManager(ssc, concurrentJobs)

  // NOTE: this is null unless BOTH a checkpoint interval and a checkpoint directory are
  // set on `ssc` at construction time, so every use of it must be null-guarded.
  val checkpointWriter = if (ssc.checkpointDuration != null && ssc.checkpointDir != null) {
    new CheckpointWriter(ssc.checkpointDir)
  } else {
    null
  }

  // The clock class is chosen through the `spark.streaming.clock` system property and
  // instantiated reflectively through its no-argument constructor.
  val clockClass = System.getProperty("spark.streaming.clock", "spark.streaming.util.SystemClock")
  val clock = Class.forName(clockClass).newInstance().asInstanceOf[Clock]

  // The timer is given the batch duration in milliseconds and a callback that turns the
  // millisecond timestamp it receives into a Time and generates the jobs for it.
  val timer = new RecurringTimer(clock, ssc.graph.batchDuration.milliseconds,
    longTime => generateJobs(new Time(longTime)))
  val graph = ssc.graph

  // Time most recently passed to generateJobs(); null until the first call.
  var latestTime: Time = null

  /** Starts the scheduler, restarting from the checkpoint when `ssc` reports one is present. */
  def start(): Unit = synchronized {
    if (ssc.isCheckpointPresent) {
      restart()
    } else {
      startFirstTime()
    }
    logInfo("Scheduler started")
  }

  /** Stops the timer, the job manager, the checkpoint writer (if one exists) and the graph. */
  def stop(): Unit = synchronized {
    timer.stop()
    jobManager.stop()
    if (checkpointWriter != null) checkpointWriter.stop()
    ssc.graph.stop()
    logInfo("Scheduler stopped")
  }

  /** Starts the graph one batch duration before the timer's start time, then starts the timer. */
  private def startFirstTime(): Unit = {
    val startTime = new Time(timer.getStartTime())
    graph.start(startTime - graph.batchDuration)
    timer.start(startTime.milliseconds)
    logInfo("Scheduler's timer started at " + startTime)
  }

  /**
   * Restarts from the initial checkpoint of `ssc`: reschedules the pending and
   * down-time batches in time order, then restarts the timer at the restart time.
   */
  private def restart(): Unit = {

    // If manual clock is being used for testing, then
    // either set the manual clock to the last checkpointed time,
    // or if the property is defined set it to that time
    if (clock.isInstanceOf[ManualClock]) {
      val lastTime = ssc.initialCheckpoint.checkpointTime.milliseconds
      val jumpTime = System.getProperty("spark.streaming.manualClock.jump", "0").toLong
      clock.asInstanceOf[ManualClock].setTime(lastTime + jumpTime)
    }

    val batchDuration = ssc.graph.batchDuration

    // Batches when the master was down, that is,
    // between the checkpoint and current restart time
    val checkpointTime = ssc.initialCheckpoint.checkpointTime
    val restartTime = new Time(timer.getRestartTime(graph.zeroTime.milliseconds))
    val downTimes = checkpointTime.until(restartTime, batchDuration)
    logInfo("Batches during down time: " + downTimes.mkString(", "))

    // Batches that were unprocessed before failure
    val pendingTimes = ssc.initialCheckpoint.pendingTimes
    logInfo("Batches pending processing: " + pendingTimes.mkString(", "))

    // Reschedule jobs for these times; a batch may appear in both lists, hence `distinct`
    val timesToReschedule = (pendingTimes ++ downTimes).distinct.sorted(Time.ordering)
    logInfo("Batches to reschedule: " + timesToReschedule.mkString(", "))
    timesToReschedule.foreach { time =>
      graph.generateJobs(time).foreach(jobManager.runJob)
    }

    // Restart the timer
    timer.start(restartTime.milliseconds)
    logInfo("Scheduler's timer restarted at " + restartTime)
  }

  /**
   * Generates the jobs for the given `time`, submits them to the job manager,
   * records `time` as the latest generated time and attempts a checkpoint.
   *
   * @param time the batch time to generate jobs for
   */
  def generateJobs(time: Time): Unit = {
    // Set the environment of `ssc` for the calling thread before touching the graph.
    SparkEnv.set(ssc.env)
    logInfo("\n-----------------------------------------------------\n")
    graph.generateJobs(time).foreach(jobManager.runJob)
    latestTime = time
    doCheckpoint(time)
  }

  /**
   * Clears old metadata assuming jobs of `time` have finished processing,
   * and also attempts a checkpoint.
   *
   * @param time the batch time whose jobs are assumed to be complete
   */
  def clearOldMetadata(time: Time): Unit = {
    ssc.graph.clearOldMetadata(time)
    doCheckpoint(time)
  }

  /**
   * Performs a checkpoint for the given `time` when checkpointing is enabled and the
   * time elapsed since the graph's zero time is a multiple of the checkpoint interval.
   *
   * @param time the batch time to checkpoint
   */
  def doCheckpoint(time: Time): Unit = synchronized {
    // `checkpointWriter` is null when no checkpoint directory was set at construction,
    // even if a checkpoint interval is configured. Guard on it too so that case is
    // skipped instead of failing with a NullPointerException on `write`.
    val checkpointingEnabled = checkpointWriter != null && ssc.checkpointDuration != null
    if (checkpointingEnabled && (time - graph.zeroTime).isMultipleOf(ssc.checkpointDuration)) {
      logInfo("Checkpointing graph for time " + time)
      ssc.graph.updateCheckpointData(time)
      checkpointWriter.write(new Checkpoint(ssc, time))
    }
  }
}