path: root/core/src/main/scala/org/apache/spark/rpc/netty/NettyRpcEnv.scala
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.spark.rpc.netty

import java.io._
import java.net.{InetSocketAddress, URI}
import java.nio.ByteBuffer
import java.util.Arrays
import java.util.concurrent._
import javax.annotation.concurrent.GuardedBy

import scala.collection.JavaConverters._
import scala.collection.mutable
import scala.concurrent.{Future, Promise}
import scala.reflect.ClassTag
import scala.util.{DynamicVariable, Failure, Success}
import scala.util.control.NonFatal

import org.apache.spark.{Logging, SecurityManager, SparkConf}
import org.apache.spark.network.TransportContext
import org.apache.spark.network.client._
import org.apache.spark.network.netty.SparkTransportConf
import org.apache.spark.network.sasl.{SaslClientBootstrap, SaslServerBootstrap}
import org.apache.spark.network.server._
import org.apache.spark.rpc._
import org.apache.spark.serializer.{JavaSerializer, JavaSerializerInstance}
import org.apache.spark.util.{ThreadUtils, Utils}

private[netty] class NettyRpcEnv(
    val conf: SparkConf,
    javaSerializerInstance: JavaSerializerInstance,
    host: String,
    securityManager: SecurityManager) extends RpcEnv(conf) with Logging {

  private val transportConf =
    SparkTransportConf.fromSparkConf(conf, conf.getInt("spark.rpc.io.threads", 0))

  private val dispatcher: Dispatcher = new Dispatcher(this)

  private val transportContext =
    new TransportContext(transportConf, new NettyRpcHandler(dispatcher, this))

  private val clientFactory = {
    val bootstraps: Seq[TransportClientBootstrap] =
      if (securityManager.isAuthenticationEnabled()) {
        Seq(new SaslClientBootstrap(transportConf, "", securityManager,
          securityManager.isSaslEncryptionEnabled()))
      } else {
        Nil
      }
    transportContext.createClientFactory(bootstraps.asJava)
  }

  val timeoutScheduler = ThreadUtils.newDaemonSingleThreadScheduledExecutor("netty-rpc-env-timeout")

  // Because TransportClientFactory.createClient is blocking, we need to run it in this thread
  // pool so that `send` and `ask` stay non-blocking.
  // TODO: a non-blocking TransportClientFactory.createClient in the future
  private val clientConnectionExecutor = ThreadUtils.newDaemonCachedThreadPool(
    "netty-rpc-connection",
    conf.getInt("spark.rpc.connect.threads", 256))

  @volatile private var server: TransportServer = _

  def start(port: Int): Unit = {
    val bootstraps: Seq[TransportServerBootstrap] =
      if (securityManager.isAuthenticationEnabled()) {
        Seq(new SaslServerBootstrap(transportConf, securityManager))
      } else {
        Nil
      }
    server = transportContext.createServer(port, bootstraps.asJava)
    dispatcher.registerRpcEndpoint(IDVerifier.NAME, new IDVerifier(this, dispatcher))
  }

  override lazy val address: RpcAddress = {
    require(server != null, "NettyRpcEnv has not yet started")
    RpcAddress(host, server.getPort())
  }

  override def setupEndpoint(name: String, endpoint: RpcEndpoint): RpcEndpointRef = {
    dispatcher.registerRpcEndpoint(name, endpoint)
  }

  def asyncSetupEndpointRefByURI(uri: String): Future[RpcEndpointRef] = {
    val addr = NettyRpcAddress(uri)
    val endpointRef = new NettyRpcEndpointRef(conf, addr, this)
    val idVerifierRef =
      new NettyRpcEndpointRef(conf, NettyRpcAddress(addr.host, addr.port, IDVerifier.NAME), this)
    idVerifierRef.ask[Boolean](ID(endpointRef.name)).flatMap { found =>
      if (found) {
        Future.successful(endpointRef)
      } else {
        Future.failed(new RpcEndpointNotFoundException(uri))
      }
    }(ThreadUtils.sameThread)
  }
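
  // Hedged usage sketch (caller side; the endpoint name and port are hypothetical). The URI
  // format "spark://host:port/name" matches NettyRpcAddress and NettyRpcEndpointRef.toURI below:
  //
  //   rpcEnv.asyncSetupEndpointRefByURI("spark://localhost:7077/my-endpoint").onComplete {
  //     case Success(ref) => ref.send("hello")
  //     case Failure(e) => logWarning("Endpoint not found", e)
  //   }(ThreadUtils.sameThread)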

  override def stop(endpointRef: RpcEndpointRef): Unit = {
    require(endpointRef.isInstanceOf[NettyRpcEndpointRef])
    dispatcher.stop(endpointRef)
  }

  private[netty] def send(message: RequestMessage): Unit = {
    val remoteAddr = message.receiver.address
    if (remoteAddr == address) {
      val promise = Promise[Any]()
      dispatcher.postMessage(message, promise)
      promise.future.onComplete {
        case Success(response) =>
          val ack = response.asInstanceOf[Ack]
          logDebug(s"Receive ack from ${ack.sender}")
        case Failure(e) =>
          logError(s"Exception when sending $message", e)
      }(ThreadUtils.sameThread)
    } else {
      try {
        // `createClient` will block if it cannot find a known connection, so we should run it in
        // clientConnectionExecutor
        clientConnectionExecutor.execute(new Runnable {
          override def run(): Unit = Utils.tryLogNonFatalError {
            val client = clientFactory.createClient(remoteAddr.host, remoteAddr.port)
            client.sendRpc(serialize(message), new RpcResponseCallback {

              override def onFailure(e: Throwable): Unit = {
                logError(s"Exception when sending $message", e)
              }

              override def onSuccess(response: Array[Byte]): Unit = {
                val ack = deserialize[Ack](response)
                logDebug(s"Receive ack from ${ack.sender}")
              }
            })
          }
        })
      } catch {
        case e: RejectedExecutionException =>
          // `send` was called after clientConnectionExecutor was shut down; ignore it.
          logWarning(s"Cannot send $message because RpcEnv is stopped")
      }
    }
  }
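
  // Both `send` above and `ask` below short-circuit messages addressed to this RpcEnv itself,
  // posting them straight to the local dispatcher instead of going through the network stack;
  // only the handling of the response differs.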

  private[netty] def ask(message: RequestMessage): Future[Any] = {
    val promise = Promise[Any]()
    val remoteAddr = message.receiver.address
    if (remoteAddr == address) {
      val p = Promise[Any]()
      dispatcher.postMessage(message, p)
      p.future.onComplete {
        case Success(response) =>
          val reply = response.asInstanceOf[AskResponse]
          if (reply.reply.isInstanceOf[RpcFailure]) {
            if (!promise.tryFailure(reply.reply.asInstanceOf[RpcFailure].e)) {
              logWarning(s"Ignore failure: ${reply.reply}")
            }
          } else if (!promise.trySuccess(reply.reply)) {
            logWarning(s"Ignore message: ${reply}")
          }
        case Failure(e) =>
          if (!promise.tryFailure(e)) {
            logWarning("Ignore Exception", e)
          }
      }(ThreadUtils.sameThread)
    } else {
      try {
        // `createClient` will block if it cannot find a known connection, so we should run it in
        // clientConnectionExecutor
        clientConnectionExecutor.execute(new Runnable {
          override def run(): Unit = {
            val client = clientFactory.createClient(remoteAddr.host, remoteAddr.port)
            client.sendRpc(serialize(message), new RpcResponseCallback {

              override def onFailure(e: Throwable): Unit = {
                if (!promise.tryFailure(e)) {
                  logWarning("Ignore Exception", e)
                }
              }

              override def onSuccess(response: Array[Byte]): Unit = {
                val reply = deserialize[AskResponse](response)
                if (reply.reply.isInstanceOf[RpcFailure]) {
                  if (!promise.tryFailure(reply.reply.asInstanceOf[RpcFailure].e)) {
                    logWarning(s"Ignore failure: ${reply.reply}")
                  }
                } else if (!promise.trySuccess(reply.reply)) {
                  logWarning(s"Ignore message: ${reply}")
                }
              }
            })
          }
        })
      } catch {
        case e: RejectedExecutionException =>
          if (!promise.tryFailure(e)) {
            logWarning("Ignore failure", e)
          }
      }
    }
    promise.future
  }

  private[netty] def serialize(content: Any): Array[Byte] = {
    val buffer = javaSerializerInstance.serialize(content)
    // Copy only the readable slice: the serializer may return a ByteBuffer whose backing array
    // is larger than, and offset from, the serialized content.
    Arrays.copyOfRange(
      buffer.array(), buffer.arrayOffset + buffer.position, buffer.arrayOffset + buffer.limit)
  }

  private[netty] def deserialize[T: ClassTag](bytes: Array[Byte]): T = {
    deserialize { () =>
      javaSerializerInstance.deserialize[T](ByteBuffer.wrap(bytes))
    }
  }
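
  // Round-trip sketch (illustrative; `senderAddr` and `ref` are hypothetical). The byte-array
  // overload above delegates to the `deserialize(deserializationAction)` override at the bottom
  // of this class, so a NettyRpcEndpointRef inside the payload is rewired to this env via
  // `NettyRpcEnv.currentEnv`:
  //
  //   val bytes = nettyEnv.serialize(RequestMessage(senderAddr, ref, "ping", true))
  //   val msg = nettyEnv.deserialize[RequestMessage](bytes)  // msg.receiver is usable again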

  override def endpointRef(endpoint: RpcEndpoint): RpcEndpointRef = {
    dispatcher.getRpcEndpointRef(endpoint)
  }

  override def uriOf(systemName: String, address: RpcAddress, endpointName: String): String =
    new NettyRpcAddress(address.host, address.port, endpointName).toString

  override def shutdown(): Unit = {
    cleanup()
  }

  override def awaitTermination(): Unit = {
    dispatcher.awaitTermination()
  }

  private def cleanup(): Unit = {
    if (timeoutScheduler != null) {
      timeoutScheduler.shutdownNow()
    }
    if (server != null) {
      server.close()
    }
    if (clientFactory != null) {
      clientFactory.close()
    }
    if (dispatcher != null) {
      dispatcher.stop()
    }
    if (clientConnectionExecutor != null) {
      clientConnectionExecutor.shutdownNow()
    }
  }

  override def deserialize[T](deserializationAction: () => T): T = {
    NettyRpcEnv.currentEnv.withValue(this) {
      deserializationAction()
    }
  }
}

private[netty] object NettyRpcEnv extends Logging {

  /**
   * When deserializing a [[NettyRpcEndpointRef]], it needs a reference to the [[NettyRpcEnv]].
   * Use `currentEnv` to wrap the deserialization code. E.g.,
   *
   * {{{
   *   NettyRpcEnv.currentEnv.withValue(this) {
   *     your deserialization code
   *   }
   * }}}
   */
  private[netty] val currentEnv = new DynamicVariable[NettyRpcEnv](null)
}

private[netty] class NettyRpcEnvFactory extends RpcEnvFactory with Logging {

  def create(config: RpcEnvConfig): RpcEnv = {
    val sparkConf = config.conf
    // Using JavaSerializerInstance from multiple threads is safe. However, if we plan to support
    // KryoSerializer in the future, we will have to use ThreadLocal to store SerializerInstance
    val javaSerializerInstance =
      new JavaSerializer(sparkConf).newInstance().asInstanceOf[JavaSerializerInstance]
    val nettyEnv =
      new NettyRpcEnv(sparkConf, javaSerializerInstance, config.host, config.securityManager)
    val startNettyRpcEnv: Int => (NettyRpcEnv, Int) = { actualPort =>
      nettyEnv.start(actualPort)
      (nettyEnv, actualPort)
    }
    try {
      Utils.startServiceOnPort(config.port, startNettyRpcEnv, sparkConf, "NettyRpcEnv")._1
    } catch {
      case NonFatal(e) =>
        nettyEnv.shutdown()
        throw e
    }
  }
}

private[netty] class NettyRpcEndpointRef(@transient conf: SparkConf)
  extends RpcEndpointRef(conf) with Serializable with Logging {

  @transient @volatile private var nettyEnv: NettyRpcEnv = _

  @transient @volatile private var _address: NettyRpcAddress = _

  def this(conf: SparkConf, _address: NettyRpcAddress, nettyEnv: NettyRpcEnv) {
    this(conf)
    this._address = _address
    this.nettyEnv = nettyEnv
  }

  override def address: RpcAddress = _address.toRpcAddress

  private def readObject(in: ObjectInputStream): Unit = {
    in.defaultReadObject()
    _address = in.readObject().asInstanceOf[NettyRpcAddress]
    nettyEnv = NettyRpcEnv.currentEnv.value
  }

  private def writeObject(out: ObjectOutputStream): Unit = {
    out.defaultWriteObject()
    out.writeObject(_address)
  }

  override def name: String = _address.name

  override def ask[T: ClassTag](message: Any, timeout: RpcTimeout): Future[T] = {
    val promise = Promise[Any]()
    val timeoutCancelable = nettyEnv.timeoutScheduler.schedule(new Runnable {
      override def run(): Unit = {
        promise.tryFailure(new TimeoutException("Cannot receive any reply in " + timeout.duration))
      }
    }, timeout.duration.toNanos, TimeUnit.NANOSECONDS)
    val f = nettyEnv.ask(RequestMessage(nettyEnv.address, this, message, true))
    f.onComplete { v =>
      timeoutCancelable.cancel(true)
      if (!promise.tryComplete(v)) {
        logWarning(s"Ignore message $v")
      }
    }(ThreadUtils.sameThread)
    promise.future.mapTo[T].recover(timeout.addMessageIfTimeout)(ThreadUtils.sameThread)
  }
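
  // The `ask` above races the real reply against a scheduled timeout task: whichever side
  // completes the Promise first wins, and `tryComplete`/`tryFailure` make the race safe. A
  // minimal sketch of the same pattern in isolation (illustrative; `scheduler`, `underlying`
  // and `ec` are assumed to be in scope):
  //
  //   val p = Promise[Any]()
  //   val task = scheduler.schedule(new Runnable {
  //     override def run(): Unit = p.tryFailure(new TimeoutException("timed out"))
  //   }, timeout.toNanos, TimeUnit.NANOSECONDS)
  //   underlying.onComplete { v => task.cancel(true); p.tryComplete(v) }(ec)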

  override def send(message: Any): Unit = {
    require(message != null, "Message is null")
    nettyEnv.send(RequestMessage(nettyEnv.address, this, message, false))
  }

  override def toString: String = s"NettyRpcEndpointRef(${_address})"

  def toURI: URI = new URI(s"spark://${_address}")

  final override def equals(that: Any): Boolean = that match {
    case other: NettyRpcEndpointRef => _address == other._address
    case _ => false
  }

  final override def hashCode(): Int = if (_address == null) 0 else _address.hashCode()
}

/**
 * The message that is sent from the sender to the receiver.
 */
private[netty] case class RequestMessage(
    senderAddress: RpcAddress, receiver: NettyRpcEndpointRef, content: Any, needReply: Boolean)

/**
 * The base trait for all messages that are sent back from the receiver to the sender.
 */
private[netty] trait ResponseMessage

/**
 * The reply for `ask` from the receiver side.
 */
private[netty] case class AskResponse(sender: NettyRpcEndpointRef, reply: Any)
  extends ResponseMessage

/**
 * An acknowledgement sent back to the sender side. It's necessary because [[TransportClient]]
 * only cleans up its resources when it receives a reply.
 */
private[netty] case class Ack(sender: NettyRpcEndpointRef) extends ResponseMessage

/**
 * A response indicating that some failure happened on the receiver side. It is carried inside
 * [[AskResponse]]'s `reply` field rather than being a [[ResponseMessage]] itself.
 */
private[netty] case class RpcFailure(e: Throwable)

/**
 * Maintains the mapping between client addresses and [[RpcEnv]] addresses, broadcasts network
 * events and forwards messages to [[Dispatcher]].
 */
private[netty] class NettyRpcHandler(
    dispatcher: Dispatcher, nettyEnv: NettyRpcEnv) extends RpcHandler with Logging {

  private type ClientAddress = RpcAddress
  private type RemoteEnvAddress = RpcAddress

  // Store all client addresses and their NettyRpcEnv addresses.
  @GuardedBy("this")
  private val remoteAddresses = new mutable.HashMap[ClientAddress, RemoteEnvAddress]()

  // Store the connection count from each remote NettyRpcEnv address. We need to keep track of
  // the connection count because `TransportClientFactory.createClient` will create multiple
  // connections (at most `spark.shuffle.io.numConnectionsPerPeer` connections) and randomly
  // select one of them to send each message. See `TransportClientFactory.createClient` for more
  // details.
  @GuardedBy("this")
  private val remoteConnectionCount = new mutable.HashMap[RemoteEnvAddress, Int]()
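
  // Worked example of the bookkeeping below: if a remote RpcEnv at address E opens two
  // connections (client addresses C1 and C2), `receive` fires Associated(E) only for the first
  // one (count 0 -> 1), and `connectionTerminated` fires Disassociated(E) only when the last of
  // C1/C2 closes (count 1 -> 0).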

  override def receive(
      client: TransportClient, message: Array[Byte], callback: RpcResponseCallback): Unit = {
    val requestMessage = nettyEnv.deserialize[RequestMessage](message)
    val addr = client.getChannel().remoteAddress().asInstanceOf[InetSocketAddress]
    assert(addr != null)
    val remoteEnvAddress = requestMessage.senderAddress
    val clientAddr = RpcAddress(addr.getHostName, addr.getPort)
    val broadcastMessage =
      synchronized {
        // If this is the first connection from a remote RpcEnv, we should broadcast "Associated"
        if (remoteAddresses.put(clientAddr, remoteEnvAddress).isEmpty) {
          // clientAddr is connecting for the first time
          val count = remoteConnectionCount.getOrElse(remoteEnvAddress, 0)
          // Increase the connection number of remoteEnvAddress
          remoteConnectionCount.put(remoteEnvAddress, count + 1)
          if (count == 0) {
            // This is the first connection, so fire "Associated"
            Some(Associated(remoteEnvAddress))
          } else {
            None
          }
        } else {
          None
        }
      }
    broadcastMessage.foreach(dispatcher.broadcastMessage)
    dispatcher.postMessage(requestMessage, callback)
  }

  override def getStreamManager: StreamManager = new OneForOneStreamManager

  override def exceptionCaught(cause: Throwable, client: TransportClient): Unit = {
    val addr = client.getChannel().remoteAddress().asInstanceOf[InetSocketAddress]
    if (addr != null) {
      val clientAddr = RpcAddress(addr.getHostName, addr.getPort)
      val broadcastMessage =
        synchronized {
          remoteAddresses.get(clientAddr).map(AssociationError(cause, _))
        }
      if (broadcastMessage.isEmpty) {
        logError(cause.getMessage, cause)
      } else {
        dispatcher.broadcastMessage(broadcastMessage.get)
      }
    } else {
      // If the channel is closed before connecting, its remoteAddress will be null.
      // See java.net.Socket.getRemoteSocketAddress
      // Because we cannot get an RpcAddress, just log the error.
      logError("Exception before connecting to the client", cause)
    }
  }

  override def connectionTerminated(client: TransportClient): Unit = {
    val addr = client.getChannel().remoteAddress().asInstanceOf[InetSocketAddress]
    if (addr != null) {
      val clientAddr = RpcAddress(addr.getHostName, addr.getPort)
      val broadcastMessage =
        synchronized {
          // If the last connection to a remote RpcEnv is terminated, we should broadcast
          // "Disassociated"
          remoteAddresses.get(clientAddr).flatMap { remoteEnvAddress =>
            remoteAddresses -= clientAddr
            val count = remoteConnectionCount.getOrElse(remoteEnvAddress, 0)
            assert(count != 0, "remoteAddresses and remoteConnectionCount are not consistent")
            if (count - 1 == 0) {
              // We lost all clients, so clean up and fire "Disassociated"
              remoteConnectionCount.remove(remoteEnvAddress)
              Some(Disassociated(remoteEnvAddress))
            } else {
              // Decrease the connection number of remoteEnvAddress
              remoteConnectionCount.put(remoteEnvAddress, count - 1)
              None
            }
          }
        }
      broadcastMessage.foreach(dispatcher.broadcastMessage)
    } else {
      // If the channel is closed before connecting, its remoteAddress will be null. In this case,
      // we can ignore it since we don't fire "Associated".
      // See java.net.Socket.getRemoteSocketAddress
    }
  }

}