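"""
Worker daemon for PySpark.

The manager process binds a loopback socket, writes the chosen port to
stdout, and pre-forks a pool of POOLSIZE workers that all accept() on the
shared socket. Each worker forks one child per client connection, so the
manager only ever sees SIGCHLD if a pool member itself dies. Spark requests
shutdown by closing the daemon's stdin; the manager then sets the shared
exit flag and broadcasts SIGHUP to the whole process group.
"""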
import os
import sys
import multiprocessing
from ctypes import c_bool
from errno import EINTR, ECHILD
from socket import socket, AF_INET, SOCK_STREAM, SOMAXCONN
from signal import signal, SIGHUP, SIGTERM, SIGCHLD, SIG_DFL, SIG_IGN

from pyspark.worker import main as worker_main
from pyspark.serializers import write_int

try:
    POOLSIZE = multiprocessing.cpu_count()
except NotImplementedError:
    POOLSIZE = 4

# Shared-memory flag: forked children inherit the same mapping, so setting
# it in the manager is immediately visible to every worker in the pool.
exit_flag = multiprocessing.Value(c_bool, False)


def should_exit():
    global exit_flag
    return exit_flag.value
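
# Pool members install their own handlers: SIGHUP is the manager's shutdown
# broadcast (by the time it arrives the exit flag must already be set), and
# SIGCHLD is reaped eagerly so per-client children never linger as zombies.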
def worker(listen_sock):
    # Redirect stdout to stderr
    os.dup2(2, 1)

    # Manager sends SIGHUP to request termination of workers in the pool
    def handle_sighup(*args):
        assert should_exit()
    signal(SIGHUP, handle_sighup)

    # Cleanup zombie children
    def handle_sigchld(*args):
        pid = status = None
        try:
            while (pid, status) != (0, 0):
                pid, status = os.waitpid(0, os.WNOHANG)
        except EnvironmentError as err:
            if err.errno == EINTR:
                # retry
                handle_sigchld()
            elif err.errno != ECHILD:
                raise
    signal(SIGCHLD, handle_sigchld)

    # Handle clients
    while not should_exit():
        # Wait until a client arrives or we have to exit
        sock = None
        while not should_exit() and sock is None:
            try:
                sock, addr = listen_sock.accept()
            except EnvironmentError as err:
                if err.errno != EINTR:
                    raise

        if sock is not None:
            # Fork a child to handle the client.
            # The client is handled in the child so that the manager
            # never receives SIGCHLD unless a worker crashes.
            if os.fork() == 0:
                # Leave the worker pool
                signal(SIGHUP, SIG_DFL)
                listen_sock.close()
                # Handle the client then exit
                sockfile = sock.makefile()
                worker_main(sockfile, sockfile)
                sockfile.close()
                sock.close()
                os._exit(0)
            else:
                sock.close()

def launch_worker(listen_sock):
    if os.fork() == 0:
        try:
            worker(listen_sock)
        except Exception as err:
            import traceback
            traceback.print_exc()
            os._exit(1)
        else:
            # worker() only returns once the exit flag has been set
            assert should_exit()
            os._exit(0)
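
# The manager keeps no per-client state: it reports its port on stdout,
# pre-forks the pool, then just waits for EOF on stdin, reaping workers
# and escalating a nonzero worker exit status into a RuntimeError.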
def manager():
    # Create a new process group to corral our children
    os.setpgid(0, 0)

    # Create a listening socket on the AF_INET loopback interface
    listen_sock = socket(AF_INET, SOCK_STREAM)
    listen_sock.bind(('127.0.0.1', 0))
    listen_sock.listen(max(1024, 2 * POOLSIZE, SOMAXCONN))
    listen_host, listen_port = listen_sock.getsockname()
    write_int(listen_port, sys.stdout)

    # Launch initial worker pool
    for idx in range(POOLSIZE):
        launch_worker(listen_sock)
    listen_sock.close()

    def shutdown():
        global exit_flag
        exit_flag.value = True

    # Gracefully exit on SIGTERM, don't die on SIGHUP
    signal(SIGTERM, lambda signum, frame: shutdown())
    signal(SIGHUP, SIG_IGN)

    # Cleanup zombie children
    def handle_sigchld(*args):
        try:
            pid, status = os.waitpid(0, os.WNOHANG)
            if status != 0 and not should_exit():
                raise RuntimeError("worker crashed: %s, %s" % (pid, status))
        except EnvironmentError as err:
            if err.errno not in (ECHILD, EINTR):
                raise
    signal(SIGCHLD, handle_sigchld)

    # Initialization complete
    sys.stdout.close()
    try:
        while not should_exit():
            try:
                # Spark tells us to exit by closing stdin
                # (Python 2: os.read() returns a str, so EOF compares equal to '')
                if os.read(0, 512) == '':
                    shutdown()
            except EnvironmentError as err:
                if err.errno != EINTR:
                    shutdown()
                    raise
    finally:
        signal(SIGTERM, SIG_DFL)
        exit_flag.value = True
        # Send SIGHUP to notify workers of shutdown
        os.kill(0, SIGHUP)

if __name__ == '__main__':
    manager()
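
# A minimal sketch of the parent-side handshake, assuming the daemon was
# started with stdin/stdout pipes. In Spark itself the JVM's
# PythonWorkerFactory plays this role; the launch command below is
# illustrative, not the exact one Spark uses:
#
#     import struct, subprocess, sys
#     from socket import create_connection
#     daemon = subprocess.Popen([sys.executable, "-u", "-m", "pyspark.daemon"],
#                               stdin=subprocess.PIPE, stdout=subprocess.PIPE)
#     (port,) = struct.unpack("!i", daemon.stdout.read(4))  # write_int is big-endian
#     conn = create_connection(("127.0.0.1", port))  # served by a pooled worker
#     ...                                            # speak the worker protocol
#     daemon.stdin.close()  # EOF on stdin asks the manager to shut down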