#!/bin/bash
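
# Build Apache Spark from source and run a small standalone cluster in local
# docker containers: one master plus a configurable number of workers.
#
# Example session (the script name and source path are placeholders):
#
#   ./spark-cluster.sh init ~/spark   # build Spark and the docker images
#   ./spark-cluster.sh start 3        # launch a master and 3 workers
#   ./spark-cluster.sh shell          # attach an interactive spark-shell
#   ./spark-cluster.sh stop           # kill and remove all cluster containers
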
print_help() {
    cat <<EOF >&2
Usage:
    $0 init <spark_source>
    $0 start [workers]    (default: 5 workers)
    $0 stop
    $0 shell
    $0 list
EOF
}
fail() {
    echo "$1" >&2
    exit 1
}
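
# Build a Spark distribution from the given source tree and bake it into three
# docker images: jodersky/spark (common base), jodersky/spark-master and
# jodersky/spark-worker. A temporary staging directory holds the distribution
# and the generated Dockerfiles, and doubles as the docker build context.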
init() {
    spark_source="$1"
    stage_dir=$(mktemp -d)
    [ -n "$spark_source" ] || { print_help; exit 1; }
    [ -e "$spark_source" ] || fail "No such file or directory: $spark_source"

    # build spark and copy the distribution into the build context
    (cd "$spark_source" && ./dev/make-distribution.sh --name custom-spark -Phadoop-2.7) \
        || fail "Spark build failed"
    cp -r "$spark_source/dist" "$stage_dir"

    # prepare common docker image; the heredoc delimiter is quoted so that
    # backslash line continuations survive verbatim into the Dockerfile
    cat <<'EOF' > "$stage_dir/Dockerfile.spark"
FROM debian:jessie-backports
MAINTAINER Jakob Odersky <jakob@odersky.com>

# install base utilities
RUN \
    apt-get update && \
    apt-get install -y \
        wget \
        curl \
        git \
        openssl \
        ssh \
        openjdk-8-jdk \
        python \
        apt-transport-https \
        nano && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# COPY sources are resolved relative to the build context, hence the
# plain "dist" rather than an absolute host path
COPY dist /opt/spark

# run Spark as an unprivileged system user
RUN adduser --system --group --home /run/spark spark
ENV SPARK_LOG_DIR /run/spark
ENV SPARK_NO_DAEMONIZE true

# REST submission server, master port and web UI
EXPOSE 6066
EXPOSE 7077
EXPOSE 8080

USER spark
WORKDIR /run/spark
EOF
    docker build -f "$stage_dir/Dockerfile.spark" -t jodersky/spark:latest "$stage_dir"

    # prepare master docker image
    cat <<'EOF' > "$stage_dir/Dockerfile.master"
FROM jodersky/spark:latest
MAINTAINER Jakob Odersky <jakob@odersky.com>
ENTRYPOINT ["/opt/spark/sbin/start-master.sh"]
EOF
    docker build -f "$stage_dir/Dockerfile.master" -t jodersky/spark-master:latest "$stage_dir"

    # prepare worker docker image
    cat <<'EOF' > "$stage_dir/Dockerfile.worker"
FROM jodersky/spark:latest
MAINTAINER Jakob Odersky <jakob@odersky.com>
ENV SPARK_WORKER_DIR /run/spark
ENTRYPOINT ["/opt/spark/sbin/start-slave.sh"]
EOF
    docker build -f "$stage_dir/Dockerfile.worker" -t jodersky/spark-worker:latest "$stage_dir"
}
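
# Command dispatch. Arguments are consumed left to right, so commands may be
# chained, e.g. "init <spark_source> start 3" (give start an explicit worker
# count when chaining it with other commands).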
while [ $# -gt 0 ]; do
    case "$1" in
        init)
            shift
            init "$1"
            ;;
        start)
            shift
            workers="${1:-5}"
            master_id=$(docker run \
                --label spark=master \
                --detach \
                jodersky/spark-master)
            # ask docker for the master's address instead of assuming it is
            # the first container on the default bridge
            master_ip=$(docker inspect --format '{{.NetworkSettings.IPAddress}}' "$master_id")
            # split the host's cores and memory evenly among the workers
            cores_total=$(grep -c ^processor /proc/cpuinfo)
            cores_worker=$(( (cores_total + workers - 1) / workers )) # round up
            mem_total=$(awk '/^MemTotal/ {print $2}' /proc/meminfo) # in kB
            mem_worker=$((mem_total / workers))
            for i in $(seq 1 "$workers"); do
                docker run \
                    --label spark=worker \
                    --detach \
                    jodersky/spark-worker \
                    --cores "$cores_worker" \
                    --memory "${mem_worker}k" \
                    "spark://$master_ip:7077"
            done
            ;;
        shell)
            master_id=$(docker ps --quiet --filter label=spark=master)
            [ -n "$master_id" ] || fail "No master container is running. Run '$0 start' first."
            master_ip=$(docker inspect --format '{{.NetworkSettings.IPAddress}}' "$master_id")
            docker run \
                --label spark=shell \
                --interactive \
                --tty \
                jodersky/spark /opt/spark/bin/spark-shell \
                --master "spark://$master_ip:7077"
            ;;
        stop)
            # --all also catches containers that have already exited
            containers=$(docker ps --all --quiet --filter label=spark)
            if [ -n "$containers" ]; then
                docker rm --force $containers
            fi
            ;;
        list)
            # --quiet and a custom --format conflict, so only the latter is used
            docker ps --format "{{.ID}} {{.Labels}} {{.Command}}" --filter label=spark --no-trunc
            ;;
        help|-h|--help|*)
            print_help
            ;;
    esac
    shift
done