#!/bin/bash
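#
# vcluster: run a disposable standalone Spark cluster as local Docker
# containers: one master, a configurable number of workers, and an
# interactive spark-shell to drive them.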

print_help() {
    cat <<EOF >&2
Usage:

$0 init <spark_source>   build Spark and create the Docker images
$0 start <n>             launch a master and <n> workers (default: 5)
$0 stop                  kill and remove all cluster containers
$0 shell                 run an interactive spark-shell on the cluster
$0 list                  list the cluster's containers

EOF
}
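
# A typical session (the Spark checkout path is only an example):
#
#   ./vcluster init ~/src/spark   # build Spark and the Docker images
#   ./vcluster start 4            # launch a master and four workers
#   ./vcluster shell              # open a spark-shell on the cluster
#   ./vcluster stop               # kill and remove all cluster containers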

fail() {
    echo "$1" >&2
    exit 1
}

init() {
    spark_source="$1"
    stage_dir=$(mktemp -d)

    [ -n "$spark_source" ] || fail $(print_help)
    [ -e "$spark_source" ] || fail "No such file or directory: $spark_source"

    # build a runnable Spark distribution (make-distribution.sh creates it
    # under dist/ in the source tree) and stage it next to the Dockerfiles,
    # since docker build can only COPY files from inside its build context
    (cd "$spark_source" && ./dev/make-distribution.sh --name custom-spark -Phadoop-2.7)
    cp -r "$spark_source/dist" "$stage_dir"

    # prepare common docker image
    cat <<EOF > "$stage_dir/Dockerfile.spark"
FROM debian:jessie-backports

MAINTAINER Jakob Odersky <jakob@odersky.com>

# install base utilities
RUN \
    apt-get update && \
    apt-get install -y \
        wget \
        curl \
        git \
        openssl \
        ssh \
        openjdk-8-jdk \
        python \
        apt-transport-https \
        nano && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

COPY "$stage_dir"/dist /opt/spark

RUN adduser --system --group --home /run/spark spark

ENV SPARK_LOG_DIR /run/spark
ENV SPARK_NO_DAEMONIZE true

EXPOSE 6066
EXPOSE 7077
EXPOSE 8080

USER spark
WORKDIR /run/spark

EOF
    docker build -f "$stage_dir/Dockerfile.spark" -t jodersky/spark:latest "$stage_dir"

    # prepare master docker image
    cat <<EOF > "$stage_dir/Dockerfile.master"
FROM jodersky/spark:latest
MAINTAINER Jakob Odersky <jakob@odersky.com>
ENTRYPOINT ["/opt/spark/sbin/start-master.sh"]

EOF
    docker build -f "$stage_dir/Dockerfile.master" -t jodersky/spark-master:latest "$stage_dir"

    # prepare worker docker image
    cat <<EOF > "$stage_dir/Dockerfile.worker"
FROM jodersky/spark:latest
MAINTAINER Jakob Odersky <jakob@odersky.com>
ENV SPARK_WORKER_DIR /run/spark
ENTRYPOINT ["/opt/spark/sbin/start-slave.sh"]

EOF
    docker build -f "$stage_dir/Dockerfile.worker" -t jodersky/spark-worker:latest "$stage_dir"
}
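
# init leaves three local images behind: jodersky/spark (the base image,
# with the distribution under /opt/spark), plus jodersky/spark-master and
# jodersky/spark-worker, which each only add an entry point on top of it.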


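# Command dispatch: arguments are consumed left to right, so several
# subcommands can be chained in a single invocation (e.g. "stop" followed
# by "start").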
while [ $# -gt 0 ]; do
    case "$1" in
	init)
	    shift
	    init "$1"
	    ;;
	
	start)
	    shift
	    workers="${1:-5}"
	    # keep the container ID so the master's bridge address can be
	    # looked up, instead of assuming Docker always hands 172.17.0.2
	    # to the first container started
	    master_id=$(docker run \
			       --label spark=master \
			       --detach \
			       jodersky/spark-master)
	    master_ip=$(docker inspect --format '{{.NetworkSettings.IPAddress}}' "$master_id")

	    # split the machine's resources evenly across the workers
	    cores_total=$(grep -c ^processor /proc/cpuinfo)
	    cores_worker=$(( (cores_total + workers - 1) / workers )) # round up

	    mem_total=$(awk '/^MemTotal:/ {print $2}' /proc/meminfo) # in kB
	    mem_worker=$((mem_total / workers))

	    for i in $(seq 1 "$workers"); do
		docker run \
		       --label spark=worker \
		       --detach \
		       jodersky/spark-worker \
		       --cores "$cores_worker" \
		       --memory "${mem_worker}k" \
		       "spark://$master_ip:7077"
	    done
	    ;;

	shell)
	    # find the running master and look up its bridge address, the
	    # same way start does
	    master_id=$(docker ps --quiet --filter label=spark=master)
	    master_ip=$(docker inspect --format '{{.NetworkSettings.IPAddress}}' "$master_id")
	    docker run \
		   --label spark=shell \
		   --interactive \
		   --tty \
		   jodersky/spark /opt/spark/bin/spark-shell \
		   --master "spark://$master_ip:7077"
	    ;;
	
	stop)
	    containers=$(docker ps --quiet --filter label=spark)
	    if [[ -n "$containers" ]]; then
		docker kill $containers
		docker rm $containers
	    fi
	    ;;

	list)
	    docker ps --no-trunc --filter label=spark --format="{{.ID}} {{.Labels}} {{.Command}}"
	    ;;
	
	help|-h|--help)
	    print_help
	    ;;

	*)
	    print_help
	    exit 1
	    ;;
    esac
    shift
done