diff options
29 files changed, 826 insertions, 105 deletions
@@ -35,7 +35,7 @@ Zookeeper at `zookeeper.kafka.svc.cluster.local:2181`. For Minikube run `kubectl apply -f configure/minikube-storageclass-broker.yml; kubectl apply -f configure/minikube-storageclass-zookeeper.yml`. -There's a similar setup for GKE, `configure/gke-*`. You might want to tweak it before creating. +There's a similar setup for AKS under `configure/aks-*` and for GKE under `configure/gke-*`. You might want to tweak it before creating. ## Start Zookeeper @@ -60,12 +60,10 @@ kubectl -n kafka logs kafka-0 | grep "Registered broker" ``` That's it. Just add business value :wink:. -For clients we tend to use [librdkafka](https://github.com/edenhill/librdkafka)-based drivers like [node-rdkafka](https://github.com/Blizzard/node-rdkafka). -To use [Kafka Connect](http://kafka.apache.org/documentation/#connect) and [Kafka Streams](http://kafka.apache.org/documentation/streams/) you may want to take a look at our [sample](https://github.com/solsson/dockerfiles/tree/master/connect-files) [Dockerfile](https://github.com/solsson/dockerfiles/tree/master/streams-logfilter)s. ## RBAC -For clusters that enfoce [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/) there's a minimal set of policies in +For clusters that enforce [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/) there's a minimal set of policies in ``` kubectl apply -f rbac-namespace-default/ ``` diff --git a/configure/aks-storageclass-broker-managed.yml b/configure/aks-storageclass-broker-managed.yml new file mode 100644 index 0000000..1c9b3c6 --- /dev/null +++ b/configure/aks-storageclass-broker-managed.yml @@ -0,0 +1,9 @@ +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: kafka-broker +provisioner: kubernetes.io/azure-disk +reclaimPolicy: Retain +parameters: + kind: "Managed" + storageaccounttype: Premium_LRS
\ No newline at end of file diff --git a/configure/aks-storageclass-zookeeper-managed.yml b/configure/aks-storageclass-zookeeper-managed.yml new file mode 100644 index 0000000..6963dcc --- /dev/null +++ b/configure/aks-storageclass-zookeeper-managed.yml @@ -0,0 +1,9 @@ +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: kafka-zookeeper +provisioner: kubernetes.io/azure-disk +reclaimPolicy: Retain +parameters: + kind: "Managed" + storageaccounttype: Premium_LRS
\ No newline at end of file diff --git a/configure/aws-storageclass-broker-gp2.yml b/configure/aws-storageclass-broker-gp2.yml new file mode 100644 index 0000000..94996b5 --- /dev/null +++ b/configure/aws-storageclass-broker-gp2.yml @@ -0,0 +1,10 @@ +kind: StorageClass +apiVersion: storage.k8s.io/v1 +metadata: + name: kafka-broker + labels: + k8s-addon: storage-aws.addons.k8s.io +provisioner: kubernetes.io/aws-ebs +reclaimPolicy: Retain +parameters: + type: gp2 diff --git a/configure/aws-storageclass-zookeeper-gp2.yml b/configure/aws-storageclass-zookeeper-gp2.yml new file mode 100644 index 0000000..806e455 --- /dev/null +++ b/configure/aws-storageclass-zookeeper-gp2.yml @@ -0,0 +1,10 @@ +kind: StorageClass +apiVersion: storage.k8s.io/v1 +metadata: + name: kafka-zookeeper + labels: + k8s-addon: storage-aws.addons.k8s.io +provisioner: kubernetes.io/aws-ebs +reclaimPolicy: Retain +parameters: + type: gp2 diff --git a/configure/docker-storageclass-broker.yml b/configure/docker-storageclass-broker.yml new file mode 100644 index 0000000..4a99776 --- /dev/null +++ b/configure/docker-storageclass-broker.yml @@ -0,0 +1,6 @@ +kind: StorageClass +apiVersion: storage.k8s.io/v1 +metadata: + name: kafka-broker +provisioner: docker.io/hostpath +reclaimPolicy: Retain diff --git a/configure/docker-storageclass-zookeeper.yml b/configure/docker-storageclass-zookeeper.yml new file mode 100644 index 0000000..038f2c8 --- /dev/null +++ b/configure/docker-storageclass-zookeeper.yml @@ -0,0 +1,6 @@ +kind: StorageClass +apiVersion: storage.k8s.io/v1 +metadata: + name: kafka-zookeeper +provisioner: docker.io/hostpath +reclaimPolicy: Retain diff --git a/events-kube/events-kube-kafka.yml b/events-kube/events-kube-kafka.yml new file mode 100644 index 0000000..31703b4 --- /dev/null +++ b/events-kube/events-kube-kafka.yml @@ -0,0 +1,51 @@ +apiVersion: apps/v1beta2 +kind: Deployment +metadata: + name: events-kube-kafka + namespace: kafka +spec: + replicas: 1 + strategy: + type: RollingUpdate + rollingUpdate: + # prefer duplicate events over missed + maxUnavailable: 0 + maxSurge: 1 + selector: + matchLabels: + app: events + from: kube + to: kafka + template: + metadata: + labels: + app: events + from: kube + to: kafka + spec: + containers: + - name: kafkacat-curl + image: solsson/kafkacat-curl@sha256:b5484379301937129e67550331782a0f7ac030a4b913a254d084faea4bcf44a2 + env: + - name: BOOTSTRAP + value: bootstrap.kafka:9092 + - name: TOPIC + value: ops.kube-events-all.stream.json.001 + command: + - /bin/bash + - -ec + - > + curl + -f + -s + --cacert /run/secrets/kubernetes.io/serviceaccount/ca.crt + --header "Authorization: Bearer $(cat /run/secrets/kubernetes.io/serviceaccount/token)" + https://$KUBERNETES_SERVICE_HOST:$KUBERNETES_SERVICE_PORT/api/v1/watch/events + | + kafkacat + -b $BOOTSTRAP + -t $TOPIC + -P + -z snappy + -v + -d broker,topic diff --git a/events-kube/rbac/cluster-events-watcher.yml b/events-kube/rbac/cluster-events-watcher.yml new file mode 100644 index 0000000..c8384b6 --- /dev/null +++ b/events-kube/rbac/cluster-events-watcher.yml @@ -0,0 +1,30 @@ +# If events-kube-kafka-* goes crashlooping you probably need this +--- +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1beta1 +metadata: + name: events-watcher + labels: + origin: github.com_Yolean_kubernetes-kafka +rules: +- apiGroups: + - "" + resources: + - events + verbs: + - watch +--- +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1beta1 +metadata: + name: kafka-events-watcher + labels: + origin: github.com_Yolean_kubernetes-kafka +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: events-watcher +subjects: +- kind: ServiceAccount + name: default + namespace: kafka diff --git a/events-kube/test/events-topic.yml b/events-kube/test/events-topic.yml new file mode 100644 index 0000000..0c48c36 --- /dev/null +++ b/events-kube/test/events-topic.yml @@ -0,0 +1,89 @@ +--- +kind: ConfigMap +metadata: + name: events-topic + namespace: test-kafka +apiVersion: v1 +data: + + setup.sh: |- + touch /tmp/testlog + + tail -f /tmp/testlog + + test.sh: |- + exec >> /tmp/testlog + exec 2>&1 + + PREVIOUS=$(sha1sum /tmp/event 2>/dev/null || echo "") + kafkacat -b $BOOTSTRAP -t $TOPIC -C -o -1 -c 1 | tee /tmp/event + CURRENT=$(sha1sum /tmp/event) + [ "$PREVIOUS" == "$CURRENT" ] && echo "{\"test-result\": \"No new event in $TOPIC\"}" && exit 1 + + exit 0 + + quit-on-nonzero-exit.sh: |- + exec >> /tmp/testlog + exec 2>&1 + + exit 0 + +--- +apiVersion: apps/v1beta2 +kind: Deployment +metadata: + name: events-topic + namespace: test-kafka +spec: + replicas: 1 + strategy: + type: Recreate + selector: + matchLabels: + test-target: events-topic + test-type: readiness + template: + metadata: + labels: + test-target: events-topic + test-type: readiness + # for example: + # readonly - can be used in production + # isolated - read/write but in a manner that does not affect other services + # load - unsuitable for production because it uses significant resources + # chaos - unsuitable for production because it injects failure modes + #test-use: + spec: + containers: + - name: testcase + image: solsson/kafkacat@sha256:2c539e4f58960ab7872976ebc664dd92de18cf27e7cbbeb296d654a2351f6ca4 + env: + - name: BOOTSTRAP + value: bootstrap.kafka:9092 + - name: TOPIC + value: ops.kube-events-all.stream.json.001 + command: + - /bin/bash + - -e + - /test/setup.sh + readinessProbe: + exec: + command: + - /bin/bash + - -e + - /test/test.sh + initialDelaySeconds: 10 + periodSeconds: 60 + livenessProbe: + exec: + command: + - /bin/bash + - -e + - /test/quit-on-nonzero-exit.sh + volumeMounts: + - name: config + mountPath: /test + volumes: + - name: config + configMap: + name: events-topic diff --git a/kafka/10broker-config.yml b/kafka/10broker-config.yml index c0c4c8c..debfda1 100644 --- a/kafka/10broker-config.yml +++ b/kafka/10broker-config.yml @@ -11,6 +11,9 @@ data: KAFKA_BROKER_ID=${HOSTNAME##*-} sed -i "s/#init#broker.id=#init#/broker.id=$KAFKA_BROKER_ID/" /etc/kafka/server.properties + LABELS="kafka-broker-id=$KAFKA_BROKER_ID" + ANNOTATIONS="" + hash kubectl 2>/dev/null || { sed -i "s/#init#broker.rack=#init#/#init#broker.rack=# kubectl not found in path/" /etc/kafka/server.properties } && { @@ -21,36 +24,47 @@ data: sed -i "s/#init#broker.rack=#init#/#init#broker.rack=# zone label not found for node $NODE_NAME/" /etc/kafka/server.properties else sed -i "s/#init#broker.rack=#init#/broker.rack=$ZONE/" /etc/kafka/server.properties + LABELS="$LABELS kafka-broker-rack=$ZONE" fi - kubectl -n $POD_NAMESPACE label pod $POD_NAME kafka-broker-id=$KAFKA_BROKER_ID - OUTSIDE_HOST=$(kubectl get node "$NODE_NAME" -o jsonpath='{.status.addresses[?(@.type=="InternalIP")].address}') if [ $? -ne 0 ]; then echo "Outside (i.e. cluster-external access) host lookup command failed" else - OUTSIDE_HOST=${OUTSIDE_HOST}:3240${KAFKA_BROKER_ID} - sed -i "s|#init#advertised.listeners=OUTSIDE://#init#|advertised.listeners=OUTSIDE://${OUTSIDE_HOST}|" /etc/kafka/server.properties + OUTSIDE_PORT=3240${KAFKA_BROKER_ID} + sed -i "s|#init#advertised.listeners=OUTSIDE://#init#|advertised.listeners=OUTSIDE://${OUTSIDE_HOST}:${OUTSIDE_PORT}|" /etc/kafka/server.properties + ANNOTATIONS="$ANNOTATIONS kafka-listener-outside-host=$OUTSIDE_HOST kafka-listener-outside-port=$OUTSIDE_PORT" + fi + + if [ ! -z "$LABELS" ]; then + kubectl -n $POD_NAMESPACE label pod $POD_NAME $LABELS || echo "Failed to label $POD_NAMESPACE.$POD_NAME - RBAC issue?" + fi + if [ ! -z "$ANNOTATIONS" ]; then + kubectl -n $POD_NAMESPACE annotate pod $POD_NAME $ANNOTATIONS || echo "Failed to annotate $POD_NAMESPACE.$POD_NAME - RBAC issue?" fi } server.properties: |- - # Licensed to the Apache Software Foundation (ASF) under one or more - # contributor license agreements. See the NOTICE file distributed with - # this work for additional information regarding copyright ownership. - # The ASF licenses this file to You under the Apache License, Version 2.0 - # (the "License"); you may not use this file except in compliance with - # the License. You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. - - # see kafka.server.KafkaConfig for additional details and defaults + ############################# Log Basics ############################# + + # A comma seperated list of directories under which to store log files + # Overrides log.dir + log.dirs=/var/lib/kafka/data/topics + + # The default number of log partitions per topic. More partitions allow greater + # parallelism for consumption, but this will also result in more files across + # the brokers. + num.partitions=1 + + default.replication.factor=3 + + min.insync.replicas=2 + + auto.create.topics.enable=true + + # The number of threads per data directory to be used for log recovery at startup and flushing at shutdown. + # This value is recommended to be increased for installations with data dirs located in RAID array. + num.recovery.threads.per.data.dir=1 ############################# Server Basics ############################# @@ -96,27 +110,12 @@ data: # The maximum size of a request that the socket server will accept (protection against OOM) socket.request.max.bytes=104857600 - - ############################# Log Basics ############################# - - # A comma seperated list of directories under which to store log files - log.dirs=/tmp/kafka-logs - - # The default number of log partitions per topic. More partitions allow greater - # parallelism for consumption, but this will also result in more files across - # the brokers. - num.partitions=1 - - # The number of threads per data directory to be used for log recovery at startup and flushing at shutdown. - # This value is recommended to be increased for installations with data dirs located in RAID array. - num.recovery.threads.per.data.dir=1 - ############################# Internal Topic Settings ############################# # The replication factor for the group metadata internal topics "__consumer_offsets" and "__transaction_state" # For anything other than development testing, a value greater than 1 is recommended for to ensure availability such as 3. - offsets.topic.replication.factor=1 - transaction.state.log.replication.factor=1 - transaction.state.log.min.isr=1 + #offsets.topic.replication.factor=1 + #transaction.state.log.replication.factor=1 + #transaction.state.log.min.isr=1 ############################# Log Flush Policy ############################# @@ -143,7 +142,7 @@ data: # from the end of the log. # The minimum age of a log file to be eligible for deletion due to age - log.retention.hours=168 + log.retention.hours=-1 # A size-based retention policy for logs. Segments are pruned from the log as long as the remaining # segments don't drop below log.retention.bytes. Functions independently of log.retention.hours. @@ -163,7 +162,7 @@ data: # server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". # You can also append an optional chroot string to the urls to specify the # root directory for all kafka znodes. - zookeeper.connect=localhost:2181 + zookeeper.connect=zookeeper:2181 # Timeout in ms for connecting to zookeeper zookeeper.connection.timeout.ms=6000 @@ -179,21 +178,6 @@ data: group.initial.rebalance.delay.ms=0 log4j.properties: |- - # Licensed to the Apache Software Foundation (ASF) under one or more - # contributor license agreements. See the NOTICE file distributed with - # this work for additional information regarding copyright ownership. - # The ASF licenses this file to You under the Apache License, Version 2.0 - # (the "License"); you may not use this file except in compliance with - # the License. You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. - # Unspecified loggers and loggers with additivity=true output to server.log and stdout # Note that INFO only applies to unspecified loggers, the log level of the child logger is used otherwise log4j.rootLogger=INFO, stdout diff --git a/kafka/50kafka.yml b/kafka/50kafka.yml index 1157235..229bc4d 100644 --- a/kafka/50kafka.yml +++ b/kafka/50kafka.yml @@ -4,7 +4,6 @@ metadata: name: kafka namespace: kafka spec: -spec: selector: matchLabels: app: kafka @@ -57,14 +56,6 @@ spec: command: - ./bin/kafka-server-start.sh - /etc/kafka/server.properties - - --override - - zookeeper.connect=zookeeper:2181 - - --override - - log.retention.hours=-1 - - --override - - log.dirs=/var/lib/kafka/data/topics - - --override - - auto.create.topics.enable=false resources: requests: cpu: 100m diff --git a/kafka/test/kafkacat.yml b/kafka/test/kafkacat.yml index 8d6c64e..d3c2766 100644 --- a/kafka/test/kafkacat.yml +++ b/kafka/test/kafkacat.yml @@ -83,8 +83,6 @@ spec: - test-kafkacat - --partitions - "1" - - --replication-factor - - "2" restartPolicy: Never --- apiVersion: apps/v1beta2 @@ -93,6 +91,7 @@ metadata: name: kafkacat namespace: test-kafka spec: + # Note that this test sets a consumer group, but asserts assume that the tests gets its own messages replicas: 1 selector: matchLabels: @@ -103,19 +102,13 @@ spec: labels: test-target: kafka-client-kafkacat test-type: readiness - # for example: - # readonly - can be used in production - # isolated - read/write but in a manner that does not affect other services - # load - unsuitable for production because it uses significant resources - # chaos - unsuitable for production because it injects failure modes - #test-use: spec: containers: - name: producer image: solsson/kafkacat@sha256:b32eedf936f3cde44cd164ddc77dfcf7565a8af4e357ff6de1abe4389ca530c9 env: - name: BOOTSTRAP - value: kafka-0.broker.kafka.svc.cluster.local:9092 + value: bootstrap.kafka:9092 command: - /bin/bash - -cex @@ -134,12 +127,14 @@ spec: image: solsson/kafkacat@sha256:b32eedf936f3cde44cd164ddc77dfcf7565a8af4e357ff6de1abe4389ca530c9 env: - name: BOOTSTRAP - value: kafka-0.broker.kafka.svc.cluster.local:9092 + value: bootstrap.kafka:9092 + - name: CONSUMER_GROUP_ID + value: test-kafkacat-group command: - /bin/bash - -cex - > - kafkacat -C -b $BOOTSTRAP -t test-kafkacat -o -1 -f '%T;%k:%p;%o;%s\n' -u -d broker | + kafkacat -b $BOOTSTRAP -G $CONSUMER_GROUP_ID test-kafkacat -o -1 -f '%T;%k:%p;%o;%s\n' -u -d broker | tee /shared/consumed.tmp ; volumeMounts: diff --git a/kafka/test/produce-consume.yml b/kafka/test/produce-consume.yml index a326f01..71228b8 100644 --- a/kafka/test/produce-consume.yml +++ b/kafka/test/produce-consume.yml @@ -66,8 +66,6 @@ spec: - test-produce-consume - --partitions - "1" - - --replication-factor - - "2" restartPolicy: Never --- apiVersion: apps/v1beta2 @@ -98,7 +96,7 @@ spec: image: solsson/kafka:1.0.0@sha256:17fdf1637426f45c93c65826670542e36b9f3394ede1cb61885c6a4befa8f72d env: - name: BOOTSTRAP - value: kafka-0.broker.kafka.svc.cluster.local:9092 + value: bootstrap.kafka:9092 command: - /bin/bash - -cex @@ -117,7 +115,7 @@ spec: image: solsson/kafka:1.0.0@sha256:17fdf1637426f45c93c65826670542e36b9f3394ede1cb61885c6a4befa8f72d env: - name: BOOTSTRAP - value: kafka-0.broker.kafka.svc.cluster.local:9092 + value: bootstrap.kafka:9092 command: - /bin/bash - -cex @@ -134,7 +132,7 @@ spec: image: solsson/kafkacat@sha256:ebebf47061300b14a4b4c2e1e4303ab29f65e4b95d34af1b14bb8f7ec6da7cef env: - name: BOOTSTRAP - value: kafka-0.broker.kafka.svc.cluster.local:9092 + value: bootstrap.kafka:9092 command: - /bin/bash - -e diff --git a/kafka/test/replication-config.yml b/kafka/test/replication-config.yml new file mode 100644 index 0000000..abaea49 --- /dev/null +++ b/kafka/test/replication-config.yml @@ -0,0 +1,98 @@ +# https://github.com/Yolean/kubernetes-kafka/pull/140 +--- +kind: ConfigMap +metadata: + name: replication-config + namespace: test-kafka +apiVersion: v1 +data: + + setup.sh: |- + touch /tmp/testlog + + tail -f /tmp/testlog + + test.sh: |- + exec >> /tmp/testlog + exec 2>&1 + set -e + + kafkacat -L -b $BOOTSTRAP > /tmp/metadata + BROKERS=$(cat /tmp/metadata | grep -E ' [0-9]+ brokers:' | awk '{print $1}') + if (( $BROKERS == 1 )); then + echo "Only one broker; no need to check for >1 replication config." + exit 0 + fi + + WITH_TWO_OR_MORE=$(cat /tmp/metadata | grep -E ' replicas: [0-9]+,[0-9]+' | wc -l) + WITH_ONE=$(cat /tmp/metadata | grep -E ' replicas: [0-9]+, ' | wc -l) + if (( $WITH_TWO_OR_MORE == 0 )); then + echo "No partitions have >1 replica, so this is probably normal." + exit 0 + fi + + if (( $WITH_ONE > $MAX_SINGLE_REPLICA_PARTITIONS )); then + echo "$(date --iso-8601='ns') There are $WITH_ONE partitions with only one replica. Alerts for under-replicated partitions won't catch that." + exit 10 + fi + + echo "$(date --iso-8601='ns') $WITH_ONE partitions have one replica and WITH_TWO_OR_MORE have more" + exit 0 + + quit-on-nonzero-exit.sh: |- + exec >> /tmp/testlog + exec 2>&1 + + exit 0 +--- +apiVersion: apps/v1beta2 +kind: ReplicaSet +metadata: + name: replication-config + namespace: test-kafka +spec: + # Note that this test sets a consumer group, but asserts assume that the tests gets its own messages + replicas: 1 + selector: + matchLabels: + test-target: kafka-replication-config + test-type: readiness + template: + metadata: + labels: + test-target: kafka-replication-config + test-type: readiness + spec: + containers: + - name: testcase + image: solsson/kafkacat@sha256:b32eedf936f3cde44cd164ddc77dfcf7565a8af4e357ff6de1abe4389ca530c9 + env: + - name: BOOTSTRAP + value: bootstrap.kafka:9092 + - name: MAX_SINGLE_REPLICA_PARTITIONS + value: "0" + command: + - /bin/bash + - -e + - /test/setup.sh + readinessProbe: + exec: + command: + - /bin/bash + - -e + - /test/test.sh + initialDelaySeconds: 30 + periodSeconds: 60 + livenessProbe: + exec: + command: + - /bin/bash + - -e + - /test/quit-on-nonzero-exit.sh + volumeMounts: + - name: config + mountPath: /test + volumes: + - name: config + configMap: + name: replication-config diff --git a/linkedin-burrow/burrow-config.yml b/linkedin-burrow/burrow-config.yml new file mode 100644 index 0000000..5fe6dda --- /dev/null +++ b/linkedin-burrow/burrow-config.yml @@ -0,0 +1,36 @@ +kind: ConfigMap +metadata: + name: burrow-config + namespace: kafka +apiVersion: v1 +data: + burrow.toml: |- + [zookeeper] + servers=[ "zookeeper:2181" ] + timeout=6 + root-path="/burrow" + + [cluster.local] + class-name="kafka" + servers=[ "kafka-0.broker:9092", "kafka-1.broker:9092", "kafka-2.broker:9092" ] + topic-refresh=60 + offset-refresh=30 + + [consumer.local] + class-name="kafka" + cluster="local" + servers=[ "kafka-0.broker:9092", "kafka-1.broker:9092", "kafka-2.broker:9092" ] + group-blacklist="" + group-whitelist="" + + [consumer.local_zk] + class-name="kafka_zk" + cluster="local" + servers=[ "zookeeper:2181" ] + zookeeper-path="/local" + zookeeper-timeout=30 + group-blacklist="" + group-whitelist="" + + [httpserver.default] + address=":8000" diff --git a/linkedin-burrow/burrow-service.yml b/linkedin-burrow/burrow-service.yml new file mode 100644 index 0000000..15eac06 --- /dev/null +++ b/linkedin-burrow/burrow-service.yml @@ -0,0 +1,18 @@ +kind: Service +apiVersion: v1 +metadata: + name: burrow + namespace: kafka +spec: + selector: + app: burrow + ports: + - name: web + protocol: TCP + port: 80 + - name: api + protocol: TCP + port: 8000 + - name: prometheus + protocol: TCP + port: 8080 diff --git a/linkedin-burrow/burrow.yml b/linkedin-burrow/burrow.yml new file mode 100644 index 0000000..2e8c6b5 --- /dev/null +++ b/linkedin-burrow/burrow.yml @@ -0,0 +1,60 @@ +apiVersion: apps/v1beta2 +kind: Deployment +metadata: + name: burrow + namespace: kafka +spec: + replicas: 1 + selector: + matchLabels: + app: burrow + template: + metadata: + labels: + app: burrow + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "8080" + spec: + containers: + - name: burrow + image: solsson/burrow@sha256:a6fe5d1268a7e7d58f7f960697fc8fda3e6861a2653fe71135a2207c9cf18cf0 + ports: + - name: api + containerPort: 8000 + readinessProbe: + httpGet: + path: /burrow/admin + port: 8000 + livenessProbe: + httpGet: + path: /burrow/admin + port: 8000 + volumeMounts: + - name: config + mountPath: /etc/burrow + - name: prom + image: solsson/burrow-exporter:api-v3@sha256:dd690c04ae31c62e4d7d5398f24f368fb2c48046ec3ae68bbc7582c114819a7b + ports: + - name: prometheus + containerPort: 8080 + env: + - name: BURROW_ADDR + value: http://localhost:8000 + - name: METRICS_ADDR + value: 0.0.0.0:8080 + - name: INTERVAL + value: "30" + - name: dashboard + image: joway/burrow-dashboard@sha256:b4cce87c8264119d73f9a6df5d787ea811ce2138d39ca6cd56525bcfbb5169bf + env: + - name: BURROW_BACKEND + value: http://localhost:8000 + ports: + - name: web + containerPort: 80 + protocol: TCP + volumes: + - name: config + configMap: + name: burrow-config diff --git a/maintenance/README.md b/maintenance/README.md new file mode 100644 index 0000000..5830d79 --- /dev/null +++ b/maintenance/README.md @@ -0,0 +1,32 @@ + +## Re-assign Leadership + +This is one of the cases where this repo begs to differ from traditional Kafka setups. +In Kubernetes the restart of a pod, and subsequent start on a different node, should be a non-event. + +> ”when a broker is stopped and restarted, it does not resume leadership of any partitions automatically” + +_-- Neha Narkhede, Gwen Shapira, and Todd Palino. ”Kafka: The Definitive Guide”_ + +Create the `preferred-replica-election-job.yml` resource, after deleting any previous one. + +## Change a Partition's Replicas + +> ”From time to time, it may be necessary to change the replica assignments for a partition. Some examples of when this might be needed are: +> * If a topic’s partitions are not balanced across the cluster, causing uneven load on brokers +> * If a broker is taken offline and the partition is under-replicated +> * If a new broker is added and needs to receive a share of the cluster load” + +_-- Neha Narkhede, Gwen Shapira, and Todd Palino. ”Kafka: The Definitive Guide”_ + +Use the `reassign-paritions-job.yml`, after editing `TOPICS` and `BROKERS`. + +## Increase a topic's replication factor + +See https://github.com/Yolean/kubernetes-kafka/pull/140 + +Use the `replication-factor-increase-job.yml`, after editing `TOPICS` and `BROKERS`. + +The affected topics may end up without a preferred replica. See above to fix that, +or to affect only your selected topics use [Kafka Manager](https://github.com/Yolean/kubernetes-kafka/pull/83)'s topic screen, +Generate Partition Assignments followed by Reassign Partitions. diff --git a/maintenance/preferred-replica-election-job.yml b/maintenance/preferred-replica-election-job.yml new file mode 100644 index 0000000..ac4f13a --- /dev/null +++ b/maintenance/preferred-replica-election-job.yml @@ -0,0 +1,19 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: preferred-replica-election + namespace: kafka +spec: + template: + metadata: + name: preferred-replica-election + spec: + containers: + - name: kafka + image: solsson/kafka:1.0.0@sha256:17fdf1637426f45c93c65826670542e36b9f3394ede1cb61885c6a4befa8f72d + command: + - ./bin/kafka-preferred-replica-election.sh + - --zookeeper + - zookeeper:2181 + restartPolicy: Never + backoffLimit: 3 diff --git a/maintenance/reassign-paritions-job.yml b/maintenance/reassign-paritions-job.yml new file mode 100644 index 0000000..e9e184e --- /dev/null +++ b/maintenance/reassign-paritions-job.yml @@ -0,0 +1,51 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: reassign-partitions + namespace: kafka +spec: + template: + metadata: + name: reassign-partitions + spec: + containers: + - name: kafka + image: solsson/kafka:1.0.0@sha256:17fdf1637426f45c93c65826670542e36b9f3394ede1cb61885c6a4befa8f72d + env: + - name: ZOOKEEPER + value: zookeeper.kafka:2181 + # the following must be edited per job + - name: TOPICS + value: test-produce-consume,test-kafkacat + - name: BROKERS + value: 0,2 + command: + - /bin/bash + - -ce + - > + echo '{"topics":[' > /tmp/reassign-topics.json; + echo -n ' {"topic":"' >> /tmp/reassign-topics.json; + echo -n $TOPICS | sed 's/,/"},\n {"topic":"/g' >> /tmp/reassign-topics.json; + echo '"}' >> /tmp/reassign-topics.json; + echo ']}' >> /tmp/reassign-topics.json; + + echo "# reassign-topics.json"; + cat /tmp/reassign-topics.json; + + ./bin/kafka-reassign-partitions.sh + --zookeeper=$ZOOKEEPER + --generate + --topics-to-move-json-file=/tmp/reassign-topics.json + --broker-list=$BROKERS > /tmp/generated.txt; + + tail -n 1 /tmp/generated.txt > /tmp/proposed-reassignment.json; + + echo "# proposed-reassignment.json"; + cat /tmp/proposed-reassignment.json; + + ./bin/kafka-reassign-partitions.sh + --zookeeper=$ZOOKEEPER + --execute + --reassignment-json-file=/tmp/proposed-reassignment.json; + restartPolicy: Never + backoffLimit: 3 diff --git a/maintenance/replication-factor-increase-job.yml b/maintenance/replication-factor-increase-job.yml new file mode 100644 index 0000000..de35987 --- /dev/null +++ b/maintenance/replication-factor-increase-job.yml @@ -0,0 +1,65 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: replication-factor-increase + namespace: kafka +spec: + template: + metadata: + name: replication-factor-increase + spec: + containers: + - name: kafka + image: solsson/kafka:1.0.0@sha256:17fdf1637426f45c93c65826670542e36b9f3394ede1cb61885c6a4befa8f72d + env: + - name: ZOOKEEPER + value: zookeeper.kafka:2181 + # the following must be edited per job + - name: TOPICS + value: "" + - name: BROKERS + value: 0,1,2 + command: + - /bin/bash + - -ce + - > + if [ -z "$TOPICS" ]; then + echo "Please set the TOPICS env (comma-separated) and re-create the job" + tail -f /dev/null + fi + + echo '{"topics":[' > /tmp/reassign-topics.json; + echo -n ' {"topic":"' >> /tmp/reassign-topics.json; + echo -n $TOPICS | sed 's/,/"},\n {"topic":"/g' >> /tmp/reassign-topics.json; + echo '"}' >> /tmp/reassign-topics.json; + echo ']}' >> /tmp/reassign-topics.json; + + echo "# reassign-topics.json"; + cat /tmp/reassign-topics.json; + + ./bin/kafka-reassign-partitions.sh + --zookeeper=$ZOOKEEPER + --generate + --topics-to-move-json-file=/tmp/reassign-topics.json + --broker-list=$BROKERS > /tmp/generated.txt; + + tail -n 1 /tmp/generated.txt > /tmp/proposed-reassignment.json; + + sleep 1; + echo "# proposed-reassignment.json"; + cat /tmp/proposed-reassignment.json; + + sed -i 's/"replicas":\[.\]/"replicas":[0,1,2]/g' /tmp/proposed-reassignment.json; + sed -i 's/,"log_dirs":\["any"\]//g' /tmp/proposed-reassignment.json; + echo "# proposed-reassignment.json modified to affect replication factor"; + cat /tmp/proposed-reassignment.json; + + echo "# Triggering kafka-reassign-partitions.sh" + ./bin/kafka-reassign-partitions.sh + --zookeeper=$ZOOKEEPER + --execute + --reassignment-json-file=/tmp/proposed-reassignment.json; + + echo "# Reassignment exited. Upon success you may want to run preferred-replica-election." + restartPolicy: Never + backoffLimit: 3 diff --git a/maintenance/test/replicated-partitions.yml b/maintenance/test/replicated-partitions.yml new file mode 100644 index 0000000..a1f8158 --- /dev/null +++ b/maintenance/test/replicated-partitions.yml @@ -0,0 +1,50 @@ +apiVersion: apps/v1beta2 +kind: Deployment +metadata: + name: replicated-partitions + namespace: test-kafka +spec: + replicas: 1 + selector: + matchLabels: + test-type: readiness + test-target: under-replicated-partitions + template: + metadata: + labels: + test-type: readiness + test-target: under-replicated-partitions + spec: + containers: + - name: kafka + image: solsson/kafka:1.0.0@sha256:17fdf1637426f45c93c65826670542e36b9f3394ede1cb61885c6a4befa8f72d + command: + - /bin/bash + - -ec + - > + touch /tmp/testlog; + tail -f /tmp/testlog + readinessProbe: + exec: + command: + - /bin/bash + - -c + - > + echo "### $(date -Ins -u) ###" >> /tmp/testlog + && + [ + $( + ./bin/kafka-topics.sh + --zookeeper zookeeper.kafka:2181 + --describe + --under-replicated-partitions + | + tee -a /tmp/testlog + | + wc -l + ) + -eq + 0 + ] + periodSeconds: 30 + timeoutSeconds: 29 diff --git a/prometheus/10-metrics-config.yml b/prometheus/10-metrics-config.yml new file mode 100644 index 0000000..345e192 --- /dev/null +++ b/prometheus/10-metrics-config.yml @@ -0,0 +1,45 @@ +kind: ConfigMap +metadata: + name: jmx-config + namespace: kafka +apiVersion: v1 +data: + + jmx-kafka-prometheus.yml: |+ + lowercaseOutputName: true + jmxUrl: service:jmx:rmi:///jndi/rmi://127.0.0.1:5555/jmxrmi + ssl: false + whitelistObjectNames: ["kafka.server:*","kafka.controller:*","java.lang:*"] + rules: + - pattern : kafka.server<type=ReplicaFetcherManager, name=MaxLag, clientId=(.+)><>Value + - pattern : kafka.server<type=BrokerTopicMetrics, name=(BytesInPerSec|BytesOutPerSec|MessagesInPerSec), topic=(.+)><>OneMinuteRate + - pattern : kafka.server<type=KafkaRequestHandlerPool, name=RequestHandlerAvgIdlePercent><>OneMinuteRate + - pattern : kafka.server<type=Produce><>queue-size + - pattern : kafka.server<type=ReplicaManager, name=(PartitionCount|UnderReplicatedPartitions)><>(Value|OneMinuteRate) + - pattern : kafka.server<type=controller-channel-metrics, broker-id=(.+)><>(.*) + - pattern : kafka.server<type=socket-server-metrics, networkProcessor=(.+)><>(.*) + - pattern : kafka.server<type=Fetch><>queue-size + - pattern : kafka.server<type=SessionExpireListener, name=(.+)><>OneMinuteRate + - pattern : kafka.controller<type=KafkaController, name=(.+)><>Value + - pattern : java.lang<type=OperatingSystem><>SystemCpuLoad + - pattern : java.lang<type=Memory><HeapMemoryUsage>used + - pattern : java.lang<type=OperatingSystem><>FreePhysicalMemorySize + + jmx-zookeeper-prometheus.yaml: |+ + rules: + - pattern: "org.apache.ZooKeeperService<name0=ReplicatedServer_id(\\d)><>(\\w+)" + name: "zookeeper_$2" + - pattern: "org.apache.ZooKeeperService<name0=ReplicatedServer_id(\\d), name1=replica.(\\d)><>(\\w+)" + name: "zookeeper_$3" + labels: + replicaId: "$2" + - pattern: "org.apache.ZooKeeperService<name0=ReplicatedServer_id(\\d), name1=replica.(\\d), name2=(\\w+)><>(\\w+)" + name: "zookeeper_$4" + labels: + replicaId: "$2" + memberType: "$3" + - pattern: "org.apache.ZooKeeperService<name0=ReplicatedServer_id(\\d), name1=replica.(\\d), name2=(\\w+), name3=(\\w+)><>(\\w+)" + name: "zookeeper_$4_$5" + labels: + replicaId: "$2" + memberType: "$3"
\ No newline at end of file diff --git a/prometheus/50-kafka-jmx-exporter-patch.yml b/prometheus/50-kafka-jmx-exporter-patch.yml new file mode 100644 index 0000000..7876ef4 --- /dev/null +++ b/prometheus/50-kafka-jmx-exporter-patch.yml @@ -0,0 +1,42 @@ +# meant to be applied using +# kubectl --namespace kafka patch statefulset kafka --patch "$(cat prometheus/50-kafka-jmx-exporter-patch.yml )" +apiVersion: apps/v1beta2 +kind: StatefulSet +metadata: + name: kafka + namespace: kafka +spec: + template: + metadata: + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "5556" + spec: + containers: + - name: metrics + image: solsson/kafka-prometheus-jmx-exporter@sha256:a23062396cd5af1acdf76512632c20ea6be76885dfc20cd9ff40fb23846557e8 + command: + - java + - -XX:+UnlockExperimentalVMOptions + - -XX:+UseCGroupMemoryLimitForHeap + - -XX:MaxRAMFraction=1 + - -XshowSettings:vm + - -jar + - jmx_prometheus_httpserver.jar + - "5556" + - /etc/jmx-kafka/jmx-kafka-prometheus.yml + ports: + - containerPort: 5556 + resources: + requests: + cpu: 0m + memory: 60Mi + limits: + memory: 120Mi + volumeMounts: + - name: jmx-config + mountPath: /etc/jmx-kafka + volumes: + - name: jmx-config + configMap: + name: jmx-config diff --git a/rbac-namespace-default/pod-labler.yml b/rbac-namespace-default/pod-labler.yml new file mode 100644 index 0000000..bd488b0 --- /dev/null +++ b/rbac-namespace-default/pod-labler.yml @@ -0,0 +1,39 @@ +# To see if init containers need RBAC: +# +# $ kubectl -n kafka logs kafka-2 -c init-config +# ... +# Error from server (Forbidden): pods "kafka-2" is forbidden: User "system:serviceaccount:kafka:default" cannot get pods in the namespace "kafka": Unknown user "system:serviceaccount:kafka:default" +# +--- +kind: Role +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: pod-labler + namespace: kafka + labels: + origin: github.com_Yolean_kubernetes-kafka +rules: +- apiGroups: + - "" + resources: + - pods + verbs: + - get + - update + - patch +--- +kind: RoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: kafka-pod-labler + namespace: kafka + labels: + origin: github.com_Yolean_kubernetes-kafka +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: pod-labler +subjects: +- kind: ServiceAccount + name: default + namespace: kafka diff --git a/yahoo-kafka-manager/kafka-manager.yml b/yahoo-kafka-manager/kafka-manager.yml index 77319e4..e176981 100644 --- a/yahoo-kafka-manager/kafka-manager.yml +++ b/yahoo-kafka-manager/kafka-manager.yml @@ -15,7 +15,7 @@ spec: spec: containers: - name: kafka-manager - image: solsson/kafka-manager@sha256:e07b5c50b02c761b3471ebb62ede88afc0625e325fe428316e32fec7f227ff9b + image: solsson/kafka-manager@sha256:5db7d54cdb642ec5a92f37a869fdcf2aa479b2552e900b2d2b83b38a1806c2de ports: - containerPort: 80 env: diff --git a/zookeeper/50pzoo.yml b/zookeeper/50pzoo.yml index a57dd1f..d965ad1 100644 --- a/zookeeper/50pzoo.yml +++ b/zookeeper/50pzoo.yml @@ -4,7 +4,6 @@ metadata: name: pzoo namespace: kafka spec: -spec: selector: matchLabels: app: zookeeper diff --git a/zookeeper/test.sh b/zookeeper/test.sh deleted file mode 100755 index fbe12ff..0000000 --- a/zookeeper/test.sh +++ /dev/null @@ -1,19 +0,0 @@ -#! /bin/bash - -# Copyright 2016 The Kubernetes Authors All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -kubectl exec zoo-0 -- /opt/zookeeper/bin/zkCli.sh create /foo bar; -kubectl exec zoo-2 -- /opt/zookeeper/bin/zkCli.sh get /foo; - |