diff options
-rw-r--r-- | maintenance/README.md | 32 | ||||
-rw-r--r-- | maintenance/preferred-replica-election-job.yml | 19 | ||||
-rw-r--r-- | maintenance/reassign-paritions-job.yml | 51 | ||||
-rw-r--r-- | maintenance/replication-factor-increase-job.yml | 65 | ||||
-rw-r--r-- | maintenance/test/replicated-partitions.yml | 50 | ||||
-rw-r--r-- | prometheus/10-metrics-config.yml | 45 | ||||
-rw-r--r-- | prometheus/50-kafka-jmx-exporter-patch.yml | 42 |
7 files changed, 304 insertions, 0 deletions
diff --git a/maintenance/README.md b/maintenance/README.md new file mode 100644 index 0000000..5830d79 --- /dev/null +++ b/maintenance/README.md @@ -0,0 +1,32 @@ + +## Re-assign Leadership + +This is one of the cases where this repo begs to differ from traditional Kafka setups. +In Kubernetes the restart of a pod, and subsequent start on a different node, should be a non-event. + +> “when a broker is stopped and restarted, it does not resume leadership of any partitions automatically” + +_-- Neha Narkhede, Gwen Shapira, and Todd Palino. “Kafka: The Definitive Guide”_ + +Create the `preferred-replica-election-job.yml` resource, after deleting any previous one. + +## Change a Partition's Replicas + +> “From time to time, it may be necessary to change the replica assignments for a partition. Some examples of when this might be needed are: +> * If a topic’s partitions are not balanced across the cluster, causing uneven load on brokers +> * If a broker is taken offline and the partition is under-replicated +> * If a new broker is added and needs to receive a share of the cluster load” + +_-- Neha Narkhede, Gwen Shapira, and Todd Palino. “Kafka: The Definitive Guide”_ + +Use the `reassign-paritions-job.yml`, after editing `TOPICS` and `BROKERS`. + +## Increase a topic's replication factor + +See https://github.com/Yolean/kubernetes-kafka/pull/140 + +Use the `replication-factor-increase-job.yml`, after editing `TOPICS` and `BROKERS`. + +The affected topics may end up without a preferred replica. See “Re-assign Leadership” above to fix that for all topics; +to affect only your selected topics, use the topic screen of [Kafka Manager](https://github.com/Yolean/kubernetes-kafka/pull/83): +Generate Partition Assignments, followed by Reassign Partitions. 
diff --git a/maintenance/preferred-replica-election-job.yml b/maintenance/preferred-replica-election-job.yml
new file mode 100644
index 0000000..ac4f13a
--- /dev/null
+++ b/maintenance/preferred-replica-election-job.yml
@@ -0,0 +1,19 @@
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: preferred-replica-election
+  namespace: kafka
+spec:
+  template:
+    metadata:
+      name: preferred-replica-election
+    spec:
+      containers:
+      - name: kafka
+        image: solsson/kafka:1.0.0@sha256:17fdf1637426f45c93c65826670542e36b9f3394ede1cb61885c6a4befa8f72d
+        command:
+        - ./bin/kafka-preferred-replica-election.sh
+        - --zookeeper
+        - zookeeper:2181
+      restartPolicy: Never
+  backoffLimit: 3
diff --git a/maintenance/reassign-paritions-job.yml b/maintenance/reassign-paritions-job.yml
new file mode 100644
index 0000000..e9e184e
--- /dev/null
+++ b/maintenance/reassign-paritions-job.yml
@@ -0,0 +1,51 @@
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: reassign-partitions
+  namespace: kafka
+spec:
+  template:
+    metadata:
+      name: reassign-partitions
+    spec:
+      containers:
+      - name: kafka
+        image: solsson/kafka:1.0.0@sha256:17fdf1637426f45c93c65826670542e36b9f3394ede1cb61885c6a4befa8f72d
+        env:
+        - name: ZOOKEEPER
+          value: zookeeper.kafka:2181
+        # the following must be edited per job
+        - name: TOPICS
+          value: test-produce-consume,test-kafkacat
+        - name: BROKERS
+          value: 0,2
+        command:
+        - /bin/bash
+        - -ce
+        - > # folded scalar: same-indent lines are joined with spaces, so each shell command must end with ';'
+          echo '{"topics":[' > /tmp/reassign-topics.json;
+          echo -n ' {"topic":"' >> /tmp/reassign-topics.json;
+          echo -n $TOPICS | sed 's/,/"},\n {"topic":"/g' >> /tmp/reassign-topics.json;
+          echo '"}' >> /tmp/reassign-topics.json;
+          echo ']}' >> /tmp/reassign-topics.json;
+
+          echo "# reassign-topics.json";
+          cat /tmp/reassign-topics.json;
+
+          ./bin/kafka-reassign-partitions.sh
+          --zookeeper=$ZOOKEEPER
+          --generate
+          --topics-to-move-json-file=/tmp/reassign-topics.json
+          --broker-list=$BROKERS > /tmp/generated.txt;
+
+          tail -n 1 /tmp/generated.txt > /tmp/proposed-reassignment.json;
+
+          echo "# proposed-reassignment.json";
+          cat /tmp/proposed-reassignment.json;
+
+          ./bin/kafka-reassign-partitions.sh
+          --zookeeper=$ZOOKEEPER
+          --execute
+          --reassignment-json-file=/tmp/proposed-reassignment.json;
+      restartPolicy: Never
+  backoffLimit: 3
diff --git a/maintenance/replication-factor-increase-job.yml b/maintenance/replication-factor-increase-job.yml
new file mode 100644
index 0000000..de35987
--- /dev/null
+++ b/maintenance/replication-factor-increase-job.yml
@@ -0,0 +1,65 @@
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: replication-factor-increase
+  namespace: kafka
+spec:
+  template:
+    metadata:
+      name: replication-factor-increase
+    spec:
+      containers:
+      - name: kafka
+        image: solsson/kafka:1.0.0@sha256:17fdf1637426f45c93c65826670542e36b9f3394ede1cb61885c6a4befa8f72d
+        env:
+        - name: ZOOKEEPER
+          value: zookeeper.kafka:2181
+        # the following must be edited per job
+        - name: TOPICS
+          value: ""
+        - name: BROKERS
+          value: 0,1,2 # used for --broker-list and as the new replica list of every single-replica partition
+        command:
+        - /bin/bash
+        - -ce
+        - > # folded scalar: same-indent lines are joined with spaces, so each shell command must end with ';'
+          if [ -z "$TOPICS" ]; then
+            echo "Please set the TOPICS env (comma-separated) and re-create the job";
+            tail -f /dev/null;
+          fi;
+
+          echo '{"topics":[' > /tmp/reassign-topics.json;
+          echo -n ' {"topic":"' >> /tmp/reassign-topics.json;
+          echo -n $TOPICS | sed 's/,/"},\n {"topic":"/g' >> /tmp/reassign-topics.json;
+          echo '"}' >> /tmp/reassign-topics.json;
+          echo ']}' >> /tmp/reassign-topics.json;
+
+          echo "# reassign-topics.json";
+          cat /tmp/reassign-topics.json;
+
+          ./bin/kafka-reassign-partitions.sh
+          --zookeeper=$ZOOKEEPER
+          --generate
+          --topics-to-move-json-file=/tmp/reassign-topics.json
+          --broker-list=$BROKERS > /tmp/generated.txt;
+
+          tail -n 1 /tmp/generated.txt > /tmp/proposed-reassignment.json;
+
+          sleep 1;
+          echo "# proposed-reassignment.json";
+          cat /tmp/proposed-reassignment.json;
+
+          sed -i 's/"replicas":\[.\]/"replicas":['"$BROKERS"']/g' /tmp/proposed-reassignment.json;
+          sed -i 's/,"log_dirs":\["any"\]//g' /tmp/proposed-reassignment.json;
+          echo "# proposed-reassignment.json modified to affect replication factor";
+          cat /tmp/proposed-reassignment.json;
+
+          echo "# Triggering kafka-reassign-partitions.sh";
+          ./bin/kafka-reassign-partitions.sh
+          --zookeeper=$ZOOKEEPER
+          --execute
+          --reassignment-json-file=/tmp/proposed-reassignment.json;
+
+          echo "# Reassignment exited. Upon success you may want to run preferred-replica-election."
+      restartPolicy: Never
+  backoffLimit: 3
diff --git a/maintenance/test/replicated-partitions.yml b/maintenance/test/replicated-partitions.yml
new file mode 100644
index 0000000..a1f8158
--- /dev/null
+++ b/maintenance/test/replicated-partitions.yml
@@ -0,0 +1,50 @@
+apiVersion: apps/v1beta2
+kind: Deployment
+metadata:
+  name: replicated-partitions
+  namespace: test-kafka
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      test-type: readiness
+      test-target: under-replicated-partitions
+  template:
+    metadata:
+      labels:
+        test-type: readiness
+        test-target: under-replicated-partitions
+    spec:
+      containers:
+      - name: kafka
+        image: solsson/kafka:1.0.0@sha256:17fdf1637426f45c93c65826670542e36b9f3394ede1cb61885c6a4befa8f72d
+        command:
+        - /bin/bash
+        - -ec
+        - >
+          touch /tmp/testlog;
+          tail -f /tmp/testlog
+        readinessProbe:
+          exec:
+            command:
+            - /bin/bash
+            - -c
+            - >
+              echo "### $(date -Ins -u) ###" >> /tmp/testlog
+              &&
+              [
+              $(
+              ./bin/kafka-topics.sh
+              --zookeeper zookeeper.kafka:2181
+              --describe
+              --under-replicated-partitions
+              |
+              tee -a /tmp/testlog
+              |
+              wc -l
+              )
+              -eq
+              0
+              ]
+          periodSeconds: 30
+          timeoutSeconds: 29
diff --git a/prometheus/10-metrics-config.yml b/prometheus/10-metrics-config.yml
new file mode 100644
index 0000000..345e192
--- /dev/null
+++ b/prometheus/10-metrics-config.yml
@@ -0,0 +1,45 @@
+kind: ConfigMap
+metadata:
+  name: jmx-config
+  namespace: kafka
+apiVersion: v1
+data:
+
+  jmx-kafka-prometheus.yml: |+
+    lowercaseOutputName: true
+    jmxUrl: service:jmx:rmi:///jndi/rmi://127.0.0.1:5555/jmxrmi
+    ssl: false
+    whitelistObjectNames: ["kafka.server:*","kafka.controller:*","java.lang:*"]
+    rules:
+      - pattern : kafka.server<type=ReplicaFetcherManager, name=MaxLag, clientId=(.+)><>Value
+      - pattern : kafka.server<type=BrokerTopicMetrics, name=(BytesInPerSec|BytesOutPerSec|MessagesInPerSec), topic=(.+)><>OneMinuteRate
+      - pattern : kafka.server<type=KafkaRequestHandlerPool, name=RequestHandlerAvgIdlePercent><>OneMinuteRate
+      - pattern : kafka.server<type=Produce><>queue-size
+      - pattern : kafka.server<type=ReplicaManager, name=(PartitionCount|UnderReplicatedPartitions)><>(Value|OneMinuteRate)
+      - pattern : kafka.server<type=controller-channel-metrics, broker-id=(.+)><>(.*)
+      - pattern : kafka.server<type=socket-server-metrics, networkProcessor=(.+)><>(.*)
+      - pattern : kafka.server<type=Fetch><>queue-size
+      - pattern : kafka.server<type=SessionExpireListener, name=(.+)><>OneMinuteRate
+      - pattern : kafka.controller<type=KafkaController, name=(.+)><>Value
+      - pattern : java.lang<type=OperatingSystem><>SystemCpuLoad
+      - pattern : java.lang<type=Memory><HeapMemoryUsage>used
+      - pattern : java.lang<type=OperatingSystem><>FreePhysicalMemorySize
+
+  jmx-zookeeper-prometheus.yaml: |+
+    rules:
+      - pattern: "org.apache.ZooKeeperService<name0=ReplicatedServer_id(\\d)><>(\\w+)"
+        name: "zookeeper_$2"
+      - pattern: "org.apache.ZooKeeperService<name0=ReplicatedServer_id(\\d), name1=replica.(\\d)><>(\\w+)"
+        name: "zookeeper_$3"
+        labels:
+          replicaId: "$2"
+      - pattern: "org.apache.ZooKeeperService<name0=ReplicatedServer_id(\\d), name1=replica.(\\d), name2=(\\w+)><>(\\w+)"
+        name: "zookeeper_$4"
+        labels:
+          replicaId: "$2"
+          memberType: "$3"
+      - pattern: "org.apache.ZooKeeperService<name0=ReplicatedServer_id(\\d), name1=replica.(\\d), name2=(\\w+), name3=(\\w+)><>(\\w+)"
+        name: "zookeeper_$4_$5"
+        labels:
+          replicaId: "$2"
+          memberType: "$3"
\ No newline at end of file
diff --git a/prometheus/50-kafka-jmx-exporter-patch.yml b/prometheus/50-kafka-jmx-exporter-patch.yml
new file mode 100644
index 0000000..7876ef4
--- /dev/null
+++ b/prometheus/50-kafka-jmx-exporter-patch.yml
@@ -0,0 +1,42 @@
+# meant to be applied using
+# kubectl --namespace kafka patch statefulset kafka --patch "$(cat prometheus/50-kafka-jmx-exporter-patch.yml )"
+apiVersion: apps/v1beta2
+kind: StatefulSet
+metadata:
+  name: kafka
+  namespace: kafka
+spec:
+  template:
+    metadata:
+      annotations:
+        prometheus.io/scrape: "true"
+        prometheus.io/port: "5556"
+    spec:
+      containers:
+      - name: metrics
+        image: solsson/kafka-prometheus-jmx-exporter@sha256:a23062396cd5af1acdf76512632c20ea6be76885dfc20cd9ff40fb23846557e8
+        command:
+        - java
+        - -XX:+UnlockExperimentalVMOptions
+        - -XX:+UseCGroupMemoryLimitForHeap
+        - -XX:MaxRAMFraction=1
+        - -XshowSettings:vm
+        - -jar
+        - jmx_prometheus_httpserver.jar
+        - "5556"
+        - /etc/jmx-kafka/jmx-kafka-prometheus.yml
+        ports:
+        - containerPort: 5556
+        resources:
+          requests:
+            cpu: 0m
+            memory: 60Mi
+          limits:
+            memory: 120Mi
+        volumeMounts:
+        - name: jmx-config
+          mountPath: /etc/jmx-kafka
+      volumes:
+      - name: jmx-config
+        configMap:
+          name: jmx-config