From b2758955030a185a86c21a527ac9da4a569afe66 Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Fri, 19 Jan 2018 13:07:54 +0100 Subject: Current config from the metrics-improve-scrape-times branch, see https://github.com/Yolean/kubernetes-kafka/pull/49 --- jmx/10-metrics-config.yml | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 jmx/10-metrics-config.yml diff --git a/jmx/10-metrics-config.yml b/jmx/10-metrics-config.yml new file mode 100644 index 0000000..4416cce --- /dev/null +++ b/jmx/10-metrics-config.yml @@ -0,0 +1,44 @@ +kind: ConfigMap +metadata: + name: jmx-config + namespace: kafka +apiVersion: v1 +data: + + jmx-kafka-prometheus.yml: |+ + lowercaseOutputName: true + jmxUrl: service:jmx:rmi:///jndi/rmi://127.0.0.1:5555/jmxrmi + ssl: false + whitelistObjectNames: ["kafka.server:*","java.lang:*"] + rules: + - pattern : kafka.server<>Value + - pattern : kafka.server<>OneMinuteRate + - pattern : kafka.server<>OneMinuteRate + - pattern : kafka.server<>queue-size + - pattern : kafka.server<>(Value|OneMinuteRate) + - pattern : kafka.server<>(.*) + - pattern : kafka.server<>(.*) + - pattern : kafka.server<>queue-size + - pattern : kafka.server<>OneMinuteRate + - pattern : java.lang<>SystemCpuLoad + - pattern : java.langused + - pattern : java.lang<>FreePhysicalMemorySize + + jmx-zookeeper-prometheus.yaml: |+ + rules: + - pattern: "org.apache.ZooKeeperService<>(\\w+)" + name: "zookeeper_$2" + - pattern: "org.apache.ZooKeeperService<>(\\w+)" + name: "zookeeper_$3" + labels: + replicaId: "$2" + - pattern: "org.apache.ZooKeeperService<>(\\w+)" + name: "zookeeper_$4" + labels: + replicaId: "$2" + memberType: "$3" + - pattern: "org.apache.ZooKeeperService<>(\\w+)" + name: "zookeeper_$4_$5" + labels: + replicaId: "$2" + memberType: "$3" \ No newline at end of file -- cgit v1.2.3 From d82b419d3ea798c23c3a85ff793d03ef8e483314 Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Fri, 19 Jan 2018 13:13:06 +0100 Subject: 
The metrics part of #49 --- jmx/50kafka.yml | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 jmx/50kafka.yml diff --git a/jmx/50kafka.yml b/jmx/50kafka.yml new file mode 100644 index 0000000..9dea8bb --- /dev/null +++ b/jmx/50kafka.yml @@ -0,0 +1,38 @@ +apiVersion: apps/v1beta2 +kind: StatefulSet +metadata: + name: kafka + namespace: kafka +spec: + template: + metadata: + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "5556" + spec: + containers: + - name: metrics + image: solsson/kafka-prometheus-jmx-exporter@sha256:40a6ab24ccac0ed5acb8c02dccfbb1f5924fd97f46c0450e0245686c24138b53 + command: + - java + - -Xmx64M + - -XX:MaxMetaspaceSize=32m + - -jar + - jmx_prometheus_httpserver.jar + - "5556" + - /etc/jmx-kafka/jmx-kafka-prometheus.yml + ports: + - containerPort: 5556 + resources: + requests: + cpu: 0m + memory: 100Mi + limits: + memory: 150Mi + volumeMounts: + - name: jmx-config + mountPath: /etc/jmx-kafka + volumes: + - name: jmx-config + configMap: + name: jmx-config -- cgit v1.2.3 From 162902ca01e5245448fa9c9c8ecf69e138bd66da Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Fri, 19 Jan 2018 13:13:36 +0100 Subject: Upgrades jmx exporter to 0.2.0 --- jmx/50kafka.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jmx/50kafka.yml b/jmx/50kafka.yml index 9dea8bb..3e19b6f 100644 --- a/jmx/50kafka.yml +++ b/jmx/50kafka.yml @@ -12,7 +12,7 @@ spec: spec: containers: - name: metrics - image: solsson/kafka-prometheus-jmx-exporter@sha256:40a6ab24ccac0ed5acb8c02dccfbb1f5924fd97f46c0450e0245686c24138b53 + image: solsson/kafka-prometheus-jmx-exporter@sha256:a23062396cd5af1acdf76512632c20ea6be76885dfc20cd9ff40fb23846557e8 command: - java - -Xmx64M -- cgit v1.2.3 From 74a5177270301f3cbe0342657d8bcf70d1ae76ed Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Fri, 19 Jan 2018 15:23:33 +0100 Subject: Adds liveness probe from the metrics-improve-scrape-times branch --- jmx/50kafka.yml | 5 
+++++ 1 file changed, 5 insertions(+) diff --git a/jmx/50kafka.yml b/jmx/50kafka.yml index 3e19b6f..0e475c2 100644 --- a/jmx/50kafka.yml +++ b/jmx/50kafka.yml @@ -23,6 +23,11 @@ spec: - /etc/jmx-kafka/jmx-kafka-prometheus.yml ports: - containerPort: 5556 + livenessProbe: + httpGet: + path: /liveness + port: 5556 + periodSeconds: 60 resources: requests: cpu: 0m -- cgit v1.2.3 From da113d29ecdfb9c1d6308743b0c29dd68252b177 Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Fri, 19 Jan 2018 15:31:08 +0100 Subject: This is an optional feature, but belongs to the broker pods, so let's evaluate kubectl patch to add it --- jmx/50kafka.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/jmx/50kafka.yml b/jmx/50kafka.yml index 0e475c2..91e1765 100644 --- a/jmx/50kafka.yml +++ b/jmx/50kafka.yml @@ -1,3 +1,5 @@ +# meant to be applied using +# kubectl --namespace kafka patch statefulset kafka --patch "$(cat jmx/50kafka.yml)" apiVersion: apps/v1beta2 kind: StatefulSet metadata: -- cgit v1.2.3 From eaf9ebd4527a0ce2706c14c92638e8cd00484280 Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Fri, 19 Jan 2018 15:32:59 +0100 Subject: Evaluates JVM memory limit awareness Interesting input for #112, for use with broker and zk pods in addition to KAFKA_HEAP_OPTS. 
--- jmx/50kafka.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/jmx/50kafka.yml b/jmx/50kafka.yml index 91e1765..30a390b 100644 --- a/jmx/50kafka.yml +++ b/jmx/50kafka.yml @@ -17,8 +17,10 @@ spec: image: solsson/kafka-prometheus-jmx-exporter@sha256:a23062396cd5af1acdf76512632c20ea6be76885dfc20cd9ff40fb23846557e8 command: - java - - -Xmx64M - - -XX:MaxMetaspaceSize=32m + - -XX:+UnlockExperimentalVMOptions + - -XX:+UseCGroupMemoryLimitForHeap + - -XX:MaxRAMFraction=1 + - -XshowSettings:vm - -jar - jmx_prometheus_httpserver.jar - "5556" -- cgit v1.2.3 From 66255af447373f9b14e338a4b819dddad483a257 Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Fri, 19 Jan 2018 15:33:45 +0100 Subject: On start the metrics pod guesses that it has ~45% of the memory limit as "Max. Heap Size (Estimated)" Reducing limits as experiment for #112. --- jmx/50kafka.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/jmx/50kafka.yml b/jmx/50kafka.yml index 30a390b..9da1d41 100644 --- a/jmx/50kafka.yml +++ b/jmx/50kafka.yml @@ -35,9 +35,9 @@ spec: resources: requests: cpu: 0m - memory: 100Mi + memory: 60Mi limits: - memory: 150Mi + memory: 120Mi volumeMounts: - name: jmx-config mountPath: /etc/jmx-kafka -- cgit v1.2.3 From e05b7900335bdcc35fb91d2df1c8257ae21360f5 Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Fri, 19 Jan 2018 15:43:19 +0100 Subject: With kubectl patch, this folder can be scoped as Prometheus support --- jmx/10-metrics-config.yml | 44 ---------------------------- jmx/50kafka.yml | 47 ------------------------------ prometheus/10-metrics-config.yml | 44 ++++++++++++++++++++++++++++ prometheus/50-kafka-jmx-exporter-patch.yml | 47 ++++++++++++++++++++++++++++++ 4 files changed, 91 insertions(+), 91 deletions(-) delete mode 100644 jmx/10-metrics-config.yml delete mode 100644 jmx/50kafka.yml create mode 100644 prometheus/10-metrics-config.yml create mode 100644 prometheus/50-kafka-jmx-exporter-patch.yml diff --git
a/jmx/10-metrics-config.yml b/jmx/10-metrics-config.yml deleted file mode 100644 index 4416cce..0000000 --- a/jmx/10-metrics-config.yml +++ /dev/null @@ -1,44 +0,0 @@ -kind: ConfigMap -metadata: - name: jmx-config - namespace: kafka -apiVersion: v1 -data: - - jmx-kafka-prometheus.yml: |+ - lowercaseOutputName: true - jmxUrl: service:jmx:rmi:///jndi/rmi://127.0.0.1:5555/jmxrmi - ssl: false - whitelistObjectNames: ["kafka.server:*","java.lang:*"] - rules: - - pattern : kafka.server<>Value - - pattern : kafka.server<>OneMinuteRate - - pattern : kafka.server<>OneMinuteRate - - pattern : kafka.server<>queue-size - - pattern : kafka.server<>(Value|OneMinuteRate) - - pattern : kafka.server<>(.*) - - pattern : kafka.server<>(.*) - - pattern : kafka.server<>queue-size - - pattern : kafka.server<>OneMinuteRate - - pattern : java.lang<>SystemCpuLoad - - pattern : java.langused - - pattern : java.lang<>FreePhysicalMemorySize - - jmx-zookeeper-prometheus.yaml: |+ - rules: - - pattern: "org.apache.ZooKeeperService<>(\\w+)" - name: "zookeeper_$2" - - pattern: "org.apache.ZooKeeperService<>(\\w+)" - name: "zookeeper_$3" - labels: - replicaId: "$2" - - pattern: "org.apache.ZooKeeperService<>(\\w+)" - name: "zookeeper_$4" - labels: - replicaId: "$2" - memberType: "$3" - - pattern: "org.apache.ZooKeeperService<>(\\w+)" - name: "zookeeper_$4_$5" - labels: - replicaId: "$2" - memberType: "$3" \ No newline at end of file diff --git a/jmx/50kafka.yml b/jmx/50kafka.yml deleted file mode 100644 index 9da1d41..0000000 --- a/jmx/50kafka.yml +++ /dev/null @@ -1,47 +0,0 @@ -# meant to be applied using -# kubectl --namespace kafka patch statefulset kafka --patch "$(cat jmx/50kafka.yml)" -apiVersion: apps/v1beta2 -kind: StatefulSet -metadata: - name: kafka - namespace: kafka -spec: - template: - metadata: - annotations: - prometheus.io/scrape: "true" - prometheus.io/port: "5556" - spec: - containers: - - name: metrics - image: 
solsson/kafka-prometheus-jmx-exporter@sha256:a23062396cd5af1acdf76512632c20ea6be76885dfc20cd9ff40fb23846557e8 - command: - - java - - -XX:+UnlockExperimentalVMOptions - - -XX:+UseCGroupMemoryLimitForHeap - - -XX:MaxRAMFraction=1 - - -XshowSettings:vm - - -jar - - jmx_prometheus_httpserver.jar - - "5556" - - /etc/jmx-kafka/jmx-kafka-prometheus.yml - ports: - - containerPort: 5556 - livenessProbe: - httpGet: - path: /liveness - port: 5556 - periodSeconds: 60 - resources: - requests: - cpu: 0m - memory: 60Mi - limits: - memory: 120Mi - volumeMounts: - - name: jmx-config - mountPath: /etc/jmx-kafka - volumes: - - name: jmx-config - configMap: - name: jmx-config diff --git a/prometheus/10-metrics-config.yml b/prometheus/10-metrics-config.yml new file mode 100644 index 0000000..4416cce --- /dev/null +++ b/prometheus/10-metrics-config.yml @@ -0,0 +1,44 @@ +kind: ConfigMap +metadata: + name: jmx-config + namespace: kafka +apiVersion: v1 +data: + + jmx-kafka-prometheus.yml: |+ + lowercaseOutputName: true + jmxUrl: service:jmx:rmi:///jndi/rmi://127.0.0.1:5555/jmxrmi + ssl: false + whitelistObjectNames: ["kafka.server:*","java.lang:*"] + rules: + - pattern : kafka.server<>Value + - pattern : kafka.server<>OneMinuteRate + - pattern : kafka.server<>OneMinuteRate + - pattern : kafka.server<>queue-size + - pattern : kafka.server<>(Value|OneMinuteRate) + - pattern : kafka.server<>(.*) + - pattern : kafka.server<>(.*) + - pattern : kafka.server<>queue-size + - pattern : kafka.server<>OneMinuteRate + - pattern : java.lang<>SystemCpuLoad + - pattern : java.langused + - pattern : java.lang<>FreePhysicalMemorySize + + jmx-zookeeper-prometheus.yaml: |+ + rules: + - pattern: "org.apache.ZooKeeperService<>(\\w+)" + name: "zookeeper_$2" + - pattern: "org.apache.ZooKeeperService<>(\\w+)" + name: "zookeeper_$3" + labels: + replicaId: "$2" + - pattern: "org.apache.ZooKeeperService<>(\\w+)" + name: "zookeeper_$4" + labels: + replicaId: "$2" + memberType: "$3" + - pattern: 
"org.apache.ZooKeeperService<>(\\w+)" + name: "zookeeper_$4_$5" + labels: + replicaId: "$2" + memberType: "$3" \ No newline at end of file diff --git a/prometheus/50-kafka-jmx-exporter-patch.yml b/prometheus/50-kafka-jmx-exporter-patch.yml new file mode 100644 index 0000000..c17e791 --- /dev/null +++ b/prometheus/50-kafka-jmx-exporter-patch.yml @@ -0,0 +1,47 @@ +# meant to be applied using +# kubectl --namespace kafka patch statefulset kafka --patch "$(cat prometheus/50-kafka-jmx-exporter-patch.yml )" +apiVersion: apps/v1beta2 +kind: StatefulSet +metadata: + name: kafka + namespace: kafka +spec: + template: + metadata: + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "5556" + spec: + containers: + - name: metrics + image: solsson/kafka-prometheus-jmx-exporter@sha256:a23062396cd5af1acdf76512632c20ea6be76885dfc20cd9ff40fb23846557e8 + command: + - java + - -XX:+UnlockExperimentalVMOptions + - -XX:+UseCGroupMemoryLimitForHeap + - -XX:MaxRAMFraction=1 + - -XshowSettings:vm + - -jar + - jmx_prometheus_httpserver.jar + - "5556" + - /etc/jmx-kafka/jmx-kafka-prometheus.yml + ports: + - containerPort: 5556 + livenessProbe: + httpGet: + path: /liveness + port: 5556 + periodSeconds: 60 + resources: + requests: + cpu: 0m + memory: 60Mi + limits: + memory: 120Mi + volumeMounts: + - name: jmx-config + mountPath: /etc/jmx-kafka + volumes: + - name: jmx-config + configMap: + name: jmx-config -- cgit v1.2.3 From f1e6e96231465775ffa0ded5b745774182fec5ff Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Fri, 19 Jan 2018 16:02:39 +0100 Subject: With 120M you get OOMKilled even on brokers with modest metrics volumes This reverts commit 66255af447373f9b14e338a4b819dddad483a257. 
--- prometheus/50-kafka-jmx-exporter-patch.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/prometheus/50-kafka-jmx-exporter-patch.yml b/prometheus/50-kafka-jmx-exporter-patch.yml index c17e791..f257dbe 100644 --- a/prometheus/50-kafka-jmx-exporter-patch.yml +++ b/prometheus/50-kafka-jmx-exporter-patch.yml @@ -35,9 +35,9 @@ spec: resources: requests: cpu: 0m - memory: 60Mi + memory: 100Mi limits: - memory: 120Mi + memory: 150Mi volumeMounts: - name: jmx-config mountPath: /etc/jmx-kafka -- cgit v1.2.3 From 0d78e08f6f929997f741ff79b6ec7a63c4c7cda9 Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Fri, 19 Jan 2018 16:11:47 +0100 Subject: Removes liveness probes, to focus on memory limits, and rely on metric staleness alerts instead for exporter liveness. This reverts commit 74a5177270301f3cbe0342657d8bcf70d1ae76ed. --- prometheus/50-kafka-jmx-exporter-patch.yml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/prometheus/50-kafka-jmx-exporter-patch.yml b/prometheus/50-kafka-jmx-exporter-patch.yml index f257dbe..f5a9cf1 100644 --- a/prometheus/50-kafka-jmx-exporter-patch.yml +++ b/prometheus/50-kafka-jmx-exporter-patch.yml @@ -27,11 +27,6 @@ spec: - /etc/jmx-kafka/jmx-kafka-prometheus.yml ports: - containerPort: 5556 - livenessProbe: - httpGet: - path: /liveness - port: 5556 - periodSeconds: 60 resources: requests: cpu: 0m -- cgit v1.2.3 From cfe434c55bc44d53ff6a569fa480ef4d92bd1144 Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Sat, 20 Jan 2018 07:55:09 +0100 Subject: It was the liveness probe that killed the metrics container, and we might not need liveness if we have alerts for stale metrics. This reverts commit f1e6e96231465775ffa0ded5b745774182fec5ff. 
--- prometheus/50-kafka-jmx-exporter-patch.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/prometheus/50-kafka-jmx-exporter-patch.yml b/prometheus/50-kafka-jmx-exporter-patch.yml index f5a9cf1..7876ef4 100644 --- a/prometheus/50-kafka-jmx-exporter-patch.yml +++ b/prometheus/50-kafka-jmx-exporter-patch.yml @@ -30,9 +30,9 @@ spec: resources: requests: cpu: 0m - memory: 100Mi + memory: 60Mi limits: - memory: 150Mi + memory: 120Mi volumeMounts: - name: jmx-config mountPath: /etc/jmx-kafka -- cgit v1.2.3 From 152bb19a7cdd448941e07c3a7c23e5a16f9c6b56 Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Fri, 2 Feb 2018 12:33:27 +0100 Subject: ”In a production Kafka cluster, an offline partition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit may be impacting the producer clients, losing messages or causing back-pressure in the application. This is most often a “site down” type of problem and will need to be addressed immediately.” Excerpt from: Neha Narkhede, Gwen Shapira, and Todd Palino. ”Kafka: The Definitive Guide”. We now export kafka_controller_kafkacontroller_value{name="OfflinePartitionsCount",} and friends. See #140 for why. 
--- prometheus/10-metrics-config.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/prometheus/10-metrics-config.yml b/prometheus/10-metrics-config.yml index 4416cce..33bed50 100644 --- a/prometheus/10-metrics-config.yml +++ b/prometheus/10-metrics-config.yml @@ -9,8 +9,9 @@ data: lowercaseOutputName: true jmxUrl: service:jmx:rmi:///jndi/rmi://127.0.0.1:5555/jmxrmi ssl: false - whitelistObjectNames: ["kafka.server:*","java.lang:*"] + whitelistObjectNames: ["kafka.controller:*","kafka.server:*","java.lang:*"] rules: + - pattern : kafka.controller<>(.*) - pattern : kafka.server<>Value - pattern : kafka.server<>OneMinuteRate - pattern : kafka.server<>OneMinuteRate -- cgit v1.2.3 From f02213898558c4363e8e294f4a38e7849446ec27 Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Sat, 3 Feb 2018 14:13:44 +0100 Subject: At first I suspected that order matters, but this also works --- prometheus/10-metrics-config.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/prometheus/10-metrics-config.yml b/prometheus/10-metrics-config.yml index 33bed50..7d77b16 100644 --- a/prometheus/10-metrics-config.yml +++ b/prometheus/10-metrics-config.yml @@ -9,9 +9,8 @@ data: lowercaseOutputName: true jmxUrl: service:jmx:rmi:///jndi/rmi://127.0.0.1:5555/jmxrmi ssl: false - whitelistObjectNames: ["kafka.controller:*","kafka.server:*","java.lang:*"] + whitelistObjectNames: ["kafka.server:*","kafka.controller:*","java.lang:*"] rules: - - pattern : kafka.controller<>(.*) - pattern : kafka.server<>Value - pattern : kafka.server<>OneMinuteRate - pattern : kafka.server<>OneMinuteRate @@ -21,6 +20,7 @@ data: - pattern : kafka.server<>(.*) - pattern : kafka.server<>queue-size - pattern : kafka.server<>OneMinuteRate + - pattern : kafka.controller<>(.*) - pattern : java.lang<>SystemCpuLoad - pattern : java.langused - pattern : java.lang<>FreePhysicalMemorySize -- cgit v1.2.3 From a564ed271d95058f23d3230042865b918b03214d Mon Sep 17 00:00:00 2001 From: Staffan
Olsson Date: Sat, 3 Feb 2018 14:25:07 +0100 Subject: There are only values in this metric type --- prometheus/10-metrics-config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prometheus/10-metrics-config.yml b/prometheus/10-metrics-config.yml index 7d77b16..345e192 100644 --- a/prometheus/10-metrics-config.yml +++ b/prometheus/10-metrics-config.yml @@ -20,7 +20,7 @@ data: - pattern : kafka.server<>(.*) - pattern : kafka.server<>queue-size - pattern : kafka.server<>OneMinuteRate - - pattern : kafka.controller<>(.*) + - pattern : kafka.controller<>Value - pattern : java.lang<>SystemCpuLoad - pattern : java.langused - pattern : java.lang<>FreePhysicalMemorySize -- cgit v1.2.3