From 7be9797ab1fec434610aa259c4074c5d1cb91d31 Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Fri, 1 Dec 2017 09:01:37 +0100 Subject: Lets clusters define their preferred (prod) replication factor so that we can omit replication-factor when creating topics, as we benefit from reusing such commands or definitions across dev-qa-prod. --- kafka/10broker-config.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kafka/10broker-config.yml b/kafka/10broker-config.yml index c0c4c8c..995bc03 100644 --- a/kafka/10broker-config.yml +++ b/kafka/10broker-config.yml @@ -107,6 +107,9 @@ data: # the brokers. num.partitions=1 + default.replication.factor=3 + min.insync.replicas=2 + # The number of threads per data directory to be used for log recovery at startup and flushing at shutdown. # This value is recommended to be increased for installations with data dirs located in RAID array. num.recovery.threads.per.data.dir=1 -- cgit v1.2.3 From de6adfdd2571a4f4fb904a2f17f5242086e11ef6 Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Wed, 13 Dec 2017 22:21:15 +0100 Subject: Enables auto create topics because there's use cases for adding topics at runtime, such as splitting a stream based on some business enum. Producers and Kafka Streams apps would otherwise need to set up an AdminClient to do that. This reverts commit: 0681cc515fa1c505b905ef60c7d3132e8d7510af --- kafka/50kafka.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka/50kafka.yml b/kafka/50kafka.yml index 1157235..5b51a91 100644 --- a/kafka/50kafka.yml +++ b/kafka/50kafka.yml @@ -64,7 +64,7 @@ spec: - --override - log.dirs=/var/lib/kafka/data/topics - --override - - auto.create.topics.enable=false + - auto.create.topics.enable=true resources: requests: cpu: 100m -- cgit v1.2.3 From 166a616330f3475820fc8fb42d6e0c9f71a4d302 Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Wed, 13 Dec 2017 22:26:52 +0100 Subject: Gathers the config that has to do with topic creation in one place. These values are critical to maintain for those, like us, who make use of auto create topics for production data. Also a step towards #72 and #77. --- kafka/10broker-config.yml | 3 +++ kafka/50kafka.yml | 2 -- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/kafka/10broker-config.yml b/kafka/10broker-config.yml index 995bc03..77400bf 100644 --- a/kafka/10broker-config.yml +++ b/kafka/10broker-config.yml @@ -108,8 +108,11 @@ data: num.partitions=1 default.replication.factor=3 + min.insync.replicas=2 + auto.create.topics.enable=true + # The number of threads per data directory to be used for log recovery at startup and flushing at shutdown. # This value is recommended to be increased for installations with data dirs located in RAID array. num.recovery.threads.per.data.dir=1 diff --git a/kafka/50kafka.yml b/kafka/50kafka.yml index 5b51a91..8486f9c 100644 --- a/kafka/50kafka.yml +++ b/kafka/50kafka.yml @@ -63,8 +63,6 @@ spec: - log.retention.hours=-1 - --override - log.dirs=/var/lib/kafka/data/topics - - --override - - auto.create.topics.enable=true resources: requests: cpu: 100m -- cgit v1.2.3 From b34f018ba6a9ff43e59ed22397cead21b58463ca Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Wed, 13 Dec 2017 22:45:06 +0100 Subject: Places the values that are most likely to need attention at the top --- kafka/10broker-config.yml | 71 +++++++++++++---------------------------------- 1 file changed, 19 insertions(+), 52 deletions(-) diff --git a/kafka/10broker-config.yml b/kafka/10broker-config.yml index 77400bf..c1de5d0 100644 --- a/kafka/10broker-config.yml +++ b/kafka/10broker-config.yml @@ -35,22 +35,25 @@ data: } server.properties: |- - # Licensed to the Apache Software Foundation (ASF) under one or more - # contributor license agreements. See the NOTICE file distributed with - # this work for additional information regarding copyright ownership. - # The ASF licenses this file to You under the Apache License, Version 2.0 - # (the "License"); you may not use this file except in compliance with - # the License. You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. - - # see kafka.server.KafkaConfig for additional details and defaults + ############################# Log Basics ############################# + + # A comma seperated list of directories under which to store log files + log.dirs=/tmp/kafka-logs + + # The default number of log partitions per topic. More partitions allow greater + # parallelism for consumption, but this will also result in more files across + # the brokers. + num.partitions=1 + + default.replication.factor=3 + + min.insync.replicas=2 + + auto.create.topics.enable=true + + # The number of threads per data directory to be used for log recovery at startup and flushing at shutdown. + # This value is recommended to be increased for installations with data dirs located in RAID array. + num.recovery.threads.per.data.dir=1 ############################# Server Basics ############################# @@ -96,27 +99,6 @@ data: # The maximum size of a request that the socket server will accept (protection against OOM) socket.request.max.bytes=104857600 - - ############################# Log Basics ############################# - - # A comma seperated list of directories under which to store log files - log.dirs=/tmp/kafka-logs - - # The default number of log partitions per topic. More partitions allow greater - # parallelism for consumption, but this will also result in more files across - # the brokers. - num.partitions=1 - - default.replication.factor=3 - - min.insync.replicas=2 - - auto.create.topics.enable=true - - # The number of threads per data directory to be used for log recovery at startup and flushing at shutdown. - # This value is recommended to be increased for installations with data dirs located in RAID array. - num.recovery.threads.per.data.dir=1 - ############################# Internal Topic Settings ############################# # The replication factor for the group metadata internal topics "__consumer_offsets" and "__transaction_state" # For anything other than development testing, a value greater than 1 is recommended for to ensure availability such as 3. @@ -185,21 +167,6 @@ data: group.initial.rebalance.delay.ms=0 log4j.properties: |- - # Licensed to the Apache Software Foundation (ASF) under one or more - # contributor license agreements. See the NOTICE file distributed with - # this work for additional information regarding copyright ownership. - # The ASF licenses this file to You under the Apache License, Version 2.0 - # (the "License"); you may not use this file except in compliance with - # the License. You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. - # Unspecified loggers and loggers with additivity=true output to server.log and stdout # Note that INFO only applies to unspecified loggers, the log level of the child logger is used otherwise log4j.rootLogger=INFO, stdout -- cgit v1.2.3 From cd3c72b7e8a30c199e2840378e53c4ef08277d0e Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Wed, 13 Dec 2017 22:48:32 +0100 Subject: Permanent retention is a friendlier default, and easy to reconfigure per topic as they grow. We already had that, but this branch cares about grouping such conf. It also encourages topic defaults geared towards persistent data. --- kafka/10broker-config.yml | 2 +- kafka/50kafka.yml | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/kafka/10broker-config.yml b/kafka/10broker-config.yml index c1de5d0..bdc99b1 100644 --- a/kafka/10broker-config.yml +++ b/kafka/10broker-config.yml @@ -131,7 +131,7 @@ data: # from the end of the log. # The minimum age of a log file to be eligible for deletion due to age - log.retention.hours=168 + log.retention.hours=-1 # A size-based retention policy for logs. Segments are pruned from the log as long as the remaining # segments don't drop below log.retention.bytes. Functions independently of log.retention.hours. diff --git a/kafka/50kafka.yml b/kafka/50kafka.yml index 8486f9c..a25ec6f 100644 --- a/kafka/50kafka.yml +++ b/kafka/50kafka.yml @@ -60,8 +60,6 @@ spec: - --override - zookeeper.connect=zookeeper:2181 - --override - - log.retention.hours=-1 - - --override - log.dirs=/var/lib/kafka/data/topics resources: requests: -- cgit v1.2.3 From b9f4e9d25a2e415b95e9c2b712491bf034e7f588 Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Wed, 13 Dec 2017 22:52:32 +0100 Subject: Moves the essential data path config to the config location suggested by Kafka's sample conf. --- kafka/10broker-config.yml | 2 +- kafka/50kafka.yml | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/kafka/10broker-config.yml b/kafka/10broker-config.yml index bdc99b1..ba2192f 100644 --- a/kafka/10broker-config.yml +++ b/kafka/10broker-config.yml @@ -38,7 +38,7 @@ data: ############################# Log Basics ############################# # A comma seperated list of directories under which to store log files - log.dirs=/tmp/kafka-logs + log.dirs=/var/lib/kafka/data/topics # The default number of log partitions per topic. More partitions allow greater # parallelism for consumption, but this will also result in more files across diff --git a/kafka/50kafka.yml b/kafka/50kafka.yml index a25ec6f..9e2bd60 100644 --- a/kafka/50kafka.yml +++ b/kafka/50kafka.yml @@ -59,8 +59,6 @@ spec: - /etc/kafka/server.properties - --override - zookeeper.connect=zookeeper:2181 - - --override - - log.dirs=/var/lib/kafka/data/topics resources: requests: cpu: 100m -- cgit v1.2.3 From b5a1cb102bb268ff5215d40e027e6bd0570c64e5 Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Wed, 13 Dec 2017 22:53:49 +0100 Subject: Now was a good time to get rid of the last --override as we now encourage close scrutiny of the config file. --- kafka/10broker-config.yml | 2 +- kafka/50kafka.yml | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/kafka/10broker-config.yml b/kafka/10broker-config.yml index ba2192f..1ff162f 100644 --- a/kafka/10broker-config.yml +++ b/kafka/10broker-config.yml @@ -151,7 +151,7 @@ data: # server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". # You can also append an optional chroot string to the urls to specify the # root directory for all kafka znodes. - zookeeper.connect=localhost:2181 + zookeeper.connect=zookeeper:2181 # Timeout in ms for connecting to zookeeper zookeeper.connection.timeout.ms=6000 diff --git a/kafka/50kafka.yml b/kafka/50kafka.yml index 9e2bd60..e622678 100644 --- a/kafka/50kafka.yml +++ b/kafka/50kafka.yml @@ -57,8 +57,6 @@ spec: command: - ./bin/kafka-server-start.sh - /etc/kafka/server.properties - - --override - - zookeeper.connect=zookeeper:2181 resources: requests: cpu: 100m -- cgit v1.2.3 From af7990eddfc745899713ad3ce37ed890fe709c44 Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Thu, 14 Dec 2017 07:35:40 +0100 Subject: I find it scary when broker start logs: log.dir = /tmp/kafka-logs log.dirs = /var/lib/kafka/data/topics but this is the lesser of two evils compared to duplicate values --- kafka/10broker-config.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/kafka/10broker-config.yml b/kafka/10broker-config.yml index 1ff162f..d11eeaf 100644 --- a/kafka/10broker-config.yml +++ b/kafka/10broker-config.yml @@ -38,6 +38,7 @@ data: ############################# Log Basics ############################# # A comma seperated list of directories under which to store log files + # Overrides log.dir log.dirs=/var/lib/kafka/data/topics # The default number of log partitions per topic. More partitions allow greater -- cgit v1.2.3 From 096e3bdfba31d2918520011886a6a0d1f1b08c84 Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Thu, 14 Dec 2017 07:50:06 +0100 Subject: Deprecates our own statefulset pod label kafka-broker-id --- README.md | 2 ++ kafka/10broker-config.yml | 1 + 2 files changed, 3 insertions(+) diff --git a/README.md b/README.md index c349564..4ba9a0c 100644 --- a/README.md +++ b/README.md @@ -68,6 +68,8 @@ For clusters that enfoce [RBAC](https://kubernetes.io/docs/admin/authorization/r kubectl apply -f rbac-namespace-default/ ``` +For example rack awareness can fail without this, `logs -c init-config` showing `Error from server (Forbidden): pods "kafka-0" is forbidden: User "system:serviceaccount:kafka:default" cannot get pods in the namespace "kafka": Unknown user "system:serviceaccount:kafka:default"`. + ## Tests Tests are based on the [kube-test](https://github.com/Yolean/kube-test) concept. diff --git a/kafka/10broker-config.yml b/kafka/10broker-config.yml index d11eeaf..bc1d55d 100644 --- a/kafka/10broker-config.yml +++ b/kafka/10broker-config.yml @@ -23,6 +23,7 @@ data: sed -i "s/#init#broker.rack=#init#/broker.rack=$ZONE/" /etc/kafka/server.properties fi + # This requires additional RBAC, and won't be needed after https://github.com/kubernetes/kubernetes/pull/55329 kubectl -n $POD_NAMESPACE label pod $POD_NAME kafka-broker-id=$KAFKA_BROKER_ID OUTSIDE_HOST=$(kubectl get node "$NODE_NAME" -o jsonpath='{.status.addresses[?(@.type=="InternalIP")].address}') -- cgit v1.2.3