From b3f6d05131fc98b1306ecdc2e6c0cd1172a9cfb2 Mon Sep 17 00:00:00 2001
From: Max Gautier <mg@max.gautier.name>
Date: Wed, 8 Nov 2023 12:35:20 +0100
Subject: [PATCH] Move control plane certs renewal "spread out" into the
 systemd timer (#10596)

* Use RandomizedDelaySec to spread out control plane certificates renewal

If there are more than 6 control plane nodes, using (index * 10 minutes)
will fail (03:60:00 is not a valid timestamp).

Compared to just fixing the Jinja expression (to use a modulo, for
example), this should avoid having the certificate renewal triggered on
two control plane nodes at the same time.

* Make k8s-certs-renew.timer Persistent

If the control plane happens to be offline during the scheduled
certificates renewal (node failure or anything like that), we still want
the renewal to happen.
---
 roles/kubernetes/control-plane/defaults/main/main.yml          | 3 +--
 .../control-plane/templates/k8s-certs-renew.timer.j2           | 3 +++
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/roles/kubernetes/control-plane/defaults/main/main.yml b/roles/kubernetes/control-plane/defaults/main/main.yml
index 4a9800a65..ad9456d5b 100644
--- a/roles/kubernetes/control-plane/defaults/main/main.yml
+++ b/roles/kubernetes/control-plane/defaults/main/main.yml
@@ -222,8 +222,7 @@ event_ttl_duration: "1h0m0s"
 ## Automatically renew K8S control plane certificates on first Monday of each month
 auto_renew_certificates: false
 # First Monday of each month
-auto_renew_certificates_systemd_calendar: "{{ 'Mon *-*-1,2,3,4,5,6,7 03:' ~
-  groups['kube_control_plane'].index(inventory_hostname) ~ '0:00' }}"
+auto_renew_certificates_systemd_calendar: "Mon *-*-1,2,3,4,5,6,7 03:00:00"
 # kubeadm renews all the certificates during control plane upgrade.
 # If we have requirement like without renewing certs upgrade the cluster,
 # we can opt out from the default behavior by setting kubeadm_upgrade_auto_cert_renewal to false
diff --git a/roles/kubernetes/control-plane/templates/k8s-certs-renew.timer.j2 b/roles/kubernetes/control-plane/templates/k8s-certs-renew.timer.j2
index 904f0073c..cca5aca3e 100644
--- a/roles/kubernetes/control-plane/templates/k8s-certs-renew.timer.j2
+++ b/roles/kubernetes/control-plane/templates/k8s-certs-renew.timer.j2
@@ -3,6 +3,9 @@ Description=Timer to renew K8S control plane certificates
 
 [Timer]
 OnCalendar={{ auto_renew_certificates_systemd_calendar }}
+RandomizedDelaySec={{ 10 * (groups['kube_control_plane'] | length) }}min
+FixedRandomDelay=yes
+Persistent=yes
 
 [Install]
 WantedBy=multi-user.target
-- 
GitLab