From bbd116114760abd2895293010a9df55c645e8bc6 Mon Sep 17 00:00:00 2001
From: Tristan <tde@hey.com>
Date: Mon, 22 Aug 2022 10:37:44 +0100
Subject: [PATCH] 9035: Make Cilium rolling-restart delay/timeout configurable
 (#9176)

See #9035
---
 docs/cilium.md                                | 29 +++++++++++++++++++
 roles/network_plugin/cilium/defaults/main.yml |  4 +++
 roles/network_plugin/cilium/tasks/apply.yml   |  4 +--
 3 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/docs/cilium.md b/docs/cilium.md
index 665e319ae..e9c3e0d2b 100644
--- a/docs/cilium.md
+++ b/docs/cilium.md
@@ -153,3 +153,32 @@ cilium_hubble_metrics:
 ```
 
 [More](https://docs.cilium.io/en/v1.9/operations/metrics/#hubble-exported-metrics)
+
+## Upgrade considerations
+
+### Rolling-restart timeouts
+
+Cilium relies on the kernel's BPF support, which is extremely fast at runtime but incurs a compilation penalty on initialization and update.
+
+As a result, the Cilium DaemonSet pods can take a significant amount of time to start, and that time scales with the number of nodes and endpoints in your cluster.
+
+As part of `cluster.yml`, this DaemonSet is restarted, and Kubespray's [default timeouts for this operation](../roles/network_plugin/cilium/defaults/main.yml)
+are not appropriate for large clusters.
+
+This means that you will likely want to update these timeouts to values more in line with the number of nodes in your cluster and their respective CPU performance.
+This is configured by the following variables:
+
+```yaml
+# Configure how long to wait for the Cilium DaemonSet to be ready again
+cilium_rolling_restart_wait_retries_count: 30
+cilium_rolling_restart_wait_retries_delay_seconds: 10
+```
+
+For rolling updates to succeed, the total time allowed (count * delay) should be at least `($number_of_nodes_in_cluster * $cilium_pod_start_time)`. There is no
+drawback to setting it higher and giving yourself a time buffer to accommodate transient slowdowns.
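+
+For example, assuming a hypothetical 60-node cluster where each Cilium pod takes roughly 30 seconds to become ready, the total allowance should be at least `60 * 30s = 1800s`; the values below provide that plus a buffer:
+
+```yaml
+# Hypothetical sizing: 60 nodes * ~30s per pod = 1800s minimum
+# 80 retries * 30s delay = 2400s, leaving ~600s of buffer for transient slowdowns
+cilium_rolling_restart_wait_retries_count: 80
+cilium_rolling_restart_wait_retries_delay_seconds: 30
+```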
+
+Note: To find the `$cilium_pod_start_time` for your cluster, you can simply restart a Cilium pod on a node of your choice and look at how long it takes for it
+to become ready.
+
+Note 2: The default CPU requests/limits for Cilium pods are set to a very conservative 100m:500m, which will likely result in very slow startup of the Cilium pods. You
+will probably want to increase the CPU limit significantly if short bursts of CPU usage from Cilium are acceptable to you.
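+
+If your Kubespray version exposes resource variables for the Cilium pods matching these defaults, the limit can be raised from the inventory; the variable names below are an assumption and should be verified against [the role defaults](../roles/network_plugin/cilium/defaults/main.yml):
+
+```yaml
+# Assumed variable names -- verify against roles/network_plugin/cilium/defaults/main.yml
+cilium_cpu_requests: 200m
+cilium_cpu_limit: 1000m
+```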
diff --git a/roles/network_plugin/cilium/defaults/main.yml b/roles/network_plugin/cilium/defaults/main.yml
index e244735d9..c590637dc 100644
--- a/roles/network_plugin/cilium/defaults/main.yml
+++ b/roles/network_plugin/cilium/defaults/main.yml
@@ -236,3 +236,7 @@ cilium_enable_bpf_clock_probe: true
 
 # -- Whether to enable CNP status updates.
 cilium_disable_cnp_status_updates: true
+
+# Configure how long to wait for the Cilium DaemonSet to be ready again
+cilium_rolling_restart_wait_retries_count: 30
+cilium_rolling_restart_wait_retries_delay_seconds: 10
diff --git a/roles/network_plugin/cilium/tasks/apply.yml b/roles/network_plugin/cilium/tasks/apply.yml
index ac323a4aa..b977c2177 100644
--- a/roles/network_plugin/cilium/tasks/apply.yml
+++ b/roles/network_plugin/cilium/tasks/apply.yml
@@ -14,8 +14,8 @@
   command: "{{ kubectl }} -n kube-system get pods -l k8s-app=cilium -o jsonpath='{.items[?(@.status.containerStatuses[0].ready==false)].metadata.name}'"  # noqa 601
   register: pods_not_ready
   until: pods_not_ready.stdout.find("cilium")==-1
-  retries: 30
-  delay: 10
+  retries: "{{ cilium_rolling_restart_wait_retries_count | int }}"
+  delay: "{{ cilium_rolling_restart_wait_retries_delay_seconds | int }}"
   failed_when: false
   when: inventory_hostname == groups['kube_control_plane'][0]
 
-- 
GitLab