From a3e34f589a9add4ccf9f8a0e48be18e63e539253 Mon Sep 17 00:00:00 2001
From: Cristian Calin <6627509+cristicalin@users.noreply.github.com>
Date: Mon, 28 Jun 2021 09:53:25 +0300
Subject: [PATCH] Enable Graceful Node Shutdown for Kubernetes >= 1.21.0
 (#7746)

* Enable Graceful Node Shutdown for Kubernetes >= 1.21.0

* Add sample graceful shutdown parameters
---
 inventory/sample/group_vars/k8s_cluster/k8s-cluster.yml    | 4 ++++
 .../node/templates/kubelet-config.v1beta1.yaml.j2          | 4 ++++
 roles/kubernetes/preinstall/tasks/0020-verify-settings.yml | 7 +++++++
 roles/kubespray-defaults/defaults/main.yaml                | 7 +++++++
 4 files changed, 22 insertions(+)

diff --git a/inventory/sample/group_vars/k8s_cluster/k8s-cluster.yml b/inventory/sample/group_vars/k8s_cluster/k8s-cluster.yml
index b0f963008..c369324ff 100644
--- a/inventory/sample/group_vars/k8s_cluster/k8s-cluster.yml
+++ b/inventory/sample/group_vars/k8s_cluster/k8s-cluster.yml
@@ -149,6 +149,10 @@ kube_proxy_nodeport_addresses: >-
 ## Encrypting Secret Data at Rest (experimental)
 kube_encrypt_secret_data: false
 
+# Graceful Node Shutdown (Kubernetes >= 1.21.0), see https://kubernetes.io/blog/2021/04/21/graceful-node-shutdown-beta/
+# kubelet_shutdown_grace_period: 60s
+# kubelet_shutdown_grace_period_critical_pods: 20s
+
 # DNS configuration.
 # Kubernetes cluster name, also will be used as DNS domain
 cluster_name: cluster.local
diff --git a/roles/kubernetes/node/templates/kubelet-config.v1beta1.yaml.j2 b/roles/kubernetes/node/templates/kubelet-config.v1beta1.yaml.j2
index c11af1184..f16862c0e 100644
--- a/roles/kubernetes/node/templates/kubelet-config.v1beta1.yaml.j2
+++ b/roles/kubernetes/node/templates/kubelet-config.v1beta1.yaml.j2
@@ -96,3 +96,7 @@ tlsCipherSuites:
 {% if kubelet_event_record_qps %}
 eventRecordQPS: {{ kubelet_event_record_qps }}
 {% endif %}
+{% if kube_version is version('v1.21.0', '>=') %}
+shutdownGracePeriod: {{ kubelet_shutdown_grace_period }}
+shutdownGracePeriodCriticalPods: {{ kubelet_shutdown_grace_period_critical_pods }}
+{% endif %}
diff --git a/roles/kubernetes/preinstall/tasks/0020-verify-settings.yml b/roles/kubernetes/preinstall/tasks/0020-verify-settings.yml
index 0585a1116..72d1fbf73 100644
--- a/roles/kubernetes/preinstall/tasks/0020-verify-settings.yml
+++ b/roles/kubernetes/preinstall/tasks/0020-verify-settings.yml
@@ -107,6 +107,13 @@
     - not ignore_assert_errors
     - inventory_hostname in groups['kube_node']
 
+- name: Stop when ShutdownGracePeriod less than ShutdownGracePeriodCriticalPods
+  assert:  # durations are converted to seconds (h/m/s parts, sub-second parts ignored); comparing the raw strings would rank "100s" below "20s"
+    that: ((kubelet_shutdown_grace_period | regex_findall('([0-9.]+)h') | map('float') | sum) * 3600 + (kubelet_shutdown_grace_period | regex_findall('([0-9.]+)m(?!s)') | map('float') | sum) * 60 + (kubelet_shutdown_grace_period | regex_findall('([0-9.]+)s') | map('float') | sum))
+      > ((kubelet_shutdown_grace_period_critical_pods | regex_findall('([0-9.]+)h') | map('float') | sum) * 3600 + (kubelet_shutdown_grace_period_critical_pods | regex_findall('([0-9.]+)m(?!s)') | map('float') | sum) * 60 + (kubelet_shutdown_grace_period_critical_pods | regex_findall('([0-9.]+)s') | map('float') | sum))
+    msg: "ShutdownGracePeriod ({{ kubelet_shutdown_grace_period }}) needs to be greater than ShutdownGracePeriodCriticalPods ({{ kubelet_shutdown_grace_period_critical_pods }}) in order to give normal pods time to be evacuated, please see https://kubernetes.io/blog/2021/04/21/graceful-node-shutdown-beta/ for details"
+  when: kube_version is version('v1.21.0', '>=')
+
 # This assertion will fail on the safe side: One can indeed schedule more pods
 # on a node than the CIDR-range has space for when additional pods use the host
 # network namespace. It is impossible to ascertain the number of such pods at
diff --git a/roles/kubespray-defaults/defaults/main.yaml b/roles/kubespray-defaults/defaults/main.yaml
index 960aceff3..b3c976067 100644
--- a/roles/kubespray-defaults/defaults/main.yaml
+++ b/roles/kubespray-defaults/defaults/main.yaml
@@ -230,6 +230,13 @@ kube_api_aggregator_routing: false
 # Profiling
 kube_profiling: false
 
+# Graceful Node Shutdown
+# This requires Kubernetes >= 1.21.0
+kubelet_shutdown_grace_period: 60s
+# kubelet_shutdown_grace_period_critical_pods should be less than kubelet_shutdown_grace_period
+# to give normal pods time to be gracefully evacuated
+kubelet_shutdown_grace_period_critical_pods: 20s
+
 # Container for runtime
 container_manager: docker
 
-- 
GitLab