From 6e5b9e0ebfa5d0c037356da8825be883e24069fc Mon Sep 17 00:00:00 2001
From: Pasquale Toscano <pasqualetoscano90@gmail.com>
Date: Fri, 5 Nov 2021 15:59:54 +0100
Subject: [PATCH] Fix Kubelet and Containerd when using cgroupfs as cgroup
 driver (#8123)

---
 docs/vars.md                                    |  2 +-
 .../containerd/defaults/main.yml                | 16 +++++++++-------
 .../container-engine/containerd/tasks/facts.yml |  6 ++++++
 .../container-engine/containerd/tasks/main.yml  |  4 ++++
 .../containerd/templates/config.toml.j2         | 10 +++++++++-
 roles/kubernetes/node/defaults/main.yml         | 12 ++++++++++--
 roles/kubernetes/node/tasks/facts.yml           | 17 +++++++++++++++++
 .../templates/kubelet-config.v1beta1.yaml.j2    |  2 +-
 8 files changed, 57 insertions(+), 12 deletions(-)
 create mode 100644 roles/container-engine/containerd/tasks/facts.yml

diff --git a/docs/vars.md b/docs/vars.md
index 51129cfc2..be812042a 100644
--- a/docs/vars.md
+++ b/docs/vars.md
@@ -119,7 +119,7 @@ Stack](https://github.com/kubernetes-sigs/kubespray/blob/master/docs/dns-stack.m
   ``--insecure-registry=myregistry.mydomain:5000``
 * *docker_plugins* - This list can be used to define [Docker plugins](https://docs.docker.com/engine/extend/) to install.
 * *containerd_default_runtime* - Sets the default Containerd runtime used by the Kubernetes CRI plugin.
-* *containerd_runtimes* - Sets the Containerd runtime attributes used by the Kubernetes CRI plugin.
+* *containerd_additional_runtimes* - Sets the additional Containerd runtimes used by the Kubernetes CRI plugin.
   [Default config](https://github.com/kubernetes-sigs/kubespray/blob/master/roles/container-engine/containerd/defaults/main.yml) can be overriden in inventory vars.
 * *http_proxy/https_proxy/no_proxy/no_proxy_exclude_workers/additional_no_proxy* - Proxy variables for deploying behind a
   proxy. Note that no_proxy defaults to all internal cluster IPs and hostnames
diff --git a/roles/container-engine/containerd/defaults/main.yml b/roles/container-engine/containerd/defaults/main.yml
index bfab4aaa8..f01f85f38 100644
--- a/roles/container-engine/containerd/defaults/main.yml
+++ b/roles/container-engine/containerd/defaults/main.yml
@@ -7,13 +7,15 @@ containerd_oom_score: 0
 containerd_default_runtime: "runc"
 # containerd_snapshotter: "native"
 
-containerd_runtimes:
-  - name: runc
-    type: "io.containerd.runc.v2"
-    engine: ""
-    root: ""
-    options:
-      systemdCgroup: "true"
+containerd_runc_runtime:
+  name: runc
+  type: "io.containerd.runc.v2"
+  engine: ""
+  root: ""
+  options:
+    systemdCgroup: "true"  # switched to "false" by tasks/facts.yml when cgroupfs is used
+
+containerd_additional_runtimes: []
 # Example for Kata Containers as additional runtime:
 #  - name: kata
 #    type: "io.containerd.kata.v2"
diff --git a/roles/container-engine/containerd/tasks/facts.yml b/roles/container-engine/containerd/tasks/facts.yml
new file mode 100644
index 000000000..987b78458
--- /dev/null
+++ b/roles/container-engine/containerd/tasks/facts.yml
@@ -0,0 +1,6 @@
+---
+
+- name: set containerd_runc_runtime options when cgroupfs is used
+  set_fact:
+    containerd_runc_runtime: "{{ containerd_runc_runtime | combine({'options': {'systemdCgroup': 'false'}}, recursive=True) }}"
+  when: not containerd_use_systemd_cgroup  # recursive merge above keeps any other runc options
diff --git a/roles/container-engine/containerd/tasks/main.yml b/roles/container-engine/containerd/tasks/main.yml
index 7088f340d..39779e78c 100644
--- a/roles/container-engine/containerd/tasks/main.yml
+++ b/roles/container-engine/containerd/tasks/main.yml
@@ -1,4 +1,8 @@
 ---
+- import_tasks: facts.yml  # runs first: adjusts containerd_runc_runtime on cgroupfs hosts
+  tags:
+    - facts
+
 - name: Fail containerd setup if distribution is not supported
   fail:
     msg: "{{ ansible_distribution }} is not supported by containerd."
diff --git a/roles/container-engine/containerd/templates/config.toml.j2 b/roles/container-engine/containerd/templates/config.toml.j2
index 35c4f933a..48f3628e0 100644
--- a/roles/container-engine/containerd/templates/config.toml.j2
+++ b/roles/container-engine/containerd/templates/config.toml.j2
@@ -22,7 +22,15 @@ oom_score = {{ containerd_oom_score }}
       default_runtime_name = "{{ containerd_default_runtime | default('runc') }}"
       snapshotter = "{{ containerd_snapshotter | default('overlayfs') }}"
       [plugins."io.containerd.grpc.v1.cri".containerd.runtimes]
-{% for runtime in containerd_runtimes %}
+        [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.{{ containerd_runc_runtime.name }}]
+          runtime_type = "{{ containerd_runc_runtime.type }}"
+          runtime_engine = "{{ containerd_runc_runtime.engine }}"
+          runtime_root = "{{ containerd_runc_runtime.root }}"
+          [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.{{ containerd_runc_runtime.name }}.options]
+{% for key, value in containerd_runc_runtime.options.items() %}
+            {{ key }} = {{ value }}
+{% endfor %}
+{% for runtime in containerd_additional_runtimes %}
         [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.{{ runtime.name }}]
           runtime_type = "{{ runtime.type }}"
           runtime_engine = "{{ runtime.engine }}"
diff --git a/roles/kubernetes/node/defaults/main.yml b/roles/kubernetes/node/defaults/main.yml
index 686e2e609..d42fa555a 100644
--- a/roles/kubernetes/node/defaults/main.yml
+++ b/roles/kubernetes/node/defaults/main.yml
@@ -14,11 +14,14 @@ kube_resolv_conf: "/etc/resolv.conf"
 # Set to empty to avoid cgroup creation
 kubelet_enforce_node_allocatable: "\"\""
 
-# Set runtime cgroups
+# Runtime and kubelet cgroups used with the systemd cgroup driver (default)
 kubelet_runtime_cgroups: "/systemd/system.slice"
-# Set kubelet cgroups
 kubelet_kubelet_cgroups: "/systemd/system.slice"
 
+# Runtime and kubelet cgroups that replace the two above with the cgroupfs cgroup driver
+kubelet_runtime_cgroups_cgroupfs: "/system.slice/containerd.service"
+kubelet_kubelet_cgroups_cgroupfs: "/system.slice/kubelet.slice"
+
 ### fail with swap on (default true)
 kubelet_fail_swap_on: true
 
@@ -66,6 +69,11 @@ kubelet_max_pods: 110
 ## Support parameters to be passed to kubelet via kubelet-config.yaml
 kubelet_config_extra_args: {}
 
+## Extra kubelet-config.yaml parameters combined into kubelet_config_extra_args for cgroupfs
+kubelet_config_extra_args_cgroupfs:
+  systemCgroups: "/system.slice"
+  cgroupRoot: "/"
+
 ## Support parameters to be passed to kubelet via kubelet-config.yaml only on nodes, not masters
 kubelet_node_config_extra_args: {}
 
diff --git a/roles/kubernetes/node/tasks/facts.yml b/roles/kubernetes/node/tasks/facts.yml
index b7b3ad011..d4bd428f5 100644
--- a/roles/kubernetes/node/tasks/facts.yml
+++ b/roles/kubernetes/node/tasks/facts.yml
@@ -27,6 +27,23 @@
       {%- if containerd_use_systemd_cgroup -%}systemd{%- else -%}cgroupfs{%- endif -%}
   when: container_manager == 'containerd'
 
+- name: set kubelet_cgroup_driver
+  set_fact:
+    kubelet_cgroup_driver: "{{ kubelet_cgroup_driver_detected }}"
+  when: kubelet_cgroup_driver is undefined
+
+- name: set kubelet_cgroups options when cgroupfs is used
+  set_fact:
+    kubelet_runtime_cgroups: "{{ kubelet_runtime_cgroups_cgroupfs }}"
+    kubelet_kubelet_cgroups: "{{ kubelet_kubelet_cgroups_cgroupfs }}"
+  when: kubelet_cgroup_driver == 'cgroupfs'
+
+- name: set kubelet_config_extra_args options when cgroupfs is used
+  # cgroupfs-specific values override same-named keys in kubelet_config_extra_args
+  set_fact:
+    kubelet_config_extra_args: "{{ kubelet_config_extra_args | combine(kubelet_config_extra_args_cgroupfs) }}"
+  when: kubelet_cgroup_driver == 'cgroupfs'
+
 - name: os specific vars
   include_vars: "{{ item }}"
   with_first_found:
diff --git a/roles/kubernetes/node/templates/kubelet-config.v1beta1.yaml.j2 b/roles/kubernetes/node/templates/kubelet-config.v1beta1.yaml.j2
index 673c61788..13ed5f4c4 100644
--- a/roles/kubernetes/node/templates/kubelet-config.v1beta1.yaml.j2
+++ b/roles/kubernetes/node/templates/kubelet-config.v1beta1.yaml.j2
@@ -23,7 +23,7 @@ enforceNodeAllocatable:
 {% endfor %}
 {% endif %}
 staticPodPath: {{ kube_manifest_dir }}
-cgroupDriver: {{ kubelet_cgroup_driver|default(kubelet_cgroup_driver_detected) }}
+cgroupDriver: {{ kubelet_cgroup_driver }}
 containerLogMaxFiles: {{ kubelet_logfiles_max_nr }}
 containerLogMaxSize: {{ kubelet_logfiles_max_size }}
 maxPods: {{ kubelet_max_pods }}
-- 
GitLab