From 682c8a59c23d6de88f6333696e54332b0692a1ad Mon Sep 17 00:00:00 2001
From: Cristian Calin <6627509+cristicalin@users.noreply.github.com>
Date: Fri, 10 Dec 2021 00:09:06 +0200
Subject: [PATCH] containerd: change default resolvconf_mode to host_resolvconf
 (#8247)

* containerd: change default resolvconf_mode to host_resolvconf

* Wait for kube-apiserver to come back after pod refresh

* Handle resolv.conf gracefully

* Retain currently configured DNS entries to ensure we don't break the resolvers

* Suse uses wickedd for network management so no dhcp hooks

* Molecule: increase ansible timeout

* CI: Increase ansible timeout to 120s for Packet jobs
---
 .gitlab-ci/packet.yml                         |  1 +
 .../group_vars/k8s_cluster/k8s-cluster.yml    |  2 +-
 roles/adduser/molecule/default/molecule.yml   |  4 ++
 .../molecule/default/molecule.yml             |  4 ++
 .../molecule/default/molecule.yml             |  4 ++
 .../containerd/molecule/default/molecule.yml  |  1 +
 .../cri-o/molecule/default/molecule.yml       |  1 +
 .../docker/molecule/default/molecule.yml      |  1 +
 .../gvisor/molecule/default/molecule.yml      |  1 +
 .../molecule/default/molecule.yml             |  1 +
 roles/kubernetes/preinstall/handlers/main.yml | 16 ++++++++
 .../preinstall/tasks/0040-set_facts.yml       | 37 ++++++++++++++++++-
 .../preinstall/tasks/0060-resolvconf.yml      |  7 ++--
 .../preinstall/tasks/0100-dhclient-hooks.yml  |  2 +-
 roles/kubespray-defaults/defaults/main.yaml   |  2 +-
 ...packet_centos7-docker-weave-upgrade-ha.yml |  1 +
 tests/files/packet_centos8-docker.yml         |  1 +
 tests/files/packet_debian10-docker.yml        |  1 +
 tests/files/packet_debian11-docker.yml        |  1 +
 tests/files/packet_fedora34-docker-weave.yml  |  1 +
 .../packet_ubuntu16-docker-weave-sep.yml      |  1 +
 tests/files/packet_ubuntu18-docker.yml        |  1 +
 tests/files/packet_ubuntu20-docker.yml        |  1 +
 23 files changed, 83 insertions(+), 9 deletions(-)

diff --git a/.gitlab-ci/packet.yml b/.gitlab-ci/packet.yml
index de02cd989..41a2a4fe4 100644
--- a/.gitlab-ci/packet.yml
+++ b/.gitlab-ci/packet.yml
@@ -2,6 +2,7 @@
 .packet:
   extends: .testcases
   variables:
+    ANSIBLE_TIMEOUT: "120"
     CI_PLATFORM: packet
     SSH_USER: kubespray
   tags:
diff --git a/inventory/sample/group_vars/k8s_cluster/k8s-cluster.yml b/inventory/sample/group_vars/k8s_cluster/k8s-cluster.yml
index 062059206..90b47b86c 100644
--- a/inventory/sample/group_vars/k8s_cluster/k8s-cluster.yml
+++ b/inventory/sample/group_vars/k8s_cluster/k8s-cluster.yml
@@ -192,7 +192,7 @@ coredns_k8s_external_zone: k8s_external.local
 enable_coredns_k8s_endpoint_pod_names: false
 
 # Can be docker_dns, host_resolvconf or none
-resolvconf_mode: docker_dns
+resolvconf_mode: host_resolvconf
 # Deploy netchecker app to verify DNS resolve as an HTTP service
 deploy_netchecker: false
 # Ip address of the kubernetes skydns service
diff --git a/roles/adduser/molecule/default/molecule.yml b/roles/adduser/molecule/default/molecule.yml
index 4bb5dce30..80ebdad72 100644
--- a/roles/adduser/molecule/default/molecule.yml
+++ b/roles/adduser/molecule/default/molecule.yml
@@ -15,6 +15,10 @@ platforms:
     memory: 512
 provisioner:
   name: ansible
+  config_options:
+    defaults:
+      callback_whitelist: profile_tasks
+      timeout: 120
   lint:
     name: ansible-lint
 verifier:
diff --git a/roles/bastion-ssh-config/molecule/default/molecule.yml b/roles/bastion-ssh-config/molecule/default/molecule.yml
index 1d84db76c..c0c29ae92 100644
--- a/roles/bastion-ssh-config/molecule/default/molecule.yml
+++ b/roles/bastion-ssh-config/molecule/default/molecule.yml
@@ -15,6 +15,10 @@ platforms:
     memory: 512
 provisioner:
   name: ansible
+  config_options:
+    defaults:
+      callback_whitelist: profile_tasks
+      timeout: 120
   lint:
     name: ansible-lint
   inventory:
diff --git a/roles/bootstrap-os/molecule/default/molecule.yml b/roles/bootstrap-os/molecule/default/molecule.yml
index 081d929e8..0bb61eff6 100644
--- a/roles/bootstrap-os/molecule/default/molecule.yml
+++ b/roles/bootstrap-os/molecule/default/molecule.yml
@@ -35,6 +35,10 @@ platforms:
     memory: 512
 provisioner:
   name: ansible
+  config_options:
+    defaults:
+      callback_whitelist: profile_tasks
+      timeout: 120
   lint:
     name: ansible-lint
   inventory:
diff --git a/roles/container-engine/containerd/molecule/default/molecule.yml b/roles/container-engine/containerd/molecule/default/molecule.yml
index fb2cb9f9d..ebe3595b6 100644
--- a/roles/container-engine/containerd/molecule/default/molecule.yml
+++ b/roles/container-engine/containerd/molecule/default/molecule.yml
@@ -46,6 +46,7 @@ provisioner:
   config_options:
     defaults:
       callback_whitelist: profile_tasks
+      timeout: 120
   lint:
     name: ansible-lint
     options:
diff --git a/roles/container-engine/cri-o/molecule/default/molecule.yml b/roles/container-engine/cri-o/molecule/default/molecule.yml
index 2ca990c1d..56e6abd61 100644
--- a/roles/container-engine/cri-o/molecule/default/molecule.yml
+++ b/roles/container-engine/cri-o/molecule/default/molecule.yml
@@ -38,6 +38,7 @@ provisioner:
   config_options:
     defaults:
       callback_whitelist: profile_tasks
+      timeout: 120
   lint:
     name: ansible-lint
     options:
diff --git a/roles/container-engine/docker/molecule/default/molecule.yml b/roles/container-engine/docker/molecule/default/molecule.yml
index eaf6fae50..c30366215 100644
--- a/roles/container-engine/docker/molecule/default/molecule.yml
+++ b/roles/container-engine/docker/molecule/default/molecule.yml
@@ -18,6 +18,7 @@ provisioner:
   config_options:
     defaults:
       callback_whitelist: profile_tasks
+      timeout: 120
   lint:
     name: ansible-lint
     options:
diff --git a/roles/container-engine/gvisor/molecule/default/molecule.yml b/roles/container-engine/gvisor/molecule/default/molecule.yml
index fc4ec0276..657dc2862 100644
--- a/roles/container-engine/gvisor/molecule/default/molecule.yml
+++ b/roles/container-engine/gvisor/molecule/default/molecule.yml
@@ -30,6 +30,7 @@ provisioner:
   config_options:
     defaults:
       callback_whitelist: profile_tasks
+      timeout: 120
   lint:
     name: ansible-lint
     options:
diff --git a/roles/container-engine/kata-containers/molecule/default/molecule.yml b/roles/container-engine/kata-containers/molecule/default/molecule.yml
index 164a47083..bb9812054 100644
--- a/roles/container-engine/kata-containers/molecule/default/molecule.yml
+++ b/roles/container-engine/kata-containers/molecule/default/molecule.yml
@@ -30,6 +30,7 @@ provisioner:
   config_options:
     defaults:
       callback_whitelist: profile_tasks
+      timeout: 120
   lint:
     name: ansible-lint
     options:
diff --git a/roles/kubernetes/preinstall/handlers/main.yml b/roles/kubernetes/preinstall/handlers/main.yml
index 54a5f6a67..667465b6f 100644
--- a/roles/kubernetes/preinstall/handlers/main.yml
+++ b/roles/kubernetes/preinstall/handlers/main.yml
@@ -9,6 +9,7 @@
     - Preinstall | restart kube-controller-manager crio/containerd
     - Preinstall | restart kube-apiserver docker
     - Preinstall | restart kube-apiserver crio/containerd
+    - Preinstall | wait for the apiserver to be running
   when: not ansible_os_family in ["Flatcar", "Flatcar Container Linux by Kinvolk"] and not is_fedora_coreos
 
 - name: Preinstall | update resolvconf for Flatcar Container Linux by Kinvolk
@@ -101,6 +102,21 @@
     - dns_mode != 'none'
     - resolvconf_mode == 'host_resolvconf'
 
+# Late-DNS phase only: after the apiserver restart handlers, poll /healthz until it returns 200 (up to 60 retries, 1s apart).
+- name: Preinstall | wait for the apiserver to be running
+  when:
+    - dns_late
+    - inventory_hostname in groups['kube_control_plane']
+    - dns_mode != 'none'
+    - resolvconf_mode == 'host_resolvconf'
+  uri:
+    validate_certs: no
+    url: "{{ kube_apiserver_endpoint }}/healthz"
+  register: result
+  retries: 60
+  delay: 1
+  until: result.status == 200
+
 - name: Preinstall | Restart systemd-resolved
   service:
     name: systemd-resolved
diff --git a/roles/kubernetes/preinstall/tasks/0040-set_facts.yml b/roles/kubernetes/preinstall/tasks/0040-set_facts.yml
index cf6612b00..1cfd47777 100644
--- a/roles/kubernetes/preinstall/tasks/0040-set_facts.yml
+++ b/roles/kubernetes/preinstall/tasks/0040-set_facts.yml
@@ -34,6 +34,39 @@
   changed_when: false
   check_mode: no
 
+- name: check existence of /etc/resolvconf/resolv.conf.d
+  register: resolvconfd_path
+  failed_when: false  # never fail here; the "set dns facts" task tests stat.isdir itself
+  stat:
+    path: /etc/resolvconf/resolv.conf.d
+    get_mime: no
+    get_checksum: no
+    get_attributes: no
+
+- name: check status of /etc/resolv.conf
+  register: resolvconf_stat
+  failed_when: false
+  stat:
+    path: /etc/resolv.conf
+    follow: no  # stat the path itself, so a symlinked resolv.conf is reported through stat.islnk
+    get_mime: no
+    get_checksum: no
+    get_attributes: no
+
+- block:
+    # Read the live resolver config; its nameservers are retained via configured_nameservers.
+    - name: get content of /etc/resolv.conf
+      slurp:
+        src: /etc/resolv.conf
+      register: resolvconf_slurp
+    # Only lines that begin with the keyword count, so commented-out entries are not picked up.
+    - name: get currently configured nameservers
+      set_fact:
+        configured_nameservers: "{{ resolvconf_slurp.content | b64decode | regex_findall('^nameserver\\s+(\\S+)', multiline=True) | ipaddr }}"
+      when: resolvconf_slurp.content is defined
+  # Skip the whole block unless the earlier stat reported that /etc/resolv.conf exists.
+  when: resolvconf_stat.stat.exists is defined and resolvconf_stat.stat.exists
+
 - name: check systemd-resolved
   # noqa 303 Should we use service_facts for this?
   command: systemctl is-active systemd-resolved
@@ -45,7 +78,7 @@
 - name: set dns facts
   set_fact:
     resolvconf: >-
-      {%- if resolvconf.rc == 0 -%}true{%- else -%}false{%- endif -%}
+      {%- if resolvconf.rc == 0 and resolvconfd_path.stat.isdir is defined and resolvconfd_path.stat.isdir -%}true{%- else -%}false{%- endif -%}
     bogus_domains: |-
       {% for d in [ 'default.svc.' + dns_domain, 'svc.' + dns_domain ] + searchdomains|default([]) -%}
       {{ dns_domain }}.{{ d }}./{{ d }}.{{ d }}./com.{{ d }}./
@@ -147,7 +180,7 @@
 - name: generate nameservers to resolvconf
   set_fact:
     nameserverentries:
-      nameserver {{ ( ( [nodelocaldns_ip] if enable_nodelocaldns else []) + coredns_server|d([]) + nameservers|d([]) + cloud_resolver|d([])) | unique | join(',nameserver ') }}
+      nameserver {{ ( ( [nodelocaldns_ip] if enable_nodelocaldns else []) + coredns_server|d([]) + nameservers|d([]) + cloud_resolver|d([]) + configured_nameservers|d([])) | unique | join(',nameserver ') }}
     supersede_nameserver:
       supersede domain-name-servers {{ ( coredns_server|d([]) + nameservers|d([]) + cloud_resolver|d([])) | unique | join(', ') }};
 
diff --git a/roles/kubernetes/preinstall/tasks/0060-resolvconf.yml b/roles/kubernetes/preinstall/tasks/0060-resolvconf.yml
index a34d031b1..65b55d7fb 100644
--- a/roles/kubernetes/preinstall/tasks/0060-resolvconf.yml
+++ b/roles/kubernetes/preinstall/tasks/0060-resolvconf.yml
@@ -16,7 +16,7 @@
     state: present
     insertbefore: BOF
     create: yes
-    backup: yes
+    backup: "{{ not (resolvconf_stat.stat.islnk | default(false)) }}"  # islnk is absent when the stat'ed file is missing; then keep backups on
     marker: "# Ansible entries {mark}"
     mode: 0644
   notify: Preinstall | propagate resolvconf to k8s components
@@ -25,7 +25,7 @@
   replace:
     path: "{{ item[0] }}"
     regexp: '^{{ item[1] }}[^#]*(?=# Ansible entries BEGIN)'
-    backup: yes
+    backup: "{{ not (resolvconf_stat.stat.islnk | default(false)) }}"  # islnk is absent when the stat'ed file is missing; then keep backups on
   with_nested:
     - "{{ [resolvconffile, base|default(''), head|default('')] | difference(['']) }}"
     - [ 'search ', 'nameserver ', 'domain ', 'options ' ]
@@ -36,13 +36,12 @@
     path: "{{ item[0] }}"
     regexp: '(# Ansible entries END\n(?:(?!^{{ item[1] }}).*\n)*)(?:^{{ item[1] }}.*\n?)+'
     replace: '\1'
-    backup: yes
+    backup: "{{ not (resolvconf_stat.stat.islnk | default(false)) }}"  # islnk is absent when the stat'ed file is missing; then keep backups on
   with_nested:
     - "{{ [resolvconffile, base|default(''), head|default('')] | difference(['']) }}"
     - [ 'search ', 'nameserver ', 'domain ', 'options ' ]
   notify: Preinstall | propagate resolvconf to k8s components
 
-
 - name: get temporary resolveconf cloud init file content
   command: cat {{ resolvconffile }}
   register: cloud_config
diff --git a/roles/kubernetes/preinstall/tasks/0100-dhclient-hooks.yml b/roles/kubernetes/preinstall/tasks/0100-dhclient-hooks.yml
index 28aed0740..f240d0fdf 100644
--- a/roles/kubernetes/preinstall/tasks/0100-dhclient-hooks.yml
+++ b/roles/kubernetes/preinstall/tasks/0100-dhclient-hooks.yml
@@ -22,7 +22,7 @@
     owner: root
     mode: 0755
   notify: Preinstall | propagate resolvconf to k8s components
-  when: ansible_os_family != "RedHat"
+  when: ansible_os_family not in [ "RedHat", "Suse" ]
 
 - name: Configure dhclient hooks for resolv.conf (RH-only)
   template:
diff --git a/roles/kubespray-defaults/defaults/main.yaml b/roles/kubespray-defaults/defaults/main.yaml
index 4d88c2889..12a28b9af 100644
--- a/roles/kubespray-defaults/defaults/main.yaml
+++ b/roles/kubespray-defaults/defaults/main.yaml
@@ -106,7 +106,7 @@ nodelocaldns_secondary_skew_seconds: 5
 manual_dns_server: ""
 
 # Can be docker_dns, host_resolvconf or none
-resolvconf_mode: docker_dns
+resolvconf_mode: host_resolvconf
 # Deploy netchecker app to verify DNS resolve as an HTTP service
 deploy_netchecker: false
 # Ip address of the kubernetes DNS service (called skydns for historical reasons)
diff --git a/tests/files/packet_centos7-docker-weave-upgrade-ha.yml b/tests/files/packet_centos7-docker-weave-upgrade-ha.yml
index 92d39306b..265c97dde 100644
--- a/tests/files/packet_centos7-docker-weave-upgrade-ha.yml
+++ b/tests/files/packet_centos7-docker-weave-upgrade-ha.yml
@@ -10,6 +10,7 @@ kubernetes_audit: true
 # Docker specific settings:
 container_manager: docker
 etcd_deployment_type: docker
+resolvconf_mode: docker_dns
 
 # Needed to upgrade from 1.16 to 1.17, otherwise upgrade is partial and bug followed
 upgrade_cluster_setup: true
diff --git a/tests/files/packet_centos8-docker.yml b/tests/files/packet_centos8-docker.yml
index fb7e9ba49..3d53119cd 100644
--- a/tests/files/packet_centos8-docker.yml
+++ b/tests/files/packet_centos8-docker.yml
@@ -10,3 +10,4 @@ calico_iptables_backend: "Auto"
 # Use docker
 container_manager: docker
 etcd_deployment_type: docker
+resolvconf_mode: docker_dns
diff --git a/tests/files/packet_debian10-docker.yml b/tests/files/packet_debian10-docker.yml
index a59371968..fc55e7f7f 100644
--- a/tests/files/packet_debian10-docker.yml
+++ b/tests/files/packet_debian10-docker.yml
@@ -6,3 +6,4 @@ mode: default
 # Use docker
 container_manager: docker
 etcd_deployment_type: docker
+resolvconf_mode: docker_dns
diff --git a/tests/files/packet_debian11-docker.yml b/tests/files/packet_debian11-docker.yml
index 3b93dd06e..69ec8eb25 100644
--- a/tests/files/packet_debian11-docker.yml
+++ b/tests/files/packet_debian11-docker.yml
@@ -6,3 +6,4 @@ mode: default
 # Use docker
 container_manager: docker
 etcd_deployment_type: docker
+resolvconf_mode: docker_dns
diff --git a/tests/files/packet_fedora34-docker-weave.yml b/tests/files/packet_fedora34-docker-weave.yml
index f9153538f..2fdef725d 100644
--- a/tests/files/packet_fedora34-docker-weave.yml
+++ b/tests/files/packet_fedora34-docker-weave.yml
@@ -9,3 +9,4 @@ kube_network_plugin: weave
 # Docker specific settings:
 container_manager: docker
 etcd_deployment_type: docker
+resolvconf_mode: docker_dns
diff --git a/tests/files/packet_ubuntu16-docker-weave-sep.yml b/tests/files/packet_ubuntu16-docker-weave-sep.yml
index c49c6307e..9b268e77e 100644
--- a/tests/files/packet_ubuntu16-docker-weave-sep.yml
+++ b/tests/files/packet_ubuntu16-docker-weave-sep.yml
@@ -10,6 +10,7 @@ auto_renew_certificates: true
 # Docker specific settings:
 container_manager: docker
 etcd_deployment_type: docker
+resolvconf_mode: docker_dns
 
 # Ubuntu 16 - docker containerd package available stopped at 1.4.6
 docker_containerd_version: latest
diff --git a/tests/files/packet_ubuntu18-docker.yml b/tests/files/packet_ubuntu18-docker.yml
index 74f5fa34d..548ff371e 100644
--- a/tests/files/packet_ubuntu18-docker.yml
+++ b/tests/files/packet_ubuntu18-docker.yml
@@ -7,3 +7,4 @@ vm_memory: 1600Mi
 # Use docker
 container_manager: docker
 etcd_deployment_type: docker
+resolvconf_mode: docker_dns
diff --git a/tests/files/packet_ubuntu20-docker.yml b/tests/files/packet_ubuntu20-docker.yml
index ca7c3c7e7..4089a6605 100644
--- a/tests/files/packet_ubuntu20-docker.yml
+++ b/tests/files/packet_ubuntu20-docker.yml
@@ -14,3 +14,4 @@ enable_nodelocaldns: False
 # Use docker
 container_manager: docker
 etcd_deployment_type: docker
+resolvconf_mode: docker_dns
-- 
GitLab