From dd4bc5fbfe92611de9021bfed319f6c8b8016565 Mon Sep 17 00:00:00 2001
From: Samuel Liu <liupeng0518@gmail.com>
Date: Fri, 9 Sep 2022 16:29:22 +0800
Subject: [PATCH] [etcd] Sometimes, we do not need to run etcd role on all
 nodes. (#9173)

* WIP: sometimes, we do not run etcd

* fix ansible lint

* like calico (kdd) CNI, no need to run etcd
---
 cluster.yml                                 |  7 +-
 roles/etcd/defaults/main.yml                |  2 +-
 roles/etcd/tasks/check_certs.yml            | 38 ++++++++--
 roles/etcd/tasks/gen_certs_script.yml       | 78 ++++++++++-----------
 roles/etcd/tasks/gen_nodes_certs_script.yml | 32 +++++++++
 roles/etcd/tasks/main.yml                   | 18 ++++-
 scale.yml                                   | 16 ++++-
 upgrade-cluster.yml                         |  7 +-
 8 files changed, 140 insertions(+), 58 deletions(-)
 create mode 100644 roles/etcd/tasks/gen_nodes_certs_script.yml

diff --git a/cluster.yml b/cluster.yml
index cc169f80b..5f163de6a 100644
--- a/cluster.yml
+++ b/cluster.yml
@@ -35,7 +35,7 @@
     - { role: "container-engine", tags: "container-engine", when: deploy_container_engine }
     - { role: download, tags: download, when: "not skip_downloads" }
 
-- hosts: etcd
+- hosts: etcd:kube_control_plane
   gather_facts: False
   any_errors_fatal: "{{ any_errors_fatal | default(true) }}"
   environment: "{{ proxy_disable_env }}"
@@ -59,7 +59,10 @@
       vars:
         etcd_cluster_setup: false
         etcd_events_cluster_setup: false
-      when: etcd_deployment_type != "kubeadm"
+      when:
+        - etcd_deployment_type != "kubeadm"
+        - kube_network_plugin in ["calico", "flannel", "canal", "cilium"] or cilium_deploy_additionally | default(false) | bool
+        - kube_network_plugin != "calico" or calico_datastore == "etcd"
 
 - hosts: k8s_cluster
   gather_facts: False
diff --git a/roles/etcd/defaults/main.yml b/roles/etcd/defaults/main.yml
index 1f11e8ddc..bf38acee5 100644
--- a/roles/etcd/defaults/main.yml
+++ b/roles/etcd/defaults/main.yml
@@ -66,7 +66,7 @@ etcd_memory_limit: "{% if ansible_memtotal_mb < 4096 %}512M{% else %}0{% endif %
 
 etcd_blkio_weight: 1000
 
-etcd_node_cert_hosts: "{{ groups['k8s_cluster'] | union(groups.get('calico_rr', [])) }}"
+etcd_node_cert_hosts: "{{ groups['k8s_cluster'] }}"
 
 etcd_compaction_retention: "8"
 
diff --git a/roles/etcd/tasks/check_certs.yml b/roles/etcd/tasks/check_certs.yml
index ed0580b55..c688c16d8 100644
--- a/roles/etcd/tasks/check_certs.yml
+++ b/roles/etcd/tasks/check_certs.yml
@@ -33,14 +33,13 @@
   stat:
     path: "{{ etcd_cert_dir }}/{{ item }}"
   register: etcd_node_certs
-  when: (('calico_rr' in groups and inventory_hostname in groups['calico_rr']) or
-        inventory_hostname in groups['k8s_cluster'])
+  when: inventory_hostname in groups['k8s_cluster']
   with_items:
     - ca.pem
     - node-{{ inventory_hostname }}.pem
     - node-{{ inventory_hostname }}-key.pem
 
-- name: "Check_certs | Set 'gen_certs' to true if expected certificates are not on the first etcd node"
+- name: "Check_certs | Set 'gen_certs' to true if expected certificates are not on the first etcd node(1/2)"
   set_fact:
     gen_certs: true
   when: force_etcd_cert_refresh or not item in etcdcert_master.files|map(attribute='path') | list
@@ -56,13 +55,39 @@
         '{{ etcd_cert_dir }}/member-{{ host }}.pem',
         '{{ etcd_cert_dir }}/member-{{ host }}-key.pem',
       {% endfor %}
-      {% set k8s_nodes = groups['k8s_cluster']|union(groups['calico_rr']|default([]))|unique|sort %}
+      {% set k8s_nodes = groups['kube_control_plane'] %}
       {% for host in k8s_nodes %}
         '{{ etcd_cert_dir }}/node-{{ host }}.pem',
         '{{ etcd_cert_dir }}/node-{{ host }}-key.pem'
         {% if not loop.last %}{{','}}{% endif %}
       {% endfor %}]
 
+- name: "Check_certs | Set 'gen_certs' to true if expected certificates are not on the first etcd node(2/2)"
+  set_fact:
+    gen_certs: true
+  run_once: true
+  with_items: "{{ expected_files }}"
+  vars:
+    expected_files: >-
+      ['{{ etcd_cert_dir }}/ca.pem',
+      {% set etcd_members = groups['etcd'] %}
+      {% for host in etcd_members %}
+        '{{ etcd_cert_dir }}/admin-{{ host }}.pem',
+        '{{ etcd_cert_dir }}/admin-{{ host }}-key.pem',
+        '{{ etcd_cert_dir }}/member-{{ host }}.pem',
+        '{{ etcd_cert_dir }}/member-{{ host }}-key.pem',
+      {% endfor %}
+      {% set k8s_nodes = groups['k8s_cluster']|unique|sort %}
+      {% for host in k8s_nodes %}
+        '{{ etcd_cert_dir }}/node-{{ host }}.pem',
+        '{{ etcd_cert_dir }}/node-{{ host }}-key.pem'
+        {% if not loop.last %}{{','}}{% endif %}
+      {% endfor %}]
+  when:
+    - kube_network_plugin in ["calico", "flannel", "canal", "cilium"] or cilium_deploy_additionally | default(false) | bool
+    - kube_network_plugin != "calico" or calico_datastore == "etcd"
+    - force_etcd_cert_refresh or not item in etcdcert_master.files|map(attribute='path') | list
+
 - name: "Check_certs | Set 'gen_master_certs' object to track whether member and admin certs exist on first etcd node"
   set_fact:
     gen_master_certs: |-
@@ -89,7 +114,7 @@
   set_fact:
     gen_node_certs: |-
       {
-      {% set k8s_nodes = groups['k8s_cluster']|union(groups['calico_rr']|default([]))|unique|sort -%}
+      {% set k8s_nodes = groups['k8s_cluster'] -%}
       {% set existing_certs = etcdcert_master.files|map(attribute='path')|list|sort %}
       {% for host in k8s_nodes -%}
         {% set host_cert = "%s/node-%s.pem"|format(etcd_cert_dir, host) %}
@@ -125,8 +150,7 @@
   set_fact:
     kubernetes_host_requires_sync: true
   when:
-    - (('calico_rr' in groups and inventory_hostname in groups['calico_rr']) or
-      inventory_hostname in groups['k8s_cluster']) and
+    - inventory_hostname in groups['k8s_cluster'] and
       inventory_hostname not in groups['etcd']
     - (not etcd_node_certs.results[0].stat.exists|default(false)) or
       (not etcd_node_certs.results[1].stat.exists|default(false)) or
diff --git a/roles/etcd/tasks/gen_certs_script.yml b/roles/etcd/tasks/gen_certs_script.yml
index fb619bdb0..eb97a824d 100644
--- a/roles/etcd/tasks/gen_certs_script.yml
+++ b/roles/etcd/tasks/gen_certs_script.yml
@@ -38,7 +38,7 @@
     - gen_certs|default(false)
     - inventory_hostname == groups['etcd'][0]
 
-- name: Gen_certs | run cert generation script
+- name: Gen_certs | run cert generation script for etcd and kube control plane nodes
   command: "bash -x {{ etcd_script_dir }}/make-ssl-etcd.sh -f {{ etcd_config_dir }}/openssl.conf -d {{ etcd_cert_dir }}"
   environment:
     - MASTERS: "{% for m in groups['etcd'] %}
@@ -46,7 +46,7 @@
                     {{ m }}
                   {% endif %}
                 {% endfor %}"
-    - HOSTS: "{% for h in (groups['k8s_cluster'] + groups['calico_rr']|default([]))|unique %}
+    - HOSTS: "{% for h in groups['kube_control_plane'] %}
                 {% if gen_node_certs[h] %}
                     {{ h }}
                 {% endif %}
@@ -56,7 +56,23 @@
   when: gen_certs|default(false)
   notify: set etcd_secret_changed
 
-- name: Gen_certs | Gather etcd member and admin certs from first etcd node
+- name: Gen_certs | run cert generation script for all clients
+  command: "bash -x {{ etcd_script_dir }}/make-ssl-etcd.sh -f {{ etcd_config_dir }}/openssl.conf -d {{ etcd_cert_dir }}"
+  environment:
+    - HOSTS: "{% for h in groups['k8s_cluster'] %}
+                {% if gen_node_certs[h] %}
+                    {{ h }}
+                {% endif %}
+              {% endfor %}"
+  run_once: yes
+  delegate_to: "{{ groups['etcd'][0] }}"
+  when:
+    - kube_network_plugin in ["calico", "flannel", "canal", "cilium"] or cilium_deploy_additionally | default(false) | bool
+    - kube_network_plugin != "calico" or calico_datastore == "etcd"
+    - gen_certs|default(false)
+  notify: set etcd_secret_changed
+
+- name: Gen_certs | Gather etcd member/admin and kube_control_plane client certs from first etcd node
   slurp:
     src: "{{ item }}"
   register: etcd_master_certs
@@ -69,6 +85,10 @@
         '{{ etcd_cert_dir }}/member-{{ node }}.pem',
         '{{ etcd_cert_dir }}/member-{{ node }}-key.pem',
         {% endfor %}]"
+    - "[{% for node in (groups['kube_control_plane']) %}
+        '{{ etcd_cert_dir }}/node-{{ node }}.pem',
+        '{{ etcd_cert_dir }}/node-{{ node }}-key.pem',
+        {% endfor %}]"
   delegate_to: "{{ groups['etcd'][0] }}"
   when:
     - inventory_hostname in groups['etcd']
@@ -76,7 +96,7 @@
     - inventory_hostname != groups['etcd'][0]
   notify: set etcd_secret_changed
 
-- name: Gen_certs | Write etcd member and admin certs to other etcd nodes
+- name: Gen_certs | Write etcd member/admin and kube_control_plane client certs to other etcd nodes
   copy:
     dest: "{{ item.item }}"
     content: "{{ item.content | b64decode }}"
@@ -96,7 +116,7 @@
     src: "{{ item }}"
   register: etcd_master_node_certs
   with_items:
-    - "[{% for node in (groups['k8s_cluster'] + groups['calico_rr']|default([]))|unique %}
+    - "[{% for node in groups['k8s_cluster'] %}
         '{{ etcd_cert_dir }}/node-{{ node }}.pem',
         '{{ etcd_cert_dir }}/node-{{ node }}-key.pem',
         {% endfor %}]"
@@ -104,6 +124,8 @@
   when:
     - inventory_hostname in groups['etcd']
     - inventory_hostname != groups['etcd'][0]
+    - kube_network_plugin in ["calico", "flannel", "canal", "cilium"] or cilium_deploy_additionally | default(false) | bool
+    - kube_network_plugin != "calico" or calico_datastore == "etcd"
   notify: set etcd_secret_changed
 
 - name: Gen_certs | Write node certs to other etcd nodes
@@ -117,47 +139,21 @@
   when:
     - inventory_hostname in groups['etcd']
     - inventory_hostname != groups['etcd'][0]
+    - kube_network_plugin in ["calico", "flannel", "canal", "cilium"] or cilium_deploy_additionally | default(false) | bool
+    - kube_network_plugin != "calico" or calico_datastore == "etcd"
   loop_control:
     label: "{{ item.item }}"
 
-- name: Gen_certs | Set cert names per node
-  set_fact:
-    my_etcd_node_certs: [ 'ca.pem',
-                          'node-{{ inventory_hostname }}.pem',
-                          'node-{{ inventory_hostname }}-key.pem']
-  tags:
-    - facts
-
-- name: "Check_certs | Set 'sync_certs' to true on nodes"
-  set_fact:
-    sync_certs: true
-  when: (('calico_rr' in groups and inventory_hostname in groups['calico_rr']) or
-        inventory_hostname in groups['k8s_cluster']) and
-        inventory_hostname not in groups['etcd']
-  with_items:
-    - "{{ my_etcd_node_certs }}"
-
-- name: Gen_certs | Gather node certs
-  shell: "set -o pipefail && tar cfz - -C {{ etcd_cert_dir }} {{ my_etcd_node_certs|join(' ') }} | base64 --wrap=0"
-  args:
-    executable: /bin/bash
-    warn: false
-  no_log: "{{ not (unsafe_show_logs|bool) }}"
-  register: etcd_node_certs
-  check_mode: no
-  delegate_to: "{{ groups['etcd'][0] }}"
-  when: (('calico_rr' in groups and inventory_hostname in groups['calico_rr']) or
-        inventory_hostname in groups['k8s_cluster']) and
+- include_tasks: gen_nodes_certs_script.yml
+  when:
+    - inventory_hostname in groups['kube_control_plane'] and
         sync_certs|default(false) and inventory_hostname not in groups['etcd']
 
-- name: Gen_certs | Copy certs on nodes
-  shell: "set -o pipefail && base64 -d <<< '{{ etcd_node_certs.stdout|quote }}' | tar xz -C {{ etcd_cert_dir }}"
-  args:
-    executable: /bin/bash
-  no_log: "{{ not (unsafe_show_logs|bool) }}"
-  changed_when: false
-  when: (('calico_rr' in groups and inventory_hostname in groups['calico_rr']) or
-        inventory_hostname in groups['k8s_cluster']) and
+- include_tasks: gen_nodes_certs_script.yml
+  when:
+    - kube_network_plugin in ["calico", "flannel", "canal", "cilium"] or cilium_deploy_additionally | default(false) | bool
+    - kube_network_plugin != "calico" or calico_datastore == "etcd"
+    - inventory_hostname in groups['k8s_cluster'] and
         sync_certs|default(false) and inventory_hostname not in groups['etcd']
 
 - name: Gen_certs | check certificate permissions
diff --git a/roles/etcd/tasks/gen_nodes_certs_script.yml b/roles/etcd/tasks/gen_nodes_certs_script.yml
new file mode 100644
index 000000000..d176e01aa
--- /dev/null
+++ b/roles/etcd/tasks/gen_nodes_certs_script.yml
@@ -0,0 +1,32 @@
+---
+# Copy this host's CA and node cert/key from the first etcd node into the local etcd_cert_dir.
+- name: Gen_certs | Set cert names per node
+  set_fact:
+    my_etcd_node_certs: [ 'ca.pem',
+                          'node-{{ inventory_hostname }}.pem',
+                          'node-{{ inventory_hostname }}-key.pem']
+  tags:
+    - facts
+
+- name: "Check_certs | Set 'sync_certs' to true on nodes"
+  set_fact:
+    sync_certs: true
+
+# Delegated to the first etcd node; the tarball is base64-encoded to pass via registered stdout.
+- name: Gen_certs | Gather node certs
+  shell: "set -o pipefail && tar cfz - -C {{ etcd_cert_dir }} {{ my_etcd_node_certs|join(' ') }} | base64 --wrap=0"
+  args:
+    executable: /bin/bash
+    warn: false
+  no_log: "{{ not (unsafe_show_logs|bool) }}"
+  register: etcd_node_certs
+  check_mode: false
+  delegate_to: "{{ groups['etcd'][0] }}"
+  changed_when: false
+
+- name: Gen_certs | Copy certs on nodes
+  shell: "set -o pipefail && base64 -d <<< '{{ etcd_node_certs.stdout|quote }}' | tar xz -C {{ etcd_cert_dir }}"
+  args:
+    executable: /bin/bash
+  no_log: "{{ not (unsafe_show_logs|bool) }}"
+  changed_when: false
diff --git a/roles/etcd/tasks/main.yml b/roles/etcd/tasks/main.yml
index 465be73fa..fb593dbae 100644
--- a/roles/etcd/tasks/main.yml
+++ b/roles/etcd/tasks/main.yml
@@ -12,6 +12,16 @@
     - etcd-secrets
 
 - include_tasks: upd_ca_trust.yml
+  when:
+    - inventory_hostname in groups['etcd']|union(groups['kube_control_plane'])|unique|sort
+  tags:
+    - etcd-secrets
+
+- include_tasks: upd_ca_trust.yml
+  when:
+    - kube_network_plugin in ["calico", "flannel", "canal", "cilium"] or cilium_deploy_additionally | default(false) | bool
+    - kube_network_plugin != "calico" or calico_datastore == "etcd"
+    - inventory_hostname in groups['k8s_cluster']
   tags:
     - etcd-secrets
 
@@ -21,7 +31,9 @@
   changed_when: false
   check_mode: no
   when:
-    - inventory_hostname in groups['k8s_cluster']|union(groups['calico_rr']|default([]))|unique|sort
+    - kube_network_plugin in ["calico", "flannel", "canal", "cilium"] or cilium_deploy_additionally | default(false) | bool
+    - kube_network_plugin != "calico" or calico_datastore == "etcd"
+    - inventory_hostname in groups['k8s_cluster']
   tags:
     - master
     - network
@@ -30,7 +42,9 @@
   set_fact:
     etcd_client_cert_serial: "{{ etcd_client_cert_serial_result.stdout.split('=')[1] }}"
   when:
-    - inventory_hostname in groups['k8s_cluster']|union(groups['calico_rr']|default([]))|unique|sort
+    - kube_network_plugin in ["calico", "flannel", "canal", "cilium"] or cilium_deploy_additionally | default(false) | bool
+    - kube_network_plugin != "calico" or calico_datastore == "etcd"
+    - inventory_hostname in groups['k8s_cluster']
   tags:
     - master
     - network
diff --git a/scale.yml b/scale.yml
index 533b97727..8e79bfa03 100644
--- a/scale.yml
+++ b/scale.yml
@@ -27,7 +27,7 @@
   import_playbook: facts.yml
 
 - name: Generate the etcd certificates beforehand
-  hosts: etcd
+  hosts: etcd:kube_control_plane
   gather_facts: False
   any_errors_fatal: "{{ any_errors_fatal | default(true) }}"
   environment: "{{ proxy_disable_env }}"
@@ -38,7 +38,10 @@
       vars:
         etcd_cluster_setup: false
         etcd_events_cluster_setup: false
-      when: etcd_deployment_type != "kubeadm"
+      when:
+        - etcd_deployment_type != "kubeadm"
+        - kube_network_plugin in ["calico", "flannel", "canal", "cilium"] or cilium_deploy_additionally | default(false) | bool
+        - kube_network_plugin != "calico" or calico_datastore == "etcd"
 
 - name: Download images to ansible host cache via first kube_control_plane node
   hosts: kube_control_plane[0]
@@ -60,7 +63,14 @@
     - { role: kubernetes/preinstall, tags: preinstall }
     - { role: container-engine, tags: "container-engine", when: deploy_container_engine }
     - { role: download, tags: download, when: "not skip_downloads" }
-    - { role: etcd, tags: etcd, etcd_cluster_setup: false, when: "etcd_deployment_type != 'kubeadm'" }
+    - role: etcd
+      tags: etcd
+      vars:
+        etcd_cluster_setup: false
+      when:
+        - etcd_deployment_type != "kubeadm"
+        - kube_network_plugin in ["calico", "flannel", "canal", "cilium"] or cilium_deploy_additionally | default(false) | bool
+        - kube_network_plugin != "calico" or calico_datastore == "etcd"
 
 - name: Target only workers to get kubelet installed and checking in on any new nodes(node)
   hosts: kube_node
diff --git a/upgrade-cluster.yml b/upgrade-cluster.yml
index 35064ec60..39dd95a01 100644
--- a/upgrade-cluster.yml
+++ b/upgrade-cluster.yml
@@ -59,7 +59,7 @@
     - { role: kubespray-defaults }
     - { role: container-engine, tags: "container-engine", when: deploy_container_engine }
 
-- hosts: etcd
+- hosts: etcd:kube_control_plane
   gather_facts: False
   any_errors_fatal: "{{ any_errors_fatal | default(true) }}"
   environment: "{{ proxy_disable_env }}"
@@ -83,7 +83,10 @@
       vars:
         etcd_cluster_setup: false
         etcd_events_cluster_setup: false
-      when: etcd_deployment_type != "kubeadm"
+      when:
+        - etcd_deployment_type != "kubeadm"
+        - kube_network_plugin in ["calico", "flannel", "canal", "cilium"] or cilium_deploy_additionally | default(false) | bool
+        - kube_network_plugin != "calico" or calico_datastore == "etcd"
 
 - name: Handle upgrades to master components first to maintain backwards compat.
   gather_facts: False
-- 
GitLab