From a9e29a9eb2e0e01e50760281a05a2247def44b4d Mon Sep 17 00:00:00 2001
From: Max Gautier <mg@max.gautier.name>
Date: Tue, 16 Jan 2024 15:50:41 +0100
Subject: [PATCH] Fix etcd client generation (#10769)

* ci: redefine multinode to node-etcd-client

This should allow us to catch several classes of problems rather than
just one, e.g. those caused by network plugins such as calico or cilium
talking directly to etcd.

* Dynamically define etcd host range

This has two benefits:
- We don't play the etcd role twice for no reason
- We have access to the whole cluster (if needed) to use things like
  group_by.
---
 docs/test_cases.md                            |  5 +--
 playbooks/install_etcd.yml                    | 34 +++++++++----------
 .../roles/packet-ci/templates/inventory.j2    |  6 +++-
 .../roles/packet-ci/vars/main.yml             |  2 +-
 .../packet_ubuntu22-calico-etcd-datastore.yml |  2 +-
 5 files changed, 26 insertions(+), 23 deletions(-)

diff --git a/docs/test_cases.md b/docs/test_cases.md
index d5aef62f2..b26d3aa50 100644
--- a/docs/test_cases.md
+++ b/docs/test_cases.md
@@ -1,6 +1,6 @@
 # Node Layouts
 
-There are six node layout types: `default`, `separate`, `ha`, `scale`, `all-in-one`, and `multinode`.
+There are six node layout types: `default`, `separate`, `ha`, `scale`, `all-in-one`, and `node-etcd-client`.
 
 `default` is a non-HA two nodes setup with one separate `kube_node`
 and the `etcd` group merged with the `kube_control_plane`.
@@ -18,7 +18,8 @@ never actually deployed, but certificates are generated for them.
 
 `all-in-one` layout use a single node for with `kube_control_plane`, `etcd` and `kube_node` merged.
 
-`multinode` layout consists of two separate `kube_node` and a merged single `etcd+kube_control_plane` node.
+`node-etcd-client` layout consists of a 4-node cluster: all nodes are in `kube_node`, the first 3 are in `etcd`, and only one is in `kube_control_plane`.
+This is necessary to test setups requiring that nodes are etcd clients (for instance, using cilium as `kube_network_plugin`).
 
 Note, the canal network plugin deploys flannel as well plus calico policy controller.
 
diff --git a/playbooks/install_etcd.yml b/playbooks/install_etcd.yml
index 9bd13dfcd..b8e4d1d89 100644
--- a/playbooks/install_etcd.yml
+++ b/playbooks/install_etcd.yml
@@ -1,20 +1,21 @@
 ---
-- name: Install etcd
-  hosts: etcd:kube_control_plane
-  gather_facts: False
-  any_errors_fatal: "{{ any_errors_fatal | default(true) }}"
-  environment: "{{ proxy_disable_env }}"
+- name: Add worker nodes to the etcd play if needed
+  hosts: kube_node
   roles:
     - { role: kubespray-defaults }
-    - role: etcd
+  tasks:
+    - name: Check if nodes needs etcd client certs (depends on network_plugin)
+      group_by:  # puts every host passing `when` into the group named by `key`
+        key: "_kubespray_needs_etcd"  # dynamic group targeted by the "Install etcd" play below
+      when:  # both list items must hold; `| bool` keeps string values like "false" from being truthy
+        - kube_network_plugin in ["flannel", "canal", "cilium"] or
+          (cilium_deploy_additionally | default(false) | bool) or
+          (kube_network_plugin == "calico" and calico_datastore == "etcd")
+        - etcd_deployment_type != "kubeadm"
       tags: etcd
-      vars:
-        etcd_cluster_setup: true
-        etcd_events_cluster_setup: "{{ etcd_events_cluster_enabled }}"
-      when: etcd_deployment_type != "kubeadm"
 
-- name: Install etcd certs on nodes if required
-  hosts: k8s_cluster
+- name: Install etcd
+  hosts: etcd:kube_control_plane:_kubespray_needs_etcd  # last group is created at runtime by the group_by task in the preceding play
   gather_facts: False
   any_errors_fatal: "{{ any_errors_fatal | default(true) }}"
   environment: "{{ proxy_disable_env }}"
@@ -23,9 +24,6 @@
     - role: etcd
       tags: etcd
       vars:
-        etcd_cluster_setup: false
-        etcd_events_cluster_setup: false
-      when:
-        - etcd_deployment_type != "kubeadm"
-        - kube_network_plugin in ["calico", "flannel", "canal", "cilium"] or cilium_deploy_additionally | default(false) | bool
-        - kube_network_plugin != "calico" or calico_datastore == "etcd"
+        etcd_cluster_setup: true
+        etcd_events_cluster_setup: "{{ etcd_events_cluster_enabled }}"
+      when: etcd_deployment_type != "kubeadm"
diff --git a/tests/cloud_playbooks/roles/packet-ci/templates/inventory.j2 b/tests/cloud_playbooks/roles/packet-ci/templates/inventory.j2
index da7e74969..cb6977629 100644
--- a/tests/cloud_playbooks/roles/packet-ci/templates/inventory.j2
+++ b/tests/cloud_playbooks/roles/packet-ci/templates/inventory.j2
@@ -81,16 +81,20 @@ instance-2
 [broken_etcd]
 instance-1 etcd_member_name=etcd2
 instance-2 etcd_member_name=etcd3
-{% elif mode == "multinode" %}
+{% elif mode == "node-etcd-client" %}
 [kube_control_plane]
 instance-1
 
 [etcd]
 instance-1
+instance-2
+instance-3
 
 [kube_node]
+instance-1
 instance-2
 instance-3
+instance-4
 {% endif %}
 
 [k8s_cluster:children]
diff --git a/tests/cloud_playbooks/roles/packet-ci/vars/main.yml b/tests/cloud_playbooks/roles/packet-ci/vars/main.yml
index 5cfe561b7..81aa17d9f 100644
--- a/tests/cloud_playbooks/roles/packet-ci/vars/main.yml
+++ b/tests/cloud_playbooks/roles/packet-ci/vars/main.yml
@@ -7,7 +7,7 @@ _vm_count_dict:
   ha-recover: 3
   ha-recover-noquorum: 3
   all-in-one: 1
-  multinode: 3
+  node-etcd-client: 4
   default: 2
 
 vm_count: "{{ _vm_count_dict[mode | d('default')] }}"
diff --git a/tests/files/packet_ubuntu22-calico-etcd-datastore.yml b/tests/files/packet_ubuntu22-calico-etcd-datastore.yml
index c95a7602f..e2d3cb72a 100644
--- a/tests/files/packet_ubuntu22-calico-etcd-datastore.yml
+++ b/tests/files/packet_ubuntu22-calico-etcd-datastore.yml
@@ -1,7 +1,7 @@
 ---
 # Instance settings
 cloud_image: ubuntu-2204
-mode: multinode
+mode: node-etcd-client
 vm_memory: 1600Mi
 
 # Kubespray settings
-- 
GitLab