From 930df78d8a90960751cdffb5b3904af963806afd Mon Sep 17 00:00:00 2001
From: Max Gautier <mg@max.gautier.name>
Date: Thu, 12 Dec 2024 14:18:04 +0100
Subject: [PATCH] CI: Use deployment instead of Pod for agnhost

This is a followup to 2ba28a338 (Revert "Wait for available API token in
a new namespace (#7045)", 2024-10-25).

While checking for the serviceaccount token is not effective, there is
still a race when creating a Pod directly, because the ServiceAccount
itself might not be created yet.
More details at https://github.com/kubernetes/kubernetes/issues/66689.

This causes very frequent flakes in our CI with spurious failures.

Use a Deployment instead; it will take care of creating the Pods and
retrying; it also lets us use kubectl rollout status instead of manually
checking for the pods.
---
 tests/testcases/030_check-network.yml | 72 ++++++++++++---------------
 1 file changed, 33 insertions(+), 39 deletions(-)

diff --git a/tests/testcases/030_check-network.yml b/tests/testcases/030_check-network.yml
index aa4595898..b5942b116 100644
--- a/tests/testcases/030_check-network.yml
+++ b/tests/testcases/030_check-network.yml
@@ -79,53 +79,47 @@
     command:
       cmd: "{{ bin_dir }}/kubectl apply -f -"
       stdin: |
-        apiVersion: v1
-        kind: Pod
+        apiVersion: apps/v1
+        kind: Deployment
         metadata:
-          name: {{ item }}
-          namespace: test
+          name: agnhost
         spec:
-          containers:
-          - name: agnhost
-            image: {{ test_image_repo }}:{{ test_image_tag }}
-            command: ['/agnhost', 'netexec', '--http-port=8080']
-            securityContext:
-              allowPrivilegeEscalation: false
-              capabilities:
-                drop: ['ALL']
-              runAsUser: 1000
-              runAsNonRoot: true
-              seccompProfile:
-                type: RuntimeDefault
+          replicas: 2
+          selector:
+            matchLabels:
+              app: agnhost
+          template:
+            metadata:
+              labels:
+                app: agnhost
+            spec:
+              containers:
+              - name: agnhost
+                image: {{ test_image_repo }}:{{ test_image_tag }}
+                command: ['/agnhost', 'netexec', '--http-port=8080']
+                securityContext:
+                  allowPrivilegeEscalation: false
+                  capabilities:
+                    drop: ['ALL']
+                  runAsUser: 1000
+                  runAsNonRoot: true
+                  seccompProfile:
+                    type: RuntimeDefault
     changed_when: false
-    loop:
-    - agnhost1
-    - agnhost2
 
   - import_role:  # noqa name[missing]
       name: cluster-dump
 
   - name: Check that all pods are running and ready
-    command: "{{ bin_dir }}/kubectl get pods --namespace test --no-headers -o yaml"
-    changed_when: false
-    register: run_pods_log
-    until:
-    # Check that all pods are running
-    - '(run_pods_log.stdout | from_yaml)["items"] | map(attribute = "status.phase") | unique | list == ["Running"]'
-    # Check that all pods are ready
-    - '(run_pods_log.stdout | from_yaml)["items"] | map(attribute = "status.containerStatuses") | map("map", attribute = "ready") | map("min") | min'
-    retries: 18
-    delay: 10
-    failed_when: false
-
-  - name: Get pod names
-    command: "{{ bin_dir }}/kubectl get pods -n test -o json"
-    changed_when: false
-    register: pods
-
-  - debug:  # noqa name[missing]
-      msg: "{{ pods.stdout.split('\n') }}"
-    failed_when: not run_pods_log is success
+    block:
+    - name: Check Deployment is ready
+      command: "{{ bin_dir }}/kubectl rollout status deploy --namespace test agnhost --timeout=180s"
+      changed_when: false
+    rescue:
+    - name: Get pod names
+      command: "{{ bin_dir }}/kubectl get pods -n test -o json"
+      changed_when: false
+      register: pods
 
   - name: Get hostnet pods
     command: "{{ bin_dir }}/kubectl get pods -n test -o
-- 
GitLab