From be9414fabea54d96fd928e690130dc1fc88e9bf5 Mon Sep 17 00:00:00 2001
From: Maxime Guyot <Miouge1@users.noreply.github.com>
Date: Wed, 1 Apr 2020 16:23:29 +0200
Subject: [PATCH] Add cluster dump artifact in CI jobs (#5796)

---
 .gitlab-ci.yml                                |  5 +++-
 .gitlab-ci/terraform.yml                      |  3 ++
 tests/Makefile                                | 12 ++++----
 tests/scripts/testcases_prepare.sh            |  1 +
 tests/scripts/testcases_run.sh                | 30 +++++++++----------
 tests/testcases/015_check-pods-running.yml    |  3 ++
 tests/testcases/020_check-nodes-ready.yml     |  3 ++
 tests/testcases/030_check-network.yml         |  3 ++
 tests/testcases/040_check-network-adv.yml     |  3 ++
 .../roles/cluster-dump/tasks/main.yml         | 18 +++++++++++
 10 files changed, 59 insertions(+), 22 deletions(-)
 create mode 100644 tests/testcases/roles/cluster-dump/tasks/main.yml

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index d437d3d6e..f35b988fe 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -25,7 +25,7 @@ variables:
   IDEMPOT_CHECK: "false"
   RESET_CHECK: "false"
   UPGRADE_TEST: "false"
-  LOG_LEVEL: "-vv"
+  ANSIBLE_LOG_LEVEL: "-vv"  # verbosity flag handed to ansible-playbook by tests/Makefile and tests/scripts/testcases_run.sh
   RECOVER_CONTROL_PLANE_TEST: "false"
   RECOVER_CONTROL_PLANE_TEST_GROUPS: "etcd[2:],kube-master[1:]"
 
@@ -41,6 +41,10 @@ before_script:
   variables:
     KUBESPRAY_VERSION: v2.12.5
   image: quay.io/kubespray/kubespray:$KUBESPRAY_VERSION
+  artifacts:
+    when: always  # default is on_success, which would drop the dump for failed jobs
+    paths:
+      - cluster-dump/
 
 .testcases: &testcases
   <<: *job
diff --git a/.gitlab-ci/terraform.yml b/.gitlab-ci/terraform.yml
index 8b26fec0b..13eea9687 100644
--- a/.gitlab-ci/terraform.yml
+++ b/.gitlab-ci/terraform.yml
@@ -34,6 +34,10 @@
   stage: deploy-part2
   when: manual
   only: [/^pr-.*$/]
+  artifacts:
+    when: always  # default is on_success, which would drop the dump for failed jobs
+    paths:
+      - cluster-dump/
   variables:
     ANSIBLE_INVENTORY_UNPARSED_FAILED: "true"
     ANSIBLE_INVENTORY: hosts
diff --git a/tests/Makefile b/tests/Makefile
index 6b6956f2c..bc495d81a 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -24,7 +24,7 @@ delete-tf:
 
 create-gce: init-gce
 	ansible-playbook cloud_playbooks/create-gce.yml -i local_inventory/hosts.cfg -c local \
-	$(LOG_LEVEL) \
+	$(ANSIBLE_LOG_LEVEL) \
 	-e @"files/${CI_JOB_NAME}.yml" \
 	-e gce_credentials_file=$(HOME)/.ssh/gce.json \
 	-e gce_project_id=$(GCE_PROJECT_ID) \
@@ -36,7 +36,7 @@ create-gce: init-gce
 
 delete-gce:
 	ansible-playbook -i $(INVENTORY) cloud_playbooks/delete-gce.yml -c local \
-	$(LOG_LEVEL) \
+	$(ANSIBLE_LOG_LEVEL) \
 	-e @"files/${CI_JOB_NAME}.yml" \
 	-e test_id=$(TEST_ID) \
 	-e gce_project_id=$(GCE_PROJECT_ID) \
@@ -46,14 +46,14 @@ delete-gce:
 
 create-do: init-do
 	ansible-playbook cloud_playbooks/create-do.yml -i local_inventory/hosts.cfg -c local \
-	${LOG_LEVEL} \
+	${ANSIBLE_LOG_LEVEL} \
 	-e @"files/${CI_JOB_NAME}.yml" \
 	-e inventory_path=$(INVENTORY) \
 	-e test_id=${TEST_ID}
 
 delete-do:
 	ansible-playbook -i $(INVENTORY) cloud_playbooks/create-do.yml -c local \
-	$(LOG_LEVEL) \
+	$(ANSIBLE_LOG_LEVEL) \
 	-e @"files/${CI_JOB_NAME}.yml" \
 	-e state=absent \
 	-e test_id=${TEST_ID} \
@@ -61,14 +61,14 @@ delete-do:
 
 create-packet: init-packet
 	ansible-playbook cloud_playbooks/create-packet.yml -c local \
-	$(LOG_LEVEL) \
+	$(ANSIBLE_LOG_LEVEL) \
 	-e @"files/${CI_JOB_NAME}.yml" \
 	-e test_id=$(TEST_ID) \
 	-e inventory_path=$(INVENTORY)
 
 delete-packet:
 	ansible-playbook cloud_playbooks/delete-packet.yml -c local \
-	$(LOG_LEVEL) \
+	$(ANSIBLE_LOG_LEVEL) \
 	-e @"files/${CI_JOB_NAME}.yml" \
 	-e test_id=$(TEST_ID) \
 	-e inventory_path=$(INVENTORY)
diff --git a/tests/scripts/testcases_prepare.sh b/tests/scripts/testcases_prepare.sh
index 4f5610097..454315783 100755
--- a/tests/scripts/testcases_prepare.sh
+++ b/tests/scripts/testcases_prepare.sh
@@ -3,5 +3,6 @@ set -euxo pipefail
 
 /usr/bin/python -m pip install -r tests/requirements.txt
 mkdir -p /.ssh
+mkdir -p cluster-dump  # relative to the working directory; same name as the cluster-dump/ artifact path in the CI config
 mkdir -p $HOME/.ssh
 ansible-playbook --version
diff --git a/tests/scripts/testcases_run.sh b/tests/scripts/testcases_run.sh
index a174db06f..614c8183a 100755
--- a/tests/scripts/testcases_run.sh
+++ b/tests/scripts/testcases_run.sh
@@ -37,62 +37,62 @@ test "${UPGRADE_TEST}" != "false" && git fetch --all && git checkout "$KUBESPRAY
 test "${UPGRADE_TEST}" != "false" && git checkout "${CI_BUILD_REF}" tests/files/${CI_JOB_NAME}.yml tests/testcases/*.yml
 
 # Create cluster
-ansible-playbook ${LOG_LEVEL} -e @${CI_TEST_VARS} -e local_release_dir=${PWD}/downloads -e ansible_python_interpreter=${PYPATH} --limit "all:!fake_hosts" cluster.yml
+ansible-playbook ${ANSIBLE_LOG_LEVEL} -e @${CI_TEST_VARS} -e local_release_dir=${PWD}/downloads -e ansible_python_interpreter=${PYPATH} --limit "all:!fake_hosts" cluster.yml
 
 # Repeat deployment if testing upgrade
 if [ "${UPGRADE_TEST}" != "false" ]; then
   test "${UPGRADE_TEST}" == "basic" && PLAYBOOK="cluster.yml"
   test "${UPGRADE_TEST}" == "graceful" && PLAYBOOK="upgrade-cluster.yml"
   git checkout "${CI_BUILD_REF}"
-  ansible-playbook ${LOG_LEVEL} -e @${CI_TEST_VARS} -e local_release_dir=${PWD}/downloads -e ansible_python_interpreter=${PYPATH} --limit "all:!fake_hosts" $PLAYBOOK
+  ansible-playbook ${ANSIBLE_LOG_LEVEL} -e @${CI_TEST_VARS} -e local_release_dir=${PWD}/downloads -e ansible_python_interpreter=${PYPATH} --limit "all:!fake_hosts" $PLAYBOOK
 fi
 
 # Test control plane recovery
 if [ "${RECOVER_CONTROL_PLANE_TEST}" != "false" ]; then
-  ansible-playbook ${LOG_LEVEL} -e @${CI_TEST_VARS} -e local_release_dir=${PWD}/downloads -e ansible_python_interpreter=${PYPATH} --limit "${RECOVER_CONTROL_PLANE_TEST_GROUPS}:!fake_hosts" -e reset_confirmation=yes reset.yml
-  ansible-playbook ${LOG_LEVEL} -e @${CI_TEST_VARS} -e local_release_dir=${PWD}/downloads -e ansible_python_interpreter=${PYPATH} -e etcd_retries=10 --limit etcd,kube-master:!fake_hosts recover-control-plane.yml
+  ansible-playbook ${ANSIBLE_LOG_LEVEL} -e @${CI_TEST_VARS} -e local_release_dir=${PWD}/downloads -e ansible_python_interpreter=${PYPATH} --limit "${RECOVER_CONTROL_PLANE_TEST_GROUPS}:!fake_hosts" -e reset_confirmation=yes reset.yml
+  ansible-playbook ${ANSIBLE_LOG_LEVEL} -e @${CI_TEST_VARS} -e local_release_dir=${PWD}/downloads -e ansible_python_interpreter=${PYPATH} -e etcd_retries=10 --limit "etcd,kube-master:!fake_hosts" recover-control-plane.yml
 fi
 
 # Tests Cases
 ## Test Master API
-ansible-playbook -e ansible_python_interpreter=${PYPATH} --limit "all:!fake_hosts" tests/testcases/010_check-apiserver.yml $LOG_LEVEL
+ansible-playbook -e ansible_python_interpreter=${PYPATH} --limit "all:!fake_hosts" tests/testcases/010_check-apiserver.yml $ANSIBLE_LOG_LEVEL
 
 ## Test that all pods are Running
-ansible-playbook -e ansible_python_interpreter=${PYPATH} --limit "all:!fake_hosts" tests/testcases/015_check-pods-running.yml $LOG_LEVEL
+ansible-playbook -e ansible_python_interpreter=${PYPATH} --limit "all:!fake_hosts" tests/testcases/015_check-pods-running.yml $ANSIBLE_LOG_LEVEL
 
 ## Test that all nodes are Ready
-ansible-playbook -e ansible_python_interpreter=${PYPATH} --limit "all:!fake_hosts" tests/testcases/020_check-nodes-ready.yml $LOG_LEVEL
+ansible-playbook -e ansible_python_interpreter=${PYPATH} --limit "all:!fake_hosts" tests/testcases/020_check-nodes-ready.yml $ANSIBLE_LOG_LEVEL
 
 ## Test pod creation and ping between them
-ansible-playbook -e ansible_python_interpreter=${PYPATH} --limit "all:!fake_hosts" tests/testcases/030_check-network.yml $LOG_LEVEL
+ansible-playbook -e ansible_python_interpreter=${PYPATH} --limit "all:!fake_hosts" tests/testcases/030_check-network.yml $ANSIBLE_LOG_LEVEL
 
 ## Advanced DNS checks
-ansible-playbook -e ansible_python_interpreter=${PYPATH} --limit "all:!fake_hosts" tests/testcases/040_check-network-adv.yml $LOG_LEVEL
+ansible-playbook -e ansible_python_interpreter=${PYPATH} --limit "all:!fake_hosts" tests/testcases/040_check-network-adv.yml $ANSIBLE_LOG_LEVEL
 
 ## Kubernetes conformance tests
-ansible-playbook -i ${ANSIBLE_INVENTORY} -e ansible_python_interpreter=${PYPATH} -e @${CI_TEST_VARS} --limit "all:!fake_hosts" tests/testcases/100_check-k8s-conformance.yml $LOG_LEVEL
+ansible-playbook -i ${ANSIBLE_INVENTORY} -e ansible_python_interpreter=${PYPATH} -e @${CI_TEST_VARS} --limit "all:!fake_hosts" tests/testcases/100_check-k8s-conformance.yml $ANSIBLE_LOG_LEVEL
 
 ## Idempotency checks 1/5 (repeat deployment)
 if [ "${IDEMPOT_CHECK}" = "true" ]; then
-  ansible-playbook ${LOG_LEVEL} -e @${CI_TEST_VARS} -e ansible_python_interpreter=${PYPATH} -e local_release_dir=${PWD}/downloads --limit "all:!fake_hosts" cluster.yml
+  ansible-playbook ${ANSIBLE_LOG_LEVEL} -e @${CI_TEST_VARS} -e ansible_python_interpreter=${PYPATH} -e local_release_dir=${PWD}/downloads --limit "all:!fake_hosts" cluster.yml
 fi
 
 ## Idempotency checks 2/5 (Advanced DNS checks)
 if [ "${IDEMPOT_CHECK}" = "true" ]; then
-  ansible-playbook ${LOG_LEVEL} -e @${CI_TEST_VARS} --limit "all:!fake_hosts" tests/testcases/040_check-network-adv.yml
+  ansible-playbook ${ANSIBLE_LOG_LEVEL} -e @${CI_TEST_VARS} --limit "all:!fake_hosts" tests/testcases/040_check-network-adv.yml
 fi
 
 ## Idempotency checks 3/5 (reset deployment)
 if [ "${IDEMPOT_CHECK}" = "true" -a "${RESET_CHECK}" = "true" ]; then
-  ansible-playbook ${LOG_LEVEL} -e @${CI_TEST_VARS} -e ansible_python_interpreter=${PYPATH} -e reset_confirmation=yes --limit "all:!fake_hosts" reset.yml
+  ansible-playbook ${ANSIBLE_LOG_LEVEL} -e @${CI_TEST_VARS} -e ansible_python_interpreter=${PYPATH} -e reset_confirmation=yes --limit "all:!fake_hosts" reset.yml
 fi
 
 ## Idempotency checks 4/5 (redeploy after reset)
 if [ "${IDEMPOT_CHECK}" = "true" -a "${RESET_CHECK}" = "true" ]; then
-  ansible-playbook ${LOG_LEVEL} -e @${CI_TEST_VARS} -e ansible_python_interpreter=${PYPATH} -e local_release_dir=${PWD}/downloads --limit "all:!fake_hosts" cluster.yml
+  ansible-playbook ${ANSIBLE_LOG_LEVEL} -e @${CI_TEST_VARS} -e ansible_python_interpreter=${PYPATH} -e local_release_dir=${PWD}/downloads --limit "all:!fake_hosts" cluster.yml
 fi
 
 ## Idempotency checks 5/5 (Advanced DNS checks)
 if [ "${IDEMPOT_CHECK}" = "true" -a "${RESET_CHECK}" = "true" ]; then
-  ansible-playbook -e ansible_python_interpreter=${PYPATH}  --limit "all:!fake_hosts" tests/testcases/040_check-network-adv.yml $LOG_LEVEL
+  ansible-playbook -e ansible_python_interpreter=${PYPATH}  --limit "all:!fake_hosts" tests/testcases/040_check-network-adv.yml $ANSIBLE_LOG_LEVEL
 fi
diff --git a/tests/testcases/015_check-pods-running.yml b/tests/testcases/015_check-pods-running.yml
index 3c21aca57..9679be5fc 100644
--- a/tests/testcases/015_check-pods-running.yml
+++ b/tests/testcases/015_check-pods-running.yml
@@ -12,6 +12,9 @@
       bin_dir: "/usr/local/bin"
     when: not ansible_os_family in ["CoreOS", "Coreos", "Container Linux by CoreOS", "Flatcar", "Flatcar Container Linux by Kinvolk"]
 
+  - import_role:  # run the cluster-dump role before the pod checks that follow
+      name: cluster-dump
+
   - name: Check kubectl output
     shell: "{{ bin_dir }}/kubectl get pods --all-namespaces -owide"
     register: get_pods
diff --git a/tests/testcases/020_check-nodes-ready.yml b/tests/testcases/020_check-nodes-ready.yml
index d78a5673b..14a3468f5 100644
--- a/tests/testcases/020_check-nodes-ready.yml
+++ b/tests/testcases/020_check-nodes-ready.yml
@@ -12,6 +12,9 @@
       bin_dir: "/usr/local/bin"
     when: not ansible_os_family in ["CoreOS", "Coreos", "Container Linux by CoreOS", "Flatcar", "Flatcar Container Linux by Kinvolk"]
 
+  - import_role:  # run the cluster-dump role before the node checks that follow
+      name: cluster-dump
+
   - name: Check kubectl output
     shell: "{{ bin_dir }}/kubectl get nodes"
     register: get_nodes
diff --git a/tests/testcases/030_check-network.yml b/tests/testcases/030_check-network.yml
index 064ea12e0..e741450cb 100644
--- a/tests/testcases/030_check-network.yml
+++ b/tests/testcases/030_check-network.yml
@@ -21,6 +21,9 @@
   - name: Run a replica controller composed of 2 pods in test ns
     shell: "{{ bin_dir }}/kubectl run test --image={{ test_image_repo }}:{{ test_image_tag }} --namespace test --replicas=2 --command -- tail -f /dev/null"
 
+  - import_role:  # run the cluster-dump role after the test pods are requested, before they are checked
+      name: cluster-dump
+
   - name: Check that all pods are running and ready
     shell: "{{ bin_dir }}/kubectl get pods --namespace test --no-headers -o yaml"
     register: run_pods_log
diff --git a/tests/testcases/040_check-network-adv.yml b/tests/testcases/040_check-network-adv.yml
index 88859666c..de2197e09 100644
--- a/tests/testcases/040_check-network-adv.yml
+++ b/tests/testcases/040_check-network-adv.yml
@@ -24,6 +24,9 @@
         bin_dir: "/usr/local/bin"
       when: not ansible_os_family in ["CoreOS", "Coreos", "Container Linux by CoreOS", "Flatcar", "Flatcar Container Linux by Kinvolk"]
 
+    - import_role:  # run the cluster-dump role before waiting on the netchecker pods
+        name: cluster-dump
+
     - name: Wait for netchecker server
       shell: "{{ bin_dir }}/kubectl get pods -o wide --namespace {{ netcheck_namespace }} | grep ^netchecker-server"
       delegate_to: "{{ groups['kube-master'][0] }}"
diff --git a/tests/testcases/roles/cluster-dump/tasks/main.yml b/tests/testcases/roles/cluster-dump/tasks/main.yml
new file mode 100644
index 000000000..e1d5d35a6
--- /dev/null
+++ b/tests/testcases/roles/cluster-dump/tasks/main.yml
@@ -0,0 +1,22 @@
+---
+# Collects a diagnostic snapshot on every host in the kube-master group:
+# dump the cluster state with kubectl, pack it into a tarball, then copy the
+# tarball to $CI_PROJECT_DIR/cluster-dump/<inventory_hostname>.tgz on the controller.
+- name: Generate dump folder
+  # Plain argv execution is enough here (no pipes, globs or redirects), so use command, not shell.
+  command: "{{ bin_dir }}/kubectl cluster-info dump --all-namespaces --output-directory /tmp/cluster-dump"
+  no_log: true  # keep the task result out of the Ansible output
+  when: inventory_hostname in groups['kube-master']
+
+- name: Compress directory cluster-dump
+  archive:
+    path: /tmp/cluster-dump
+    dest: /tmp/cluster-dump.tgz
+  when: inventory_hostname in groups['kube-master']
+
+- name: Fetch dump file
+  fetch:
+    src: /tmp/cluster-dump.tgz
+    dest: "{{ lookup('env', 'CI_PROJECT_DIR') }}/cluster-dump/{{ inventory_hostname }}.tgz"
+    flat: true  # write exactly to dest instead of a per-host directory tree
+  when: inventory_hostname in groups['kube-master']
-- 
GitLab