diff --git a/.gitignore b/.gitignore index 074dd431d0a0af08ac7a52a90c81d77dc0a0531c..b9b150a3d7fee144385bf9511174321f8f289f7c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ .vagrant *.retry inventory/vagrant_ansible_inventory +inventory/group_vars/fake_hosts.yml temp .idea .tox diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 9c663030639522956ccf056890494127863adcda..f02e7b84296f7163c830f7c1289b7ac3c04745ef 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -124,6 +124,7 @@ before_script: -e local_release_dir=${PWD}/downloads -e resolvconf_mode=${RESOLVCONF_MODE} -e vault_deployment_type=${VAULT_DEPLOYMENT} + --limit "all:!fake_hosts" cluster.yml # Repeat deployment if testing upgrade @@ -150,18 +151,19 @@ before_script: -e resolvconf_mode=${RESOLVCONF_MODE} -e weave_cpu_requests=${WEAVE_CPU_LIMIT} -e weave_cpu_limit=${WEAVE_CPU_LIMIT} + --limit "all:!fake_hosts" $PLAYBOOK; fi # Tests Cases ## Test Master API - - ansible-playbook -i inventory/inventory.ini -e ansible_python_interpreter=${PYPATH} -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root tests/testcases/010_check-apiserver.yml $LOG_LEVEL + - ansible-playbook -i inventory/inventory.ini -e ansible_python_interpreter=${PYPATH} -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root --limit "all:!fake_hosts" tests/testcases/010_check-apiserver.yml $LOG_LEVEL ## Ping the between 2 pod - - ansible-playbook -i inventory/inventory.ini -e ansible_python_interpreter=${PYPATH} -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root tests/testcases/030_check-network.yml $LOG_LEVEL + - ansible-playbook -i inventory/inventory.ini -e ansible_python_interpreter=${PYPATH} -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root --limit "all:!fake_hosts" tests/testcases/030_check-network.yml $LOG_LEVEL ## Advanced DNS checks - - ansible-playbook -i inventory/inventory.ini -e ansible_python_interpreter=${PYPATH} -u $SSH_USER -e 
ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root tests/testcases/040_check-network-adv.yml $LOG_LEVEL + - ansible-playbook -i inventory/inventory.ini -e ansible_python_interpreter=${PYPATH} -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root --limit "all:!fake_hosts" tests/testcases/040_check-network-adv.yml $LOG_LEVEL ## Idempotency checks 1/5 (repeat deployment) - > @@ -178,6 +180,7 @@ before_script: -e local_release_dir=${PWD}/downloads -e etcd_deployment_type=${ETCD_DEPLOYMENT} -e kubelet_deployment_type=${KUBELET_DEPLOYMENT} + --limit "all:!fake_hosts" cluster.yml; fi @@ -186,6 +189,7 @@ before_script: if [ "${IDEMPOT_CHECK}" = "true" ]; then ansible-playbook -i inventory/inventory.ini -e ansible_python_interpreter=${PYPATH} -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root + --limit "all:!fake_hosts" tests/testcases/040_check-network-adv.yml $LOG_LEVEL; fi @@ -197,7 +201,8 @@ before_script: --private-key=${HOME}/.ssh/id_rsa -e bootstrap_os=${BOOTSTRAP_OS} -e ansible_python_interpreter=${PYPATH} - -e reset_confirmation=yes + -e reset_confirmation=yes + --limit "all:!fake_hosts" reset.yml; fi @@ -216,6 +221,7 @@ before_script: -e local_release_dir=${PWD}/downloads -e etcd_deployment_type=${ETCD_DEPLOYMENT} -e kubelet_deployment_type=${KUBELET_DEPLOYMENT} + --limit "all:!fake_hosts" cluster.yml; fi @@ -224,6 +230,7 @@ before_script: if [ "${IDEMPOT_CHECK}" = "true" ]; then ansible-playbook -i inventory/inventory.ini -e ansible_python_interpreter=${PYPATH} -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root + --limit "all:!fake_hosts" tests/testcases/040_check-network-adv.yml $LOG_LEVEL; fi @@ -244,7 +251,7 @@ before_script: .coreos_calico_sep_variables: &coreos_calico_sep_variables # stage: deploy-gce-part1 KUBE_NETWORK_PLUGIN: calico - CLOUD_IMAGE: coreos-stable-1235-6-0-v20170111 + CLOUD_IMAGE: coreos-stable-1235-6-0-v20170111 CLOUD_REGION: us-west1-b CLUSTER_MODE: separate 
BOOTSTRAP_OS: coreos @@ -287,8 +294,8 @@ before_script: CLOUD_REGION: us-east1-b CLUSTER_MODE: default BOOTSTRAP_OS: coreos - RESOLVCONF_MODE: host_resolvconf # This is required as long as the CoreOS stable channel uses docker < 1.12 IDEMPOT_CHECK: "true" + RESOLVCONF_MODE: host_resolvconf # This is required as long as the CoreOS stable channel uses docker < 1.12 .rhel7_canal_sep_variables: &rhel7_canal_sep_variables # stage: deploy-gce-special @@ -310,16 +317,17 @@ before_script: KUBE_NETWORK_PLUGIN: calico CLOUD_IMAGE: centos-7 CLOUD_REGION: europe-west1-b - CLUSTER_MODE: ha + CLUSTER_MODE: ha-scale IDEMPOT_CHECK: "true" .coreos_alpha_weave_ha_variables: &coreos_alpha_weave_ha_variables # stage: deploy-gce-special KUBE_NETWORK_PLUGIN: weave - CLOUD_IMAGE: coreos-alpha + CLOUD_IMAGE: coreos-alpha-1325-0-0-v20170216 CLOUD_REGION: us-west1-a - CLUSTER_MODE: ha + CLUSTER_MODE: ha-scale BOOTSTRAP_OS: coreos + RESOLVCONF_MODE: host_resolvconf # This is required as long as the CoreOS stable channel uses docker < 1.12 .ubuntu_rkt_sep_variables: &ubuntu_rkt_sep_variables # stage: deploy-gce-part1 diff --git a/docs/test_cases.md b/docs/test_cases.md index 20b05286124075f4ff7713edb2a870ce97fe95c6..8cb2823951d69aed387bcf439d505b3770da2516 100644 --- a/docs/test_cases.md +++ b/docs/test_cases.md @@ -4,25 +4,40 @@ Travis CI test matrix GCE instances ------------- -Here is the test matrix for the Travis CI gates: +Here is the test matrix for the CI gates: | Network plugin| OS type| GCE region| Nodes layout| |-------------------------|-------------------------|-------------------------|-------------------------| -| canal| debian-8-kubespray| asia-east1-a| ha| +| canal| debian-8-kubespray| asia-east1-a| ha-scale| | calico| debian-8-kubespray| europe-west1-c| default| | flannel| centos-7| asia-northeast1-c| default| | calico| centos-7| us-central1-b| ha| | weave| rhel-7| us-east1-c| default| -| canal| coreos-stable| us-west1-b| default| +| canal| coreos-stable| us-west1-b| 
ha-scale| | canal| rhel-7| asia-northeast1-b| separate| | weave| ubuntu-1604-xenial| europe-west1-d| separate| | calico| coreos-stable| us-central1-f| separate| -Where the nodes layout `default` is a non-HA two nodes setup with the separate `kube-node` -and the `etcd` group merged with the `kube-master`. The `separate` layout is when -there is only node of each type, which is a kube master, compute and etcd cluster member. -And the `ha` layout stands for a two etcd nodes, two masters and a single worker node, -partially intersecting though. + +Node Layouts +------------ + +There are four node layout types: `default`, `separate`, `ha`, and `scale`. + + +`default` is a non-HA two-node setup with one separate `kube-node` +and the `etcd` group merged with the `kube-master`. + +`separate` layout is when there is only one node of each type, which includes + a kube-master, kube-node, and etcd cluster member. + +`ha` layout consists of two etcd nodes, two masters and a single worker node, +with role intersection. + +`scale` layout can be combined with the above layouts. It includes 200 fake hosts +in the Ansible inventory. This helps test TLS certificate generation at scale +to prevent regressions and profile certain long-running tasks. These nodes are +never actually deployed, but certificates are generated for them. Note, the canal network plugin deploys flannel as well plus calico policy controller. 
@@ -40,15 +55,15 @@ GCE instances | Stage| Network plugin| OS type| GCE region| Nodes layout |--------------------|--------------------|--------------------|--------------------|--------------------| -| part1| calico| coreos-stable| us-west1-b| separated| +| part1| calico| coreos-stable| us-west1-b| separate| | part1| canal| debian-8-kubespray| us-east1-b| ha| | part1| weave| rhel-7| europe-west1-b| default| | part2| flannel| centos-7| us-west1-a| default| | part2| calico| debian-8-kubespray| us-central1-b| default| | part2| canal| coreos-stable| us-east1-b| default| -| special| canal| rhel-7| us-east1-b| separated| -| special| weave| ubuntu-1604-xenial| us-central1-b| separated| -| special| calico| centos-7| europe-west1-b| ha| -| special| weave| coreos-alpha| us-west1-a| ha| +| special| canal| rhel-7| us-east1-b| separate| +| special| weave| ubuntu-1604-xenial| us-central1-b| default| +| special| calico| centos-7| europe-west1-b| ha-scale| +| special| weave| coreos-alpha| us-west1-a| ha-scale| The "Stage" means a build step of the build pipeline. The steps are ordered as `part1->part2->special`. 
diff --git a/roles/download/tasks/set_docker_image_facts.yml b/roles/download/tasks/set_docker_image_facts.yml index 3d3b3b1d6d1e5b0e79379aac9631b567c5250ab2..0efda4d091ad8ea0ce9bda57278d0d3c99aadcbc 100644 --- a/roles/download/tasks/set_docker_image_facts.yml +++ b/roles/download/tasks/set_docker_image_facts.yml @@ -10,6 +10,7 @@ - name: Register docker images info raw: >- {{ docker_bin_dir }}/docker images -q | xargs {{ docker_bin_dir }}/docker inspect -f "{{ '{{' }} .RepoTags {{ '}}' }},{{ '{{' }} .RepoDigests {{ '}}' }}" + no_log: true register: docker_images_raw failed_when: false check_mode: no @@ -17,6 +18,7 @@ - set_fact: docker_images: "{{docker_images_raw.stdout|regex_replace('\\[|\\]|\\n]','')|regex_replace('\\s',',')}}" + no_log: true when: not download_always_pull|bool - set_fact: diff --git a/roles/etcd/tasks/check_certs.yml b/roles/etcd/tasks/check_certs.yml index 270c0abfc6827a162429020a14a0a645ef03b6e0..bc14e255fa1d244f1be3bb86c099f05b3a1d9da9 100644 --- a/roles/etcd/tasks/check_certs.yml +++ b/roles/etcd/tasks/check_certs.yml @@ -2,6 +2,7 @@ - name: "Check_certs | check if all certs have already been generated on first master" stat: path: "{{ etcd_cert_dir }}/{{ item }}" + get_md5: no delegate_to: "{{groups['etcd'][0]}}" register: etcdcert_master run_once: true diff --git a/roles/etcd/tasks/gen_certs_script.yml b/roles/etcd/tasks/gen_certs_script.yml index 55b54e1ac5bfc5a2e33a923363ba057554306eba..bb6d55660b9da0de29d8922f01a3a67750fdd1dc 100644 --- a/roles/etcd/tasks/gen_certs_script.yml +++ b/roles/etcd/tasks/gen_certs_script.yml @@ -87,6 +87,7 @@ args: executable: /bin/bash register: etcd_master_cert_data + no_log: true check_mode: no delegate_to: "{{groups['etcd'][0]}}" when: inventory_hostname in groups['etcd'] and sync_certs|default(false) and @@ -98,6 +99,7 @@ args: executable: /bin/bash register: etcd_node_cert_data + no_log: true check_mode: no delegate_to: "{{groups['etcd'][0]}}" when: (('calico-rr' in groups and inventory_hostname in 
groups['calico-rr']) or @@ -109,6 +111,7 @@ shell: "base64 -d <<< '{{etcd_master_cert_data.stdout|quote}}' | tar xz -C {{ etcd_cert_dir }}" args: executable: /bin/bash + no_log: true changed_when: false when: inventory_hostname in groups['etcd'] and sync_certs|default(false) and inventory_hostname != groups['etcd'][0] diff --git a/roles/kubernetes/secrets/tasks/gen_certs_script.yml b/roles/kubernetes/secrets/tasks/gen_certs_script.yml index 1257af8c9a9ab701da524cbd404ba6e257fa026a..f75a45d1ae31810dc75fd2a348caf35e898a454a 100644 --- a/roles/kubernetes/secrets/tasks/gen_certs_script.yml +++ b/roles/kubernetes/secrets/tasks/gen_certs_script.yml @@ -80,6 +80,7 @@ shell: "tar cfz - -C {{ kube_cert_dir }} -T /dev/stdin <<< {{ my_master_certs|join(' ') }} {{ all_node_certs|join(' ') }} | base64 --wrap=0" args: executable: /bin/bash + no_log: true register: master_cert_data check_mode: no delegate_to: "{{groups['kube-master'][0]}}" @@ -90,6 +91,7 @@ shell: "tar cfz - -C {{ kube_cert_dir }} -T /dev/stdin <<< {{ my_node_certs|join(' ') }} | base64 --wrap=0" args: executable: /bin/bash + no_log: true register: node_cert_data check_mode: no delegate_to: "{{groups['kube-master'][0]}}" @@ -116,6 +118,7 @@ - name: Gen_certs | Unpack certs on masters shell: "base64 -d < {{ cert_tempfile.stdout }} | tar xz -C {{ kube_cert_dir }}" + no_log: true changed_when: false check_mode: no when: inventory_hostname in groups['kube-master'] and sync_certs|default(false) and @@ -133,6 +136,7 @@ shell: "base64 -d <<< '{{node_cert_data.stdout|quote}}' | tar xz -C {{ kube_cert_dir }}" args: executable: /bin/bash + no_log: true changed_when: false check_mode: no when: inventory_hostname in groups['kube-node'] and diff --git a/tests/cloud_playbooks/create-gce.yml b/tests/cloud_playbooks/create-gce.yml index 4ae32b7878991da09832b170f830649ab8458d27..d1e7c011e82bb324b546640ab630f928eed2d2ab 100644 --- a/tests/cloud_playbooks/create-gce.yml +++ b/tests/cloud_playbooks/create-gce.yml @@ -13,7 +13,7 @@ - 
set_fact: instance_names: >- - {%- if mode in ['separate', 'ha'] -%} + {%- if mode in ['separate', 'separate-scale', 'ha', 'ha-scale'] -%} k8s-{{test_name}}-1,k8s-{{test_name}}-2,k8s-{{test_name}}-3 {%- else -%} k8s-{{test_name}}-1,k8s-{{test_name}}-2 @@ -39,6 +39,18 @@ src: ../templates/inventory-gce.j2 dest: "{{ inventory_path }}" + - name: Make group_vars directory + file: + path: "{{ inventory_path|dirname }}/group_vars" + state: directory + when: mode in ['scale', 'separate-scale', 'ha-scale'] + + - name: Template fake hosts group vars + template: + src: ../templates/fake_hosts.yml.j2 + dest: "{{ inventory_path|dirname }}/group_vars/fake_hosts.yml" + when: mode in ['scale', 'separate-scale', 'ha-scale'] + - name: Wait for SSH to come up wait_for: host={{item.public_ip}} port=22 delay=10 timeout=180 state=started with_items: "{{gce.instance_data}}" diff --git a/tests/templates/fake_hosts.yml.j2 b/tests/templates/fake_hosts.yml.j2 new file mode 100644 index 0000000000000000000000000000000000000000..673109213ab1325a95e02cc29bfa343d4cf362b0 --- /dev/null +++ b/tests/templates/fake_hosts.yml.j2 @@ -0,0 +1,3 @@ +ansible_default_ipv4: + address: 255.255.255.255 +ansible_hostname: "{{ '{{' }}inventory_hostname}}" diff --git a/tests/templates/inventory-gce.j2 b/tests/templates/inventory-gce.j2 index f5326229c1d8a7e814c87121bc05b37445d96a4a..d58f6896cdf96752c63070ed20f0713c7ad569a1 100644 --- a/tests/templates/inventory-gce.j2 +++ b/tests/templates/inventory-gce.j2 @@ -2,12 +2,11 @@ {% set node2 = gce.instance_data[1].name %} {{node1}} ansible_ssh_host={{gce.instance_data[0].public_ip}} {{node2}} ansible_ssh_host={{gce.instance_data[1].public_ip}} -{% if mode is defined and mode in ["separate", "ha"] %} +{% if mode is defined and mode in ["ha", "ha-scale", "separate", "separate-scale"] %} {% set node3 = gce.instance_data[2].name %} {{node3}} ansible_ssh_host={{gce.instance_data[2].public_ip}} {% endif %} - -{% if mode is defined and mode == "separate" %} +{% if mode is 
defined and mode in ["separate", "separate-scale"] %} [kube-master] {{node1}} @@ -19,7 +18,7 @@ [vault] {{node3}} -{% elif mode is defined and mode == "ha" %} +{% elif mode is defined and mode in ["ha", "ha-scale"] %} [kube-master] {{node1}} {{node2}} @@ -51,3 +50,12 @@ [k8s-cluster:children] kube-node kube-master + +{% if mode is defined and mode in ["scale", "separate-scale", "ha-scale"] %} +[fake_hosts] +fake_scale_host[1:200] + +[kube-node:children] +fake_hosts +{% endif %} + diff --git a/tests/testcases/040_check-network-adv.yml b/tests/testcases/040_check-network-adv.yml index 667707046af5161a0393b0199d270100f16e806e..fded3155ff1d9ea540d993a862b1e6858b53e16f 100644 --- a/tests/testcases/040_check-network-adv.yml +++ b/tests/testcases/040_check-network-adv.yml @@ -35,31 +35,30 @@ run_once: true delegate_to: "{{groups['kube-master'][0]}}" register: nca_pod - until: "{{ nca_pod.stdout_lines|length }} >= {{ groups['kube-node']|length * 2 }}" + until: "{{ nca_pod.stdout_lines|length }} >= {{ groups['kube-node']|intersect(play_hosts)|length * 2 }}" retries: 3 delay: 10 - name: Get netchecker agents uri: url=http://localhost:{{netchecker_port}}/api/v1/agents/ return_content=yes run_once: true - delegate_to: "{{groups['kube-node'][0]}}" + delegate_to: "{{groups['kube-master'][0]}}" register: agents - retries: 6 + retries: 18 delay: "{{ agent_report_interval }}" until: "{{ agents.content|length > 0 and agents.content[0] == '{' and - agents.content|from_json|length >= groups['kube-node']|length * 2 }}" + agents.content|from_json|length >= groups['kube-node']|intersect(play_hosts)|length * 2 }}" failed_when: false no_log: true - debug: var=agents.content|from_json failed_when: not agents|success and not agents.content=='{}' - delegate_to: "{{groups['kube-node'][0]}}" run_once: true - name: Check netchecker status uri: url=http://localhost:{{netchecker_port}}/api/v1/connectivity_check status_code=200 return_content=yes - delegate_to: "{{groups['kube-node'][0]}}" + 
delegate_to: "{{groups['kube-master'][0]}}" run_once: true register: result retries: 3 @@ -70,9 +69,9 @@ - debug: var=result.content|from_json failed_when: not result|success - delegate_to: "{{groups['kube-node'][0]}}" run_once: true when: not agents.content=='{}' + delegate_to: "{{groups['kube-master'][0]}}" - debug: msg="Cannot get reports from agents, consider as PASSING" run_once: true