From 45262da7267aea1162c5196ab826f2f7226b98bc Mon Sep 17 00:00:00 2001
From: Cristian Calin <6627509+cristicalin@users.noreply.github.com>
Date: Thu, 14 Apr 2022 11:08:46 +0300
Subject: [PATCH] [calico] call calico checks early on to prevent altering the
 cluster with bad configuration (#8707)

---
 docs/calico.md                                |  37 ++++---
 .../preinstall/tasks/0020-verify-settings.yml |  85 ---------------
 roles/kubernetes/preinstall/tasks/main.yml    |   8 ++
 roles/network_plugin/calico/tasks/check.yml   | 102 ++++++++++++++++++
 roles/network_plugin/calico/tasks/main.yml    |   2 -
 5 files changed, 135 insertions(+), 99 deletions(-)

diff --git a/docs/calico.md b/docs/calico.md
index 51c2ec655..1cec1392c 100644
--- a/docs/calico.md
+++ b/docs/calico.md
@@ -214,6 +214,13 @@ Calico supports two types of encapsulation: [VXLAN and IP in IP](https://docs.pr
 
 *IP in IP* and *VXLAN* is mutualy exclusive modes.
 
+Kubespray defaults have changed after version 2.18 from auto-enabling `ipip` mode to auto-enabling `vxlan`. This was done to facilitate wider deployment scenarios including those where vxlan acceleration is provided by the underlying network devices.
+
+If you are running your cluster with the default calico settings and are upgrading to a release post 2.18.x (i.e. 2.19 and later or `master` branch) then you have two options:
+
+* perform a manual migration to vxlan before upgrading kubespray (see migrating from IP in IP to VXLAN below)
+* pin the pre-2.19 settings in your ansible inventory (see IP in IP mode settings below)
+
 ### IP in IP mode
 
 To configure Ip in Ip mode you need to use the bird network backend.
@@ -224,27 +231,33 @@ calico_vxlan_mode: 'Never'
 calico_network_backend: 'bird'
 ```
 
-### VXLAN mode (default)
+### BGP mode
 
-To configure VXLAN mode you can use the default settings, the example below is provided for your reference.
+To enable BGP no-encapsulation mode:
 
 ```yml
 calico_ipip_mode: 'Never'
-calico_vxlan_mode: 'Always'  # Possible values is `Always`, `CrossSubnet`, `Never`.
-calico_network_backend: 'vxlan'
+calico_vxlan_mode: 'Never'
+calico_network_backend: 'bird'
 ```
 
-In VXLAN mode BGP networking is not required.
-We disable BGP to reduce the moving parts in your cluster by `calico_network_backend: vxlan`
+### Migrating from IP in IP to VXLAN
 
-### BGP mode
+If you would like to migrate from the old default (IP in IP with the `bird` network backend) to the new VXLAN based encapsulation, you need to perform this change before running an upgrade of your cluster; the `cluster.yml` and `upgrade-cluster.yml` playbooks will refuse to continue if they detect incompatible settings.
 
-To enable BGP no-encapsulation mode:
+Execute the following steps on one of the control plane nodes; ensure the cluster is healthy before proceeding.
 
-```yml
-calico_ipip_mode: 'Never'
-calico_vxlan_mode: 'Never'
-calico_network_backend: 'bird'
+```shell
+calicoctl.sh patch felixconfig default -p '{"spec":{"vxlanEnabled":true}}'
+calicoctl.sh patch ippool default-pool -p '{"spec":{"ipipMode":"Never", "vxlanMode":"Always"}}'
+```
+
+**Note:** if you created multiple ippools you will need to patch all of them individually to change their encapsulation. The kubespray playbooks only handle the default ippool created by kubespray.
+
+Wait for the `vxlan.calico` interfaces to be created on all cluster nodes and for traffic to be routed through them, then you can disable `ipip`.
+
+```shell
+calicoctl.sh patch felixconfig default -p '{"spec":{"ipipEnabled":false}}'
 ```
 
 ## Configuring interface MTU
diff --git a/roles/kubernetes/preinstall/tasks/0020-verify-settings.yml b/roles/kubernetes/preinstall/tasks/0020-verify-settings.yml
index 08f4eaeb1..eae32a4c6 100644
--- a/roles/kubernetes/preinstall/tasks/0020-verify-settings.yml
+++ b/roles/kubernetes/preinstall/tasks/0020-verify-settings.yml
@@ -36,44 +36,6 @@
     - kube_network_plugin is defined
     - not ignore_assert_errors
 
-- name: Stop if legacy encapsulation variables are detected (ipip)
-  assert:
-    that:
-      - ipip is not defined
-    msg: "'ipip' configuration variable is deprecated, please configure your inventory with 'calico_ipip_mode' set to 'Always' or 'CrossSubnet' according to your specific needs"
-  when:
-    - kube_network_plugin == 'calico'
-    - not ignore_assert_errors
-
-- name: Stop if legacy encapsulation variables are detected (ipip_mode)
-  assert:
-    that:
-      - ipip_mode is not defined
-    msg: "'ipip_mode' configuration variable is deprecated, please configure your inventory with 'calico_ipip_mode' set to 'Always' or 'CrossSubnet' according to your specific needs"
-  when:
-    - kube_network_plugin == 'calico'
-    - not ignore_assert_errors
-
-- name: Stop if incompatible network plugin and cloudprovider
-  assert:
-    that:
-      - calico_ipip_mode == 'Never'
-      - calico_vxlan_mode in ['Always', 'CrossSubnet']
-    msg: "When using cloud_provider azure and network_plugin calico calico_ipip_mode must be 'Never' and calico_vxlan_mode 'Always' or 'CrossSubnet'"
-  when:
-    - cloud_provider is defined and cloud_provider == 'azure'
-    - kube_network_plugin == 'calico'
-    - not ignore_assert_errors
-
-- name: Stop if supported Calico versions
-  assert:
-    that:
-      - "calico_version in calico_crds_archive_checksums.keys()"
-    msg: "Calico version not supported {{ calico_version }} not in {{ calico_crds_archive_checksums.keys() }}"
-  when:
-    - kube_network_plugin == 'calico'
-    - not ignore_assert_errors
-
 - name: Stop if unsupported version of Kubernetes
   assert:
     that: kube_version is version(kube_version_min_required, '>=')
@@ -200,53 +162,6 @@
     - cloud-provider
     - facts
 
-- name: Get current calico cluster version
-  shell: "set -o pipefail && {{ bin_dir }}/calicoctl.sh version  | grep 'Cluster Version:' | awk '{ print $3}'"
-  args:
-    executable: /bin/bash
-  register: calico_version_on_server
-  async: 10
-  poll: 3
-  run_once: yes
-  changed_when: false
-  failed_when: false
-  when:
-    - kube_network_plugin == 'calico'
-
-- name: Check that current calico version is enough for upgrade
-  assert:
-    that:
-      - calico_version_on_server.stdout is version(calico_min_version_required, '>=')
-    msg: >
-      Your version of calico is not fresh enough for upgrade.
-      Minimum version is {{ calico_min_version_required }} supported by the previous kubespray release.
-  when:
-    - kube_network_plugin == 'calico'
-    - 'calico_version_on_server.stdout is defined'
-    - calico_version_on_server.stdout
-    - inventory_hostname == groups['kube_control_plane'][0]
-  run_once: yes
-
-- name: "Check that cluster_id is set if calico_rr enabled"
-  assert:
-    that:
-      - cluster_id is defined
-    msg: "A unique cluster_id is required if using calico_rr"
-  when:
-    - kube_network_plugin == 'calico'
-    - peer_with_calico_rr
-    - inventory_hostname == groups['kube_control_plane'][0]
-  run_once: yes
-
-- name: "Check that calico_rr nodes are in k8s_cluster group"
-  assert:
-    that:
-      - '"k8s_cluster" in group_names'
-    msg: "calico_rr must be a child group of k8s_cluster group"
-  when:
-    - kube_network_plugin == 'calico'
-    - '"calico_rr" in group_names'
-
 - name: "Check that kube_service_addresses is a network range"
   assert:
     that:
diff --git a/roles/kubernetes/preinstall/tasks/main.yml b/roles/kubernetes/preinstall/tasks/main.yml
index 718f36092..495be4abb 100644
--- a/roles/kubernetes/preinstall/tasks/main.yml
+++ b/roles/kubernetes/preinstall/tasks/main.yml
@@ -117,3 +117,11 @@
     - ansible_os_family == "RedHat"
   tags:
     - bootstrap-os
+
+- name: Run calico checks  # run the calico role's check tasks from within preinstall
+  include_role:
+    name: network_plugin/calico
+    tasks_from: check  # load only tasks/check.yml instead of the role's main.yml
+  when:
+    - kube_network_plugin == 'calico'  # only relevant when calico is the selected network plugin
+    - not ignore_assert_errors  # the checks are skipped entirely when assert errors are ignored
diff --git a/roles/network_plugin/calico/tasks/check.yml b/roles/network_plugin/calico/tasks/check.yml
index 41bf77cd3..0b164534a 100644
--- a/roles/network_plugin/calico/tasks/check.yml
+++ b/roles/network_plugin/calico/tasks/check.yml
@@ -1,16 +1,102 @@
 ---
+- name: Stop if legacy encapsulation variables are detected (ipip)
+  assert:
+    that:
+      - ipip is not defined  # fails as soon as the deprecated 'ipip' variable is set
+    msg: "'ipip' configuration variable is deprecated, please configure your inventory with 'calico_ipip_mode' set to 'Always' or 'CrossSubnet' according to your specific needs"
+  run_once: True  # evaluated a single time rather than once per host
+  delegate_to: "{{ groups['kube_control_plane'][0] }}"  # first control plane node
+
+- name: Stop if legacy encapsulation variables are detected (ipip_mode)
+  assert:
+    that:
+      - ipip_mode is not defined  # fails as soon as the deprecated 'ipip_mode' variable is set
+    msg: "'ipip_mode' configuration variable is deprecated, please configure your inventory with 'calico_ipip_mode' set to 'Always' or 'CrossSubnet' according to your specific needs"
+  run_once: True  # evaluated a single time rather than once per host
+  delegate_to: "{{ groups['kube_control_plane'][0] }}"  # first control plane node
+
+- name: Stop if incompatible network plugin and cloudprovider
+  assert:
+    that:
+      - calico_ipip_mode == 'Never'  # IP in IP must be disabled on azure
+      - calico_vxlan_mode in ['Always', 'CrossSubnet']  # and VXLAN encapsulation must be enabled
+    msg: "When using cloud_provider azure and network_plugin calico calico_ipip_mode must be 'Never' and calico_vxlan_mode 'Always' or 'CrossSubnet'"
+  when:
+    - cloud_provider is defined and cloud_provider == 'azure'  # only enforced for the azure cloud provider
+  run_once: True  # evaluated a single time rather than once per host
+  delegate_to: "{{ groups['kube_control_plane'][0] }}"  # first control plane node
+
+- name: Stop if unsupported Calico version  # calico_version must be a key of calico_crds_archive_checksums
+  assert:
+    that:
+      - "calico_version in calico_crds_archive_checksums.keys()"
+    msg: "Calico version not supported {{ calico_version }} not in {{ calico_crds_archive_checksums.keys() | list }}"  # '| list' prints a plain list, not dict_keys([...])
+  run_once: true  # evaluated a single time rather than once per host
+  delegate_to: "{{ groups['kube_control_plane'][0] }}"  # first control plane node
+
+- name: Get current calico cluster version  # extracts the 'Cluster Version:' value printed by calicoctl.sh
+  shell: "set -o pipefail && {{ bin_dir }}/calicoctl.sh version  | grep 'Cluster Version:' | awk '{ print $3}'"
+  args:
+    executable: /bin/bash  # 'set -o pipefail' requires bash
+  register: calico_version_on_server
+  async: 10  # allow the command at most 10 seconds
+  poll: 3  # poll the async job every 3 seconds
+  run_once: True
+  delegate_to: "{{ groups['kube_control_plane'][0] }}"  # first control plane node
+  changed_when: false  # read-only query, never reported as a change
+  failed_when: false  # best effort: an empty or missing stdout just skips the version assert that follows
+
+- name: Check that current calico version is enough for upgrade  # compares the registered cluster version to the minimum
+  assert:
+    that:
+      - calico_version_on_server.stdout is version(calico_min_version_required, '>=')
+    msg: >
+      Your version of calico is not fresh enough for upgrade.
+      Minimum version is {{ calico_min_version_required }} supported by the previous kubespray release.
+  when:
+    # run_once evaluates these for the first play host only, so no inventory_hostname guard (it could skip the check)
+    - calico_version_on_server.stdout is defined
+    - calico_version_on_server.stdout | length > 0  # explicit boolean; skip when no version was detected
+  run_once: true
+  delegate_to: "{{ groups['kube_control_plane'][0] }}"  # first control plane node
+
+- name: "Check that cluster_id is set if calico_rr enabled"
+  assert:
+    that:
+      - cluster_id is defined
+    msg: "A unique cluster_id is required if using calico_rr"
+  when:
+    # run_once evaluates this for the first play host only, so no inventory_hostname guard (it could skip the check)
+    - peer_with_calico_rr
+  run_once: true
+  delegate_to: "{{ groups['kube_control_plane'][0] }}"  # first control plane node
+
+- name: "Check that calico_rr nodes are in k8s_cluster group"
+  assert:
+    that:  # compare whole groups: group_names would only describe the single host that run_once evaluates
+      - "groups['calico_rr'] | difference(groups['k8s_cluster'] | default([])) | length == 0"
+    msg: "calico_rr must be a child group of k8s_cluster group"
+  when:
+    - "groups['calico_rr'] | default([]) | length > 0"  # nothing to verify without calico_rr hosts
+  run_once: true
+  delegate_to: "{{ groups['kube_control_plane'][0] }}"  # first control plane node
+
 - name: "Check vars defined correctly"
   assert:
     that:
       - "calico_pool_name is defined"
       - "calico_pool_name is match('^[a-zA-Z0-9-_\\\\.]{2,63}$')"
     msg: "calico_pool_name contains invalid characters"
+  run_once: True
+  delegate_to: "{{ groups['kube_control_plane'][0] }}"
 
 - name: "Check calico network backend defined correctly"
   assert:
     that:
       - "calico_network_backend in ['bird', 'vxlan', 'none']"
     msg: "calico network backend is not 'bird', 'vxlan' or 'none'"
+  run_once: True
+  delegate_to: "{{ groups['kube_control_plane'][0] }}"
 
 - name: "Check ipip and vxlan mode defined correctly"
   assert:
@@ -18,6 +104,8 @@
       - "calico_ipip_mode in ['Always', 'CrossSubnet', 'Never']"
       - "calico_vxlan_mode in ['Always', 'CrossSubnet', 'Never']"
     msg: "calico inter host encapsulation mode is not 'Always', 'CrossSubnet' or 'Never'"
+  run_once: True
+  delegate_to: "{{ groups['kube_control_plane'][0] }}"
 
 - name: "Check ipip and vxlan mode if simultaneously enabled"
   assert:
@@ -26,6 +114,8 @@
     msg: "IP in IP and VXLAN mode is mutualy exclusive modes"
   when:
     - "calico_ipip_mode in ['Always', 'CrossSubnet']"
+  run_once: True
+  delegate_to: "{{ groups['kube_control_plane'][0] }}"
 
 - name: "Check ipip and vxlan mode if simultaneously enabled"
   assert:
@@ -34,6 +124,8 @@
     msg: "IP in IP and VXLAN mode is mutualy exclusive modes"
   when:
     - "calico_vxlan_mode in ['Always', 'CrossSubnet']"
+  run_once: True
+  delegate_to: "{{ groups['kube_control_plane'][0] }}"
 
 - name: "Get Calico {{ calico_pool_name }} configuration"
   command: calicoctl.sh get ipPool {{ calico_pool_name }} -o json
@@ -48,6 +140,8 @@
   set_fact:
     calico_pool_conf: '{{ calico.stdout | from_json }}'
   when: calico.rc == 0 and calico.stdout
+  run_once: True
+  delegate_to: "{{ groups['kube_control_plane'][0] }}"
 
 - name: "Check if inventory match current cluster configuration"
   assert:
@@ -59,15 +153,23 @@
     msg: "Your inventory doesn't match the current cluster configuration"
   when:
     - calico_pool_conf is defined
+  run_once: True
+  delegate_to: "{{ groups['kube_control_plane'][0] }}"
 
 - name: "Check kdd calico_datastore if calico_apiserver_enabled"
   assert:
     that: calico_datastore == "kdd"
+    msg: "When using calico apiserver you need to use the kubernetes datastore"
   when:
     - calico_apiserver_enabled
+  run_once: True
+  delegate_to: "{{ groups['kube_control_plane'][0] }}"
 
 - name: "Check kdd calico_datastore if typha_enabled"
   assert:
     that: calico_datastore == "kdd"
+    msg: "When using typha you need to use the kubernetes datastore"
   when:
     - typha_enabled
+  run_once: True
+  delegate_to: "{{ groups['kube_control_plane'][0] }}"
diff --git a/roles/network_plugin/calico/tasks/main.yml b/roles/network_plugin/calico/tasks/main.yml
index df5812036..81844fa4f 100644
--- a/roles/network_plugin/calico/tasks/main.yml
+++ b/roles/network_plugin/calico/tasks/main.yml
@@ -1,6 +1,4 @@
 ---
-- import_tasks: check.yml
-
 - import_tasks: pre.yml
 
 - import_tasks: repos.yml
-- 
GitLab