From 97ebbb96724e973c9d0127314f0d64f496beafac Mon Sep 17 00:00:00 2001
From: Matthew Mosesohn <mmosesohn@mirantis.com>
Date: Tue, 14 Feb 2017 19:08:44 +0300
Subject: [PATCH] Add graceful upgrade process

Based on #718 introduced by rsmitty.

Includes all roles and all options to support deployment of
new hosts in case they were added to inventory.

Main difference here is that master role is evaluated first
so that master components get upgraded first.

Fixes #694
---
 .gitlab-ci.yml                            | 15 +++--
 docs/upgrades.md                          | 16 ++++-
 roles/upgrade/post-upgrade/tasks/main.yml |  2 +-
 roles/upgrade/pre-upgrade/tasks/main.yml  |  4 +-
 upgrade-cluster.yml                       | 81 ++++++++++++++++++++---
 5 files changed, 96 insertions(+), 22 deletions(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 2d281cd72..305b69575 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -101,8 +101,8 @@ before_script:
 
     # Check out latest tag if testing upgrade
     # Uncomment when gitlab kargo repo has tags
-    #- test "${UPGRADE_TEST}" = "true" && git fetch --all && git checkout $(git describe --tags $(git rev-list --tags --max-count=1))
-    - test "${UPGRADE_TEST}" = "true" && git checkout 031cf565ec3ccd3ebbe80eeef3454c3780e5c598 && pip install ansible==2.2.0
+    #- test "${UPGRADE_TEST}" != "false" && git fetch --all && git checkout $(git describe --tags $(git rev-list --tags --max-count=1))
+    - test "${UPGRADE_TEST}" != "false" && git checkout 031cf565ec3ccd3ebbe80eeef3454c3780e5c598 && pip install ansible==2.2.0
 
 
     # Create cluster
@@ -127,9 +127,10 @@ before_script:
       cluster.yml
 
     # Repeat deployment if testing upgrade
-    #FIXME(mattymo): repeat "Create cluster" above without duplicating code 
     - >
-      if [ "${UPGRADE_TEST}" = "true" ]; then 
+      if [ "${UPGRADE_TEST}" != "false" ]; then 
+      test "${UPGRADE_TEST}" == "basic" && PLAYBOOK="cluster.yml";
+      test "${UPGRADE_TEST}" == "graceful" && PLAYBOOK="upgrade-cluster.yml";
       pip install ansible==2.2.1.0; 
       git checkout "${CI_BUILD_REF}"; 
       ansible-playbook -i inventory/inventory.ini -b --become-user=root --private-key=${HOME}/.ssh/id_rsa -u $SSH_USER 
@@ -149,7 +150,7 @@ before_script:
       -e resolvconf_mode=${RESOLVCONF_MODE} 
       -e weave_cpu_requests=${WEAVE_CPU_LIMIT} 
       -e weave_cpu_limit=${WEAVE_CPU_LIMIT} 
-      cluster.yml; 
+      $PLAYBOOK; 
       fi
 
     # Tests Cases
@@ -253,7 +254,7 @@ before_script:
   KUBE_NETWORK_PLUGIN: canal
   CLOUD_IMAGE: debian-8-kubespray
   CLOUD_REGION: us-east1-b
-  UPGRADE_TEST: "true"
+  UPGRADE_TEST: "basic"
   CLUSTER_MODE: ha
 
 .rhel7_weave_variables: &rhel7_weave_variables
@@ -261,7 +262,7 @@ before_script:
   KUBE_NETWORK_PLUGIN: weave
   CLOUD_IMAGE: rhel-7
   CLOUD_REGION: europe-west1-b
-  UPGRADE_TEST: "true"
+  UPGRADE_TEST: "graceful"
   CLUSTER_MODE: default
 
 .centos7_flannel_variables: &centos7_flannel_variables
diff --git a/docs/upgrades.md b/docs/upgrades.md
index 4a25bd622..9a57f43ac 100644
--- a/docs/upgrades.md
+++ b/docs/upgrades.md
@@ -18,7 +18,7 @@ versions. Here are all version vars for each component:
 * flannel_version
 * kubedns_version
 
-#### Example
+#### Unsafe upgrade example
 
 If you wanted to upgrade just kube_version from v1.4.3 to v1.4.6, you could
 deploy the following way:
@@ -33,6 +33,20 @@ And then repeat with v1.4.6 as kube_version:
 ansible-playbook cluster.yml -i inventory/inventory.cfg -e kube_version=v1.4.6
 ```
 
+#### Graceful upgrade
+
+Kargo also supports cordoning, draining and uncordoning of nodes when performing
+a cluster upgrade. There is a separate playbook used for this purpose. It is
+important to note that upgrade-cluster.yml can only be used for upgrading an
+existing cluster. That means there must be at least 1 kube-master already
+deployed.
+
+```
+git fetch origin
+git checkout origin/master
+ansible-playbook upgrade-cluster.yml -i inventory/inventory.cfg
+```
+
 #### Upgrade order
 
 As mentioned above, components are upgraded in the order in which they were
diff --git a/roles/upgrade/post-upgrade/tasks/main.yml b/roles/upgrade/post-upgrade/tasks/main.yml
index b576b0947..d8243d04d 100644
--- a/roles/upgrade/post-upgrade/tasks/main.yml
+++ b/roles/upgrade/post-upgrade/tasks/main.yml
@@ -1,5 +1,5 @@
 ---
 
 - name: Uncordon node
-  command: kubectl uncordon {{ ansible_hostname }}
+  command: "{{ bin_dir }}/kubectl uncordon {{ ansible_hostname }}"
   delegate_to: "{{ groups['kube-master'][0] }}"
diff --git a/roles/upgrade/pre-upgrade/tasks/main.yml b/roles/upgrade/pre-upgrade/tasks/main.yml
index 390e9e559..90b535d2e 100644
--- a/roles/upgrade/pre-upgrade/tasks/main.yml
+++ b/roles/upgrade/pre-upgrade/tasks/main.yml
@@ -1,11 +1,11 @@
 ---
 
 - name: Cordon node
-  command: kubectl cordon {{ ansible_hostname }}
+  command: "{{ bin_dir }}/kubectl cordon {{ ansible_hostname }}"
   delegate_to: "{{ groups['kube-master'][0] }}"
 
 - name: Drain node
-  command: kubectl drain --force --ignore-daemonsets --grace-period 30 --delete-local-data {{ ansible_hostname }}
+  command: "{{ bin_dir }}/kubectl drain --force --ignore-daemonsets --grace-period 30 --delete-local-data {{ ansible_hostname }}"
   delegate_to: "{{ groups['kube-master'][0] }}"
 
 - name: Sleep for grace period for draining
diff --git a/upgrade-cluster.yml b/upgrade-cluster.yml
index d7089fc49..1be9c9cab 100644
--- a/upgrade-cluster.yml
+++ b/upgrade-cluster.yml
@@ -1,33 +1,92 @@
 ---
-- hosts: all
+- hosts: localhost
+  gather_facts: False
+  roles:
+    - bastion-ssh-config
+  tags: [localhost, bastion]
+
+- hosts: k8s-cluster:etcd:calico-rr
+  any_errors_fatal: true
+  gather_facts: false
+  vars:
+    # Need to disable pipelining for bootstrap-os as some systems have requiretty in sudoers set, which makes pipelining
+    # fail. bootstrap-os fixes this on these systems, so in later plays it can be enabled.
+    ansible_ssh_pipelining: false
+  roles:
+    - bootstrap-os
+  tags:
+    - bootstrap-os
+
+- hosts: k8s-cluster:etcd:calico-rr
   any_errors_fatal: true
+  vars:
+    ansible_ssh_pipelining: true
   gather_facts: true
 
-- hosts: all:!network-storage
+- hosts: k8s-cluster:etcd:calico-rr
   any_errors_fatal: true
   roles:
+    - { role: kernel-upgrade, tags: kernel-upgrade, when: kernel_upgrade is defined and kernel_upgrade }
     - { role: kubernetes/preinstall, tags: preinstall }
+    - { role: docker, tags: docker }
+    - role: rkt
+      tags: rkt
+      when: "'rkt' in [etcd_deployment_type, kubelet_deployment_type, vault_deployment_type]"
+
+- hosts: etcd:k8s-cluster:vault
+  any_errors_fatal: true
+  roles:
+    - { role: vault, tags: vault, vault_bootstrap: true, when: "cert_management == 'vault'" }
 
 - hosts: etcd:!k8s-cluster
   any_errors_fatal: true
-  serial: 1
   roles:
     - { role: etcd, tags: etcd }
 
-- hosts: kube-node
+- hosts: k8s-cluster
   any_errors_fatal: true
-  serial: 1
   roles:
     - { role: etcd, tags: etcd }
-    - { role: upgrade/pre-upgrade, tags: upgrade/pre-upgrade }
-    - { role: kubernetes/node, tags: node }
-    - { role: network_plugin, tags: network }
-    - { role: upgrade/post-upgrade, tags: upgrade/post-upgrade }
 
+- hosts: etcd:k8s-cluster:vault
+  any_errors_fatal: true
+  roles:
+    - { role: vault, tags: vault, when: "cert_management == 'vault'"}
+
+# Upgrade master components first to maintain backwards compatibility.
 - hosts: kube-master
   any_errors_fatal: true
   serial: 1
   roles:
-    - { role: etcd, tags: etcd }
+    - { role: upgrade/pre-upgrade, tags: pre-upgrade }
     - { role: kubernetes/node, tags: node }
-    - { role: kubernetes/master, tags: master }
\ No newline at end of file
+    - { role: kubernetes/master, tags: master }
+    - { role: network_plugin, tags: network }
+    - { role: upgrade/post-upgrade, tags: post-upgrade }
+
+# Finally, upgrade worker nodes in batches sized by the `serial` variable (default 20%).
+- hosts: kube-node:!kube-master
+  any_errors_fatal: true
+  serial: "{{ serial | default('20%') }}"
+  roles:
+    - { role: upgrade/pre-upgrade, tags: pre-upgrade }
+    - { role: kubernetes/node, tags: node }
+    - { role: network_plugin, tags: network }
+    - { role: upgrade/post-upgrade, tags: post-upgrade }
+    - { role: kubernetes-apps/network_plugin, tags: network }
+
+- hosts: calico-rr
+  any_errors_fatal: true
+  roles:
+    - { role: network_plugin/calico/rr, tags: network }
+
+- hosts: k8s-cluster
+  any_errors_fatal: true
+  roles:
+    - { role: dnsmasq, when: "dns_mode == 'dnsmasq_kubedns'", tags: dnsmasq }
+    - { role: kubernetes/preinstall, when: "dns_mode != 'none' and resolvconf_mode == 'host_resolvconf'", tags: resolvconf }
+
+- hosts: kube-master[0]
+  any_errors_fatal: true
+  roles:
+    - { role: kubernetes-apps, tags: apps }
-- 
GitLab