diff --git a/README.md b/README.md
index 8e3c855ce24a414a35a7cb1667910116654e8fde..2171330834602da123e84e4de21017d7f26d80d9 100644
--- a/README.md
+++ b/README.md
@@ -43,6 +43,10 @@ master
 node
 ```
 
+If multiple hosts are in the master group, the playbook will automatically setup k3s in HA mode with etcd.
+https://rancher.com/docs/k3s/latest/en/installation/ha-embedded/
+This requires at least k3s version 1.19.1
+
 If needed, you can also edit `inventory/my-cluster/group_vars/all.yml` to match your environment.
 
 Start provisioning of the cluster using the following command:
diff --git a/inventory/sample/group_vars/all.yml b/inventory/sample/group_vars/all.yml
index ada7200dac24ce614fde8ba860d703efd361b28e..e2c26e13d7b5e09c91aea25032dd8f9950410dfe 100644
--- a/inventory/sample/group_vars/all.yml
+++ b/inventory/sample/group_vars/all.yml
@@ -2,6 +2,15 @@
 k3s_version: v1.22.3+k3s1
 ansible_user: debian
 systemd_dir: /etc/systemd/system
-master_ip: "{{ hostvars[groups['master'][0]]['ansible_host'] | default(groups['master'][0]) }}"
+
+# If you define multiple masters you should be providing a loadbalanced
+# apiserver endpoint to all masters here. This default value is only suitable
+# for a non-HA setup, if used in a HA setup, it will not protect you if the
+# first node fails.
+# Also you should define k3s_token so that masters can talk together securely
+
+apiserver_endpoint: "{{ hostvars[groups['master'][0]]['ansible_host'] | default(groups['master'][0]) }}"
+# k3s_token: "mysupersecuretoken"
+
 extra_server_args: ""
 extra_agent_args: ""
diff --git a/roles/k3s/master/defaults/main.yml b/roles/k3s/master/defaults/main.yml
index c56778f9307a0b496617ada60ef63ec7fab076e0..ad94756e7c494198e5e1b85382e49c7cf0f83911 100644
--- a/roles/k3s/master/defaults/main.yml
+++ b/roles/k3s/master/defaults/main.yml
@@ -1,2 +1,11 @@
 ---
-k3s_server_location: /var/lib/rancher/k3s
+server_init_args: >-
+  {% if groups['master'] | length > 1 %}
+  {% if ansible_host == hostvars[groups['master'][0]]['ansible_host'] | default(groups['master'][0]) %}
+  --cluster-init --tls-san {{ apiserver_endpoint }}
+  {% else %}
+  --server https://{{ hostvars[groups['master'][0]]['ansible_host'] | default(groups['master'][0]) }}:6443
+  {% endif %}
+  --token {{ k3s_token }}
+  {% endif %}
+  {{ extra_server_args | default('') }}
diff --git a/roles/k3s/master/tasks/main.yml b/roles/k3s/master/tasks/main.yml
index 77b58f60a03a043e51ac6040e909d95684152ab3..2cf08657253ff1f7be5b288e11a53be43b3228b6 100644
--- a/roles/k3s/master/tasks/main.yml
+++ b/roles/k3s/master/tasks/main.yml
@@ -1,4 +1,43 @@
 ---
+- name: Clean previous runs of k3s-init
+  systemd:
+    name: k3s-init
+    state: stopped
+  failed_when: false
+
+- name: Clean previous runs of k3s-init
+  command: systemctl reset-failed k3s-init
+  failed_when: false
+  changed_when: false
+  args:
+    warn: false  # The ansible systemd module does not support reset-failed
+
+- name: Init cluster inside the transient k3s-init service
+  command:
+    cmd: "systemd-run -p RestartSec=2 \
+      -p Restart=on-failure \
+      --unit=k3s-init \
+      k3s server {{ server_init_args }}"
+    creates: "{{ systemd_dir }}/k3s.service"
+  args:
+    warn: false  # The ansible systemd module does not support transient units
+
+- name: Verification
+  block:
+    - name: Verify that all nodes actually joined (check k3s-init.service if this fails)
+      command:
+        cmd: k3s kubectl get nodes -l "node-role.kubernetes.io/master=true" -o=jsonpath="{.items[*].metadata.name}"
+      register: nodes
+      until: nodes.rc == 0 and (nodes.stdout.split() | length) == (groups['master'] | length)
+      retries: 20
+      delay: 10
+      changed_when: false
+  always:
+    - name: Kill the temporary service used for initialization
+      systemd:
+        name: k3s-init
+        state: stopped
+      failed_when: false
 
 - name: Copy K3s service file
   register: k3s_service
@@ -59,10 +98,10 @@
     owner: "{{ ansible_user }}"
     mode: "u=rw,g=,o="
 
-- name: Replace https://localhost:6443 by https://master-ip:6443
+- name: Configure kubectl cluster to https://{{ apiserver_endpoint }}:6443
   command: >-
     k3s kubectl config set-cluster default
-      --server=https://{{ master_ip }}:6443
+      --server=https://{{ apiserver_endpoint }}:6443
       --kubeconfig ~{{ ansible_user }}/.kube/config
   changed_when: true
 
diff --git a/roles/k3s/node/templates/k3s.service.j2 b/roles/k3s/node/templates/k3s.service.j2
index 99a0ac3d08fe3c131da4adffd3643a78afb71113..01baa64edaaea12e2cb07fb5857a25cc80ea7c0e 100644
--- a/roles/k3s/node/templates/k3s.service.j2
+++ b/roles/k3s/node/templates/k3s.service.j2
@@ -7,7 +7,7 @@ After=network-online.target
 Type=notify
 ExecStartPre=-/sbin/modprobe br_netfilter
 ExecStartPre=-/sbin/modprobe overlay
-ExecStart=/usr/local/bin/k3s agent --server https://{{ master_ip }}:6443 --token {{ hostvars[groups['master'][0]]['token'] }} {{ extra_agent_args | default("") }}
+ExecStart=/usr/local/bin/k3s agent --server https://{{ apiserver_endpoint }}:6443 --token {{ hostvars[groups['master'][0]]['token'] | default(k3s_token) }} {{ extra_agent_args | default("") }}
 KillMode=process
 Delegate=yes
 # Having non-zero Limit*s causes performance problems due to accounting overhead
diff --git a/roles/reset/tasks/main.yml b/roles/reset/tasks/main.yml
index 728447fbb3b0ff8b5fe38ce6aa4a10d714640836..1547c4d75ad355c68bb839e1124a0de66bab4cc0 100644
--- a/roles/reset/tasks/main.yml
+++ b/roles/reset/tasks/main.yml
@@ -8,6 +8,7 @@
   with_items:
     - k3s
     - k3s-node
+    - k3s-init
 
 - name: pkill -9 -f "k3s/data/[^/]+/bin/containerd-shim-runc"
   register: pkill_containerd_shim_runc