diff --git a/.gitlab-ci/packet.yml b/.gitlab-ci/packet.yml index 9b432a19a894327852e0cb4b249fa454b7dc3a31..6e72a4cd811351af523edcfcbbff9acd4e7820a1 100644 --- a/.gitlab-ci/packet.yml +++ b/.gitlab-ci/packet.yml @@ -194,6 +194,11 @@ packet_amazon-linux-2-aio: extends: .packet_pr when: manual +packet_centos8-calico-nodelocaldns-secondary: + stage: deploy-part2 + extends: .packet_pr + when: manual + packet_fedora34-kube-ovn-containerd: stage: deploy-part2 extends: .packet_periodic diff --git a/docs/dns-stack.md b/docs/dns-stack.md index 7771c26bbdd71a6b8a583dec2bcc33f8bbb576ad..b6d2064a624fee040053c645ec231ea8573031c5 100644 --- a/docs/dns-stack.md +++ b/docs/dns-stack.md @@ -212,6 +212,22 @@ nodelocaldns_external_zones: See [dns_etchosts](#dns_etchosts-coredns) above. +### Nodelocal DNS HA + +Under some circumstances the single POD nodelocaldns implementation may not be able to be replaced soon enough and a cluster upgrade or a nodelocaldns upgrade can cause DNS requests to time out for short intervals. If for any reason your applications cannot tolerate this behavior, you can enable a redundant nodelocal DNS pod on each node: + +```yaml +enable_nodelocaldns_secondary: true +``` + +**Note:** when the nodelocaldns secondary is enabled, the primary is instructed to no longer tear down the iptables rules it sets up to direct traffic to itself. In case both daemonsets have failing pods on the same node, this can cause a DNS blackout with traffic no longer being forwarded to the coredns central service as a fallback. Please ensure you account for this also if you decide to disable the nodelocaldns cache. + +There is a time delta (in seconds) allowed for the secondary nodelocaldns to survive in case both primary and secondary daemonsets are updated at the same time. It is advised to tune this variable after you have performed some tests in your own environment. 
+ +```yaml +nodelocaldns_secondary_skew_seconds: 5 +``` + ## Limitations * Kubespray has yet ways to configure Kubedns addon to forward requests SkyDns can diff --git a/inventory/sample/group_vars/k8s_cluster/k8s-cluster.yml b/inventory/sample/group_vars/k8s_cluster/k8s-cluster.yml index 4248832eb59d241d36344423ed9d2f7d648849fc..dbd66d3ddd006d0c35256833dfa022c76bf953df 100644 --- a/inventory/sample/group_vars/k8s_cluster/k8s-cluster.yml +++ b/inventory/sample/group_vars/k8s_cluster/k8s-cluster.yml @@ -166,9 +166,12 @@ dns_mode: coredns # manual_dns_server: 10.x.x.x # Enable nodelocal dns cache enable_nodelocaldns: true +enable_nodelocaldns_secondary: false nodelocaldns_ip: 169.254.25.10 nodelocaldns_health_port: 9254 +nodelocaldns_second_health_port: 9256 nodelocaldns_bind_metrics_host_ip: false +nodelocaldns_secondary_skew_seconds: 5 # nodelocaldns_external_zones: # - zones: # - example.com diff --git a/roles/download/defaults/main.yml b/roles/download/defaults/main.yml index 8e858bb3a8fae11e1ac746476bdd307fffe26cb1..6d19e232405339108c6c2fcb3329ceb69a4e9181 100644 --- a/roles/download/defaults/main.yml +++ b/roles/download/defaults/main.yml @@ -610,7 +610,7 @@ coredns_image_is_namespaced: "{{ (kube_version is version('v1.21.0','>=')) or (c coredns_image_repo: "{{ kube_image_repo }}{{'/coredns/coredns' if (coredns_image_is_namespaced | bool) else '/coredns' }}" coredns_image_tag: "{{ coredns_version if (coredns_image_is_namespaced | bool) else (coredns_version | regex_replace('^v', '')) }}" -nodelocaldns_version: "1.17.1" +nodelocaldns_version: "1.21.1" nodelocaldns_image_repo: "{{ kube_image_repo }}/dns/k8s-dns-node-cache" nodelocaldns_image_tag: "{{ nodelocaldns_version }}" diff --git a/roles/kubernetes-apps/ansible/defaults/main.yml b/roles/kubernetes-apps/ansible/defaults/main.yml index 411260551cb2d7605416fde05a426a3ec716dbad..fa06b2e0d2055fa0c434a7d459420afe1bbf7c10 100644 --- a/roles/kubernetes-apps/ansible/defaults/main.yml +++ 
b/roles/kubernetes-apps/ansible/defaults/main.yml @@ -17,6 +17,8 @@ nodelocaldns_cpu_requests: 100m nodelocaldns_memory_limit: 170Mi nodelocaldns_memory_requests: 70Mi nodelocaldns_ds_nodeselector: "kubernetes.io/os: linux" +nodelocaldns_prometheus_port: 9253 +nodelocaldns_secondary_prometheus_port: 9255 # Limits for dns-autoscaler dns_autoscaler_cpu_requests: 20m diff --git a/roles/kubernetes-apps/ansible/tasks/main.yml b/roles/kubernetes-apps/ansible/tasks/main.yml index 75ee477b0aa9c4eb35d7adc5c6caceef99c99999..d59f0e0b6f7ed29422e2c3ac2f54b30021c067f1 100644 --- a/roles/kubernetes-apps/ansible/tasks/main.yml +++ b/roles/kubernetes-apps/ansible/tasks/main.yml @@ -48,6 +48,7 @@ - "{{ coredns_manifests.results | default({}) }}" - "{{ coredns_secondary_manifests.results | default({}) }}" - "{{ nodelocaldns_manifests.results | default({}) }}" + - "{{ nodelocaldns_second_manifests.results | default({}) }}" when: - dns_mode != 'none' - inventory_hostname == groups['kube_control_plane'][0] diff --git a/roles/kubernetes-apps/ansible/tasks/nodelocaldns.yml b/roles/kubernetes-apps/ansible/tasks/nodelocaldns.yml index ce79ceed4b80e81a49293ed2f3a75106c910278c..4809aa9b83df3b117d7a8d0dc0e1c63fe15243a2 100644 --- a/roles/kubernetes-apps/ansible/tasks/nodelocaldns.yml +++ b/roles/kubernetes-apps/ansible/tasks/nodelocaldns.yml @@ -43,3 +43,31 @@ tags: - nodelocaldns - coredns + +- name: Kubernetes Apps | Lay Down nodelocaldns-secondary Template + template: + src: "{{ item.file }}.j2" + dest: "{{ kube_config_dir }}/{{ item.file }}" + with_items: + - { name: nodelocaldns, file: nodelocaldns-second-daemonset.yml, type: daemonset } + register: nodelocaldns_second_manifests + vars: + forwardTarget: >- + {%- if secondaryclusterIP is defined and dns_mode == 'coredns_dual' -%} + {{ primaryClusterIP }} {{ secondaryclusterIP }} + {%- else -%} + {{ primaryClusterIP }} + {%- endif -%} + upstreamForwardTarget: >- + {%- if resolvconf_mode == 'host_resolvconf' and upstream_dns_servers is 
defined and upstream_dns_servers|length > 0 -%} + {{ upstream_dns_servers|join(' ') }} + {%- else -%} + /etc/resolv.conf + {%- endif -%} + when: + - enable_nodelocaldns + - enable_nodelocaldns_secondary + - inventory_hostname == groups['kube_control_plane'] | first + tags: + - nodelocaldns + - coredns diff --git a/roles/kubernetes-apps/ansible/templates/nodelocaldns-config.yml.j2 b/roles/kubernetes-apps/ansible/templates/nodelocaldns-config.yml.j2 index 18abf8ea33b693cf4d9f867b2621b9e5496b936d..0244c04a414a9cd95d331cedc062701247dd429b 100644 --- a/roles/kubernetes-apps/ansible/templates/nodelocaldns-config.yml.j2 +++ b/roles/kubernetes-apps/ansible/templates/nodelocaldns-config.yml.j2 @@ -17,7 +17,7 @@ data: loop bind {{ nodelocaldns_ip }} forward . {{ block['nameservers'] | join(' ') }} - prometheus {% if nodelocaldns_bind_metrics_host_ip %}{$MY_HOST_IP}{% endif %}:9253 + prometheus {% if nodelocaldns_bind_metrics_host_ip %}{$MY_HOST_IP}{% endif %}:{{ nodelocaldns_prometheus_port }} log {% if dns_etchosts | default(None) %} hosts /etc/coredns/hosts { @@ -39,7 +39,7 @@ data: forward . {{ forwardTarget }} { force_tcp } - prometheus {% if nodelocaldns_bind_metrics_host_ip %}{$MY_HOST_IP}{% endif %}:9253 + prometheus {% if nodelocaldns_bind_metrics_host_ip %}{$MY_HOST_IP}{% endif %}:{{ nodelocaldns_prometheus_port }} health {{ nodelocaldns_ip }}:{{ nodelocaldns_health_port }} {% if dns_etchosts | default(None) %} hosts /etc/coredns/hosts { @@ -56,7 +56,7 @@ data: forward . {{ forwardTarget }} { force_tcp } - prometheus {% if nodelocaldns_bind_metrics_host_ip %}{$MY_HOST_IP}{% endif %}:9253 + prometheus {% if nodelocaldns_bind_metrics_host_ip %}{$MY_HOST_IP}{% endif %}:{{ nodelocaldns_prometheus_port }} } ip6.arpa:53 { errors @@ -67,7 +67,7 @@ data: forward . 
{{ forwardTarget }} { force_tcp } - prometheus {% if nodelocaldns_bind_metrics_host_ip %}{$MY_HOST_IP}{% endif %}:9253 + prometheus {% if nodelocaldns_bind_metrics_host_ip %}{$MY_HOST_IP}{% endif %}:{{ nodelocaldns_prometheus_port }} } .:53 { errors @@ -76,13 +76,91 @@ data: loop bind {{ nodelocaldns_ip }} forward . {{ upstreamForwardTarget }} - prometheus {% if nodelocaldns_bind_metrics_host_ip %}{$MY_HOST_IP}{% endif %}:9253 + prometheus {% if nodelocaldns_bind_metrics_host_ip %}{$MY_HOST_IP}{% endif %}:{{ nodelocaldns_prometheus_port }} {% if dns_etchosts | default(None) %} hosts /etc/coredns/hosts { fallthrough } {% endif %} } +{% if enable_nodelocaldns_secondary %} + Corefile-second: | +{% if nodelocaldns_external_zones is defined and nodelocaldns_external_zones|length > 0 %} +{% for block in nodelocaldns_external_zones %} + {{ block['zones'] | join(' ') }} { + errors + cache {{ block['cache'] | default(30) }} + reload + loop + bind {{ nodelocaldns_ip }} + forward . {{ block['nameservers'] | join(' ') }} + prometheus {% if nodelocaldns_bind_metrics_host_ip %}{$MY_HOST_IP}{% endif %}:{{ nodelocaldns_secondary_prometheus_port }} + log +{% if dns_etchosts | default(None) %} + hosts /etc/coredns/hosts { + fallthrough + } +{% endif %} + } +{% endfor %} +{% endif %} + {{ dns_domain }}:53 { + errors + cache { + success 9984 30 + denial 9984 5 + } + reload + loop + bind {{ nodelocaldns_ip }} + forward . {{ forwardTarget }} { + force_tcp + } + prometheus {% if nodelocaldns_bind_metrics_host_ip %}{$MY_HOST_IP}{% endif %}:{{ nodelocaldns_secondary_prometheus_port }} + health {{ nodelocaldns_ip }}:{{ nodelocaldns_second_health_port }} +{% if dns_etchosts | default(None) %} + hosts /etc/coredns/hosts { + fallthrough + } +{% endif %} + } + in-addr.arpa:53 { + errors + cache 30 + reload + loop + bind {{ nodelocaldns_ip }} + forward . 
{{ forwardTarget }} { + force_tcp + } + prometheus {% if nodelocaldns_bind_metrics_host_ip %}{$MY_HOST_IP}{% endif %}:{{ nodelocaldns_secondary_prometheus_port }} + } + ip6.arpa:53 { + errors + cache 30 + reload + loop + bind {{ nodelocaldns_ip }} + forward . {{ forwardTarget }} { + force_tcp + } + prometheus {% if nodelocaldns_bind_metrics_host_ip %}{$MY_HOST_IP}{% endif %}:{{ nodelocaldns_secondary_prometheus_port }} + } + .:53 { + errors + cache 30 + reload + loop + bind {{ nodelocaldns_ip }} + forward . {{ upstreamForwardTarget }} + prometheus {% if nodelocaldns_bind_metrics_host_ip %}{$MY_HOST_IP}{% endif %}:{{ nodelocaldns_secondary_prometheus_port }} +{% if dns_etchosts | default(None) %} + hosts /etc/coredns/hosts { + fallthrough + } +{% endif %} + } +{% endif %} {% if dns_etchosts | default(None) %} hosts: | {{ dns_etchosts | indent(width=4, indentfirst=None) }} diff --git a/roles/kubernetes-apps/ansible/templates/nodelocaldns-daemonset.yml.j2 b/roles/kubernetes-apps/ansible/templates/nodelocaldns-daemonset.yml.j2 index 7abd28ffab57cd5dbcbab496649d02d4d2dc2544..7c63e28fa914cb173f49e354fb41cd8563052450 100644 --- a/roles/kubernetes-apps/ansible/templates/nodelocaldns-daemonset.yml.j2 +++ b/roles/kubernetes-apps/ansible/templates/nodelocaldns-daemonset.yml.j2 @@ -16,7 +16,7 @@ spec: k8s-app: nodelocaldns annotations: prometheus.io/scrape: 'true' - prometheus.io/port: '9253' + prometheus.io/port: '{{ nodelocaldns_prometheus_port }}' spec: nodeSelector: {{ nodelocaldns_ds_nodeselector }} @@ -38,16 +38,16 @@ spec: requests: cpu: {{ nodelocaldns_cpu_requests }} memory: {{ nodelocaldns_memory_requests }} - args: [ "-localip", "{{ nodelocaldns_ip }}", "-conf", "/etc/coredns/Corefile", "-upstreamsvc", "coredns" ] - securityContext: - privileged: true -{% if nodelocaldns_bind_metrics_host_ip %} - env: - - name: MY_HOST_IP - valueFrom: - fieldRef: - fieldPath: status.hostIP -{% endif %} + args: + - -localip + - {{ nodelocaldns_ip }} + - -conf + - 
/etc/coredns/Corefile + - -upstreamsvc + - coredns +{% if enable_nodelocaldns_secondary %} + - -skipteardown +{% else %} ports: - containerPort: 53 name: dns @@ -58,6 +58,16 @@ spec: - containerPort: 9253 name: metrics protocol: TCP +{% endif %} + securityContext: + privileged: true +{% if nodelocaldns_bind_metrics_host_ip %} + env: + - name: MY_HOST_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP +{% endif %} livenessProbe: httpGet: host: {{ nodelocaldns_ip }} diff --git a/roles/kubernetes-apps/ansible/templates/nodelocaldns-second-daemonset.yml.j2 b/roles/kubernetes-apps/ansible/templates/nodelocaldns-second-daemonset.yml.j2 new file mode 100644 index 0000000000000000000000000000000000000000..037bf446e03d3b79df52ff8db37762b8da7d0d16 --- /dev/null +++ b/roles/kubernetes-apps/ansible/templates/nodelocaldns-second-daemonset.yml.j2 @@ -0,0 +1,103 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: nodelocaldns-second + namespace: kube-system + labels: + k8s-app: kube-dns + addonmanager.kubernetes.io/mode: Reconcile +spec: + selector: + matchLabels: + k8s-app: nodelocaldns-second + template: + metadata: + labels: + k8s-app: nodelocaldns-second + annotations: + prometheus.io/scrape: 'true' + prometheus.io/port: '{{ nodelocaldns_secondary_prometheus_port }}' + spec: + nodeSelector: + {{ nodelocaldns_ds_nodeselector }} + priorityClassName: system-cluster-critical + serviceAccountName: nodelocaldns + hostNetwork: true + dnsPolicy: Default # Don't use cluster DNS. 
+      tolerations:
+        - effect: NoSchedule
+          operator: "Exists"
+        - effect: NoExecute
+          operator: "Exists"
+      containers:
+        - name: node-cache
+          image: "{{ nodelocaldns_image_repo }}:{{ nodelocaldns_image_tag }}"
+          resources:
+            limits:
+              memory: {{ nodelocaldns_memory_limit }}
+            requests:
+              cpu: {{ nodelocaldns_cpu_requests }}
+              memory: {{ nodelocaldns_memory_requests }}
+          args: [ "-localip", "{{ nodelocaldns_ip }}", "-conf", "/etc/coredns/Corefile", "-upstreamsvc", "coredns", "-skipteardown" ]  # -skipteardown: keep the iptables rules in place on exit
+          securityContext:
+            privileged: true
+{% if nodelocaldns_bind_metrics_host_ip %}
+          env:
+            - name: MY_HOST_IP
+              valueFrom:
+                fieldRef:
+                  fieldPath: status.hostIP
+{% endif %}
+          livenessProbe:
+            httpGet:
+              host: {{ nodelocaldns_ip }}
+              path: /health
+              port: {{ nodelocaldns_second_health_port }}  # this pod's own `health` port from Corefile-second, not the primary's
+              scheme: HTTP
+            timeoutSeconds: 5
+            successThreshold: 1
+            failureThreshold: 10
+          readinessProbe:
+            httpGet:
+              host: {{ nodelocaldns_ip }}
+              path: /health
+              port: {{ nodelocaldns_second_health_port }}  # this pod's own `health` port from Corefile-second, not the primary's
+              scheme: HTTP
+            timeoutSeconds: 5
+            successThreshold: 1
+            failureThreshold: 10
+          volumeMounts:
+            - name: config-volume
+              mountPath: /etc/coredns
+            - name: xtables-lock
+              mountPath: /run/xtables.lock
+          lifecycle:
+            preStop:  # hold the pod for the configured skew before killing PID 1
+              exec:
+                command:
+                  - sh
+                  - -c
+                  - sleep {{ nodelocaldns_secondary_skew_seconds }} && kill -9 1
+      volumes:
+        - name: config-volume
+          configMap:
+            name: nodelocaldns
+            items:
+              - key: Corefile-second  # exposed as /etc/coredns/Corefile, the path given to -conf
+                path: Corefile
+{% if dns_etchosts | default(None) %}
+              - key: hosts
+                path: hosts
+{% endif %}
+        - name: xtables-lock
+          hostPath:
+            path: /run/xtables.lock
+            type: FileOrCreate
+      # Implement a time skew between the main nodelocaldns and this secondary.
+ # Since the two nodelocaldns instances share the :53 port, we want to keep + # at least one running at any time even if the manifests are replaced simultaneously + terminationGracePeriodSeconds: {{ nodelocaldns_secondary_skew_seconds }} + updateStrategy: + rollingUpdate: + maxUnavailable: {{ serial | default('20%') }} + type: RollingUpdate diff --git a/roles/kubespray-defaults/defaults/main.yaml b/roles/kubespray-defaults/defaults/main.yaml index 99aec470e3dcdca2bd614cfbfa11d12280182e66..488e1ae5bf3025c07dee2f865ceb333fe1706153 100644 --- a/roles/kubespray-defaults/defaults/main.yaml +++ b/roles/kubespray-defaults/defaults/main.yaml @@ -93,9 +93,12 @@ dns_mode: coredns # Enable nodelocal dns cache enable_nodelocaldns: true +enable_nodelocaldns_secondary: false nodelocaldns_ip: 169.254.25.10 nodelocaldns_health_port: 9254 +nodelocaldns_second_health_port: 9256 nodelocaldns_bind_metrics_host_ip: false +nodelocaldns_secondary_skew_seconds: 5 # Should be set to a cluster IP if using a custom cluster DNS manual_dns_server: "" diff --git a/tests/files/packet_centos8-calico-nodelocaldns-secondary.yml b/tests/files/packet_centos8-calico-nodelocaldns-secondary.yml new file mode 100644 index 0000000000000000000000000000000000000000..600ce6017ab381fffc4400ee7262e7d2c0e888a7 --- /dev/null +++ b/tests/files/packet_centos8-calico-nodelocaldns-secondary.yml @@ -0,0 +1,15 @@ +--- +# Instance settings +cloud_image: centos-8 +mode: default +vm_memory: 3072Mi + +# Kubespray settings +kube_network_plugin: calico +deploy_netchecker: true +dns_min_replicas: 1 +enable_nodelocaldns_secondary: true +loadbalancer_apiserver_type: haproxy + +# required +calico_iptables_backend: "Auto"