From 0263c649f430722a24c36ce02c04703694a7e7d1 Mon Sep 17 00:00:00 2001
From: Mathieu Parent <math.parent@gmail.com>
Date: Thu, 18 Nov 2021 08:53:01 +0100
Subject: [PATCH] Allow to scrape etcd metrics using a service (#8203)

Signed-off-by: Mathieu Parent <math.parent@gmail.com>
---
 docs/etcd.md                                  | 30 +++++++++++++++++++
 roles/etcd/defaults/main.yml                  |  2 +-
 .../kubernetes-apps/ansible/defaults/main.yml |  7 +++++
 .../ansible/tasks/etcd_metrics.yml            | 21 +++++++++++++
 roles/kubernetes-apps/ansible/tasks/main.yml  |  6 ++++
 .../templates/etcd_metrics-endpoints.yml.j2   | 17 +++++++++++
 .../templates/etcd_metrics-service.yml.j2     | 13 ++++++++
 roles/kubespray-defaults/defaults/main.yaml   |  4 +++
 8 files changed, 99 insertions(+), 1 deletion(-)
 create mode 100644 docs/etcd.md
 create mode 100644 roles/kubernetes-apps/ansible/tasks/etcd_metrics.yml
 create mode 100644 roles/kubernetes-apps/ansible/templates/etcd_metrics-endpoints.yml.j2
 create mode 100644 roles/kubernetes-apps/ansible/templates/etcd_metrics-service.yml.j2

diff --git a/docs/etcd.md b/docs/etcd.md
new file mode 100644
index 000000000..2d42ffb10
--- /dev/null
+++ b/docs/etcd.md
@@ -0,0 +1,30 @@
+# etcd
+
+## Metrics
+
+To expose metrics on a separate HTTP port, define it in the inventory with:
+
+```yaml
+etcd_metrics_port: 2381
+```
+
+To create a service `etcd-metrics` and associated endpoints in the `kube-system` namespace,
+define its labels in the inventory with:
+
+```yaml
+etcd_metrics_service_labels:
+  k8s-app: etcd
+  app.kubernetes.io/managed-by: Kubespray
+  app: kube-prometheus-stack-kube-etcd
+  release: prometheus-stack
+```
+
+The last two labels in the above example allow the metrics to be scraped by the
+[kube-prometheus-stack](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack)
+chart when it is deployed with the following Helm `values.yaml`:
+
+```yaml
+kubeEtcd:
+  service:
+    enabled: false
+```
diff --git a/roles/etcd/defaults/main.yml b/roles/etcd/defaults/main.yml
index c11758e95..ab78abaf9 100644
--- a/roles/etcd/defaults/main.yml
+++ b/roles/etcd/defaults/main.yml
@@ -35,7 +35,7 @@ etcd_election_timeout: "5000"
 
 etcd_metrics: "basic"
 
-# Uncomment to set a separate port for etcd to expose metrics on
+# Define in inventory to set a separate port for etcd to expose metrics on
 # etcd_metrics_port: 2381
 
 ## A dictionary of extra environment variables to add to etcd.env, formatted like:
diff --git a/roles/kubernetes-apps/ansible/defaults/main.yml b/roles/kubernetes-apps/ansible/defaults/main.yml
index fa06b2e0d..37db5b6f5 100644
--- a/roles/kubernetes-apps/ansible/defaults/main.yml
+++ b/roles/kubernetes-apps/ansible/defaults/main.yml
@@ -25,6 +25,13 @@ dns_autoscaler_cpu_requests: 20m
 dns_autoscaler_memory_requests: 10Mi
 dns_autoscaler_deployment_nodeselector: "kubernetes.io/os: linux"
 
+# etcd metrics
+# etcd_metrics_service_labels:
+#   k8s-app: etcd
+#   app.kubernetes.io/managed-by: Kubespray
+#   app: kube-prometheus-stack-kube-etcd
+#   release: prometheus-stack
+
 # Netchecker
 deploy_netchecker: false
 netchecker_port: 31081
diff --git a/roles/kubernetes-apps/ansible/tasks/etcd_metrics.yml b/roles/kubernetes-apps/ansible/tasks/etcd_metrics.yml
new file mode 100644
index 000000000..0608fd375
--- /dev/null
+++ b/roles/kubernetes-apps/ansible/tasks/etcd_metrics.yml
@@ -0,0 +1,21 @@
+---
+- name: Kubernetes Apps | Lay down etcd_metrics templates
+  template:
+    src: "{{ item.file }}.j2"
+    dest: "{{ kube_config_dir }}/{{ item.file }}"
+  with_items:
+    - { file: etcd_metrics-endpoints.yml, type: endpoints, name: etcd-metrics }
+    - { file: etcd_metrics-service.yml, type: service, name: etcd-metrics }
+  register: manifests  # results are looped over by the "Start etcd_metrics" task
+  when: inventory_hostname == groups['kube_control_plane'][0]  # first control-plane host only
+
+- name: Kubernetes Apps | Start etcd_metrics
+  kube:
+    name: "{{ item.item.name }}"
+    namespace: kube-system
+    kubectl: "{{ bin_dir }}/kubectl"
+    resource: "{{ item.item.type }}"
+    filename: "{{ kube_config_dir }}/{{ item.item.file }}"  # same path the template task wrote to
+    state: "latest"
+  with_items: "{{ manifests.results }}"  # item.item is the {file, type, name} entry of the template loop
+  when: inventory_hostname == groups['kube_control_plane'][0]  # first control-plane host only
diff --git a/roles/kubernetes-apps/ansible/tasks/main.yml b/roles/kubernetes-apps/ansible/tasks/main.yml
index d59f0e0b6..4a0180ede 100644
--- a/roles/kubernetes-apps/ansible/tasks/main.yml
+++ b/roles/kubernetes-apps/ansible/tasks/main.yml
@@ -63,6 +63,12 @@
   loop_control:
     label: "{{ item.item.file }}"
 
+- name: Kubernetes Apps | Etcd metrics endpoints
+  import_tasks: etcd_metrics.yml
+  when: etcd_metrics_port is defined and etcd_metrics_service_labels is defined  # skipped unless both variables are defined
+  tags:
+    - etcd_metrics
+
 - name: Kubernetes Apps | Netchecker
   import_tasks: netchecker.yml
   when: deploy_netchecker
diff --git a/roles/kubernetes-apps/ansible/templates/etcd_metrics-endpoints.yml.j2 b/roles/kubernetes-apps/ansible/templates/etcd_metrics-endpoints.yml.j2
new file mode 100644
index 000000000..d8b4bcd90
--- /dev/null
+++ b/roles/kubernetes-apps/ansible/templates/etcd_metrics-endpoints.yml.j2
@@ -0,0 +1,17 @@
+apiVersion: v1
+kind: Endpoints
+metadata:
+  name: etcd-metrics
+  namespace: kube-system
+  labels:
+    k8s-app: etcd
+    app.kubernetes.io/managed-by: Kubespray
+subsets:
+{% for etcd_metrics_address in etcd_metrics_addresses.split(',') %}
+  - addresses:
+      - ip: {{ etcd_metrics_address | urlsplit('hostname') }}
+    ports:
+      - name: http-metrics
+        port: {{ etcd_metrics_address | urlsplit('port') }}
+        protocol: TCP
+{% endfor %}
diff --git a/roles/kubernetes-apps/ansible/templates/etcd_metrics-service.yml.j2 b/roles/kubernetes-apps/ansible/templates/etcd_metrics-service.yml.j2
new file mode 100644
index 000000000..5bd9254ab
--- /dev/null
+++ b/roles/kubernetes-apps/ansible/templates/etcd_metrics-service.yml.j2
@@ -0,0 +1,13 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: etcd-metrics
+  namespace: kube-system
+  labels:
+    {{ etcd_metrics_service_labels | to_yaml(indent=2, width=1337) | indent(width=4) }}
+spec:
+  ports:
+    - name: http-metrics
+      protocol: TCP
+      port: {{ etcd_metrics_port }}
+      # targetPort:
diff --git a/roles/kubespray-defaults/defaults/main.yaml b/roles/kubespray-defaults/defaults/main.yaml
index 8b5b78ddf..18e0913df 100644
--- a/roles/kubespray-defaults/defaults/main.yaml
+++ b/roles/kubespray-defaults/defaults/main.yaml
@@ -543,6 +543,10 @@ etcd_events_access_addresses_list: |-
     'https://{{ hostvars[item]['etcd_events_access_address'] | default(hostvars[item]['ip'] | default(fallback_ips[item])) }}:2381'{% if not loop.last %},{% endif %}
   {%- endfor %}
   ]
+etcd_metrics_addresses: |-  # comma-separated URLs, one per etcd_hosts entry; port falls back to 2381
+  {% for item in etcd_hosts -%}
+    https://{{ hostvars[item]['etcd_access_address'] | default(hostvars[item]['ip'] | default(fallback_ips[item])) }}:{{ etcd_metrics_port | default(2381) }}{% if not loop.last %},{% endif %}
+  {%- endfor %}
 etcd_events_access_addresses: "{{etcd_events_access_addresses_list | join(',')}}"
 etcd_events_access_addresses_semicolon: "{{etcd_events_access_addresses_list | join(';')}}"
 # user should set etcd_member_name in inventory/mycluster/hosts.ini
-- 
GitLab