From c5e425b02b2941e9699ebdeaef8799c850fced87 Mon Sep 17 00:00:00 2001
From: okamototk <toraneko@gmail.com>
Date: Fri, 23 Nov 2018 17:36:21 +0900
Subject: [PATCH] Support Metrics Server as addon (#3560). (#3563)

* Support Metrics Server as addon (#3560).

* Update metrics server v0.3.1.

* Add metrics server test.

* Replace metrics server manifests with kubernetes/cluster/addons's.

* Modify metrics server manifests for kubespray.

* Follow PR#3558 node label node-role.kubernetes.io/master change

* Fix metrics server parameters base_metrics_server_... to metrics_server_...

* Fix hard-coded metrics_server_memory_per_node

* Add configurable insecure tls for metrics-apiservice

* Make addon-resizer downloadable and extract parameters as variables

* Remove metrics server version from deployment name

* Make Metrics Server work when all masters have the node role

* Download metrics-server and addon-resizer containers only on master

* Separate ServiceAccount and ConfigMap, and fix application name

* Remove old metrics server clusterrole template

* Fix addon-resizer image specification

* Make InternalIP default for metrics_server_kubelet_preferred_address_types

Make InternalIP the default because specifying multiple preferred address types does not work.
---
 .../sample/group_vars/k8s-cluster/addons.yml  |   6 +
 roles/download/defaults/main.yml              |  25 ++++
 roles/kubernetes-apps/meta/main.yml           |   8 ++
 .../metrics_server/defaults/main.yml          |  12 ++
 .../metrics_server/tasks/main.yml             |  57 ++++++++
 .../templates/auth-delegator.yaml.j2          |  15 ++
 .../templates/auth-reader.yaml.j2             |  16 +++
 .../templates/metrics-apiservice.yaml.j2      |  16 +++
 .../templates/metrics-server-cm.yaml.j2       |  13 ++
 .../metrics-server-deployment.yaml.j2         | 134 ++++++++++++++++++
 .../templates/metrics-server-sa.yaml.j2       |   9 ++
 .../templates/metrics-server-service.yaml.j2  |  16 +++
 ...resource-reader-clusterrolebinding.yaml.j2 |  16 +++
 .../templates/resource-reader.yaml.j2         |  27 ++++
 roles/kubespray-defaults/defaults/main.yaml   |   1 +
 tests/files/gce_centos7-flannel-addons.yml    |   1 +
 16 files changed, 372 insertions(+)
 create mode 100644 roles/kubernetes-apps/metrics_server/defaults/main.yml
 create mode 100644 roles/kubernetes-apps/metrics_server/tasks/main.yml
 create mode 100644 roles/kubernetes-apps/metrics_server/templates/auth-delegator.yaml.j2
 create mode 100644 roles/kubernetes-apps/metrics_server/templates/auth-reader.yaml.j2
 create mode 100644 roles/kubernetes-apps/metrics_server/templates/metrics-apiservice.yaml.j2
 create mode 100644 roles/kubernetes-apps/metrics_server/templates/metrics-server-cm.yaml.j2
 create mode 100644 roles/kubernetes-apps/metrics_server/templates/metrics-server-deployment.yaml.j2
 create mode 100644 roles/kubernetes-apps/metrics_server/templates/metrics-server-sa.yaml.j2
 create mode 100644 roles/kubernetes-apps/metrics_server/templates/metrics-server-service.yaml.j2
 create mode 100644 roles/kubernetes-apps/metrics_server/templates/resource-reader-clusterrolebinding.yaml.j2
 create mode 100644 roles/kubernetes-apps/metrics_server/templates/resource-reader.yaml.j2

diff --git a/inventory/sample/group_vars/k8s-cluster/addons.yml b/inventory/sample/group_vars/k8s-cluster/addons.yml
index 01699138b..ca801d3cb 100644
--- a/inventory/sample/group_vars/k8s-cluster/addons.yml
+++ b/inventory/sample/group_vars/k8s-cluster/addons.yml
@@ -11,6 +11,12 @@ registry_enabled: false
 # registry_storage_class: ""
 # registry_disk_size: "10Gi"
 
+# Metrics Server deployment
+metrics_server_enabled: false
+# metrics_server_kubelet_insecure_tls: true
+# metrics_server_metric_resolution: 60s
+# metrics_server_kubelet_preferred_address_types: "InternalIP"
+
 # Local volume provisioner deployment
 local_volume_provisioner_enabled: false
 # local_volume_provisioner_namespace: kube-system
diff --git a/roles/download/defaults/main.yml b/roles/download/defaults/main.yml
index 35e0d17de..6420a94de 100644
--- a/roles/download/defaults/main.yml
+++ b/roles/download/defaults/main.yml
@@ -198,6 +198,9 @@ registry_image_repo: "registry"
 registry_image_tag: "2.6"
 registry_proxy_image_repo: "gcr.io/google_containers/kube-registry-proxy"
 registry_proxy_image_tag: "0.4"
+metrics_server_version: "v0.3.1"
+metrics_server_image_repo: "k8s.gcr.io/metrics-server-amd64"
+metrics_server_image_tag: "{{ metrics_server_version }}"
 local_volume_provisioner_image_repo: "quay.io/external_storage/local-volume-provisioner"
 local_volume_provisioner_image_tag: "v2.1.0"
 cephfs_provisioner_image_repo: "quay.io/external_storage/cephfs-provisioner"
@@ -209,6 +212,9 @@ ingress_nginx_default_backend_image_tag: "1.5"
 cert_manager_version: "v0.5.2"
 cert_manager_controller_image_repo: "quay.io/jetstack/cert-manager-controller"
 cert_manager_controller_image_tag: "{{ cert_manager_version }}"
+addon_resizer_version: "1.8.3"
+addon_resizer_image_repo: "k8s.gcr.io/addon-resizer"
+addon_resizer_image_tag: "{{ addon_resizer_version }}"
 
 downloads:
   netcheck_server:
@@ -548,6 +554,25 @@ downloads:
     groups:
       - kube-node
 
+  metrics_server:
+    enabled: "{{ metrics_server_enabled }}"
+    container: true
+    repo: "{{ metrics_server_image_repo }}"
+    tag: "{{ metrics_server_image_tag }}"
+    sha256: "{{ metrics_server_digest_checksum|default(None) }}"
+    groups:
+      - kube-master
+
+  addon_resizer:
+    # Currently addon_resizer is only used by metrics server
+    enabled: "{{ metrics_server_enabled }}"
+    container: true
+    repo: "{{ addon_resizer_image_repo }}"
+    tag: "{{ addon_resizer_image_tag }}"
+    sha256: "{{ addon_resizer_digest_checksum|default(None) }}"
+    groups:
+      - kube-master
+
   local_volume_provisioner:
     enabled: "{{ local_volume_provisioner_enabled }}"
     container: true
diff --git a/roles/kubernetes-apps/meta/main.yml b/roles/kubernetes-apps/meta/main.yml
index 52ddee1bc..13cf0af89 100644
--- a/roles/kubernetes-apps/meta/main.yml
+++ b/roles/kubernetes-apps/meta/main.yml
@@ -21,6 +21,14 @@ dependencies:
       - apps
       - registry
 
+  - role: kubernetes-apps/metrics_server
+    when:
+      - metrics_server_enabled
+      - inventory_hostname == groups['kube-master'][0]
+    tags:
+      - apps
+      - metrics_server
+
   - role: kubernetes-apps/persistent_volumes
     when:
       - persistent_volumes_enabled
diff --git a/roles/kubernetes-apps/metrics_server/defaults/main.yml b/roles/kubernetes-apps/metrics_server/defaults/main.yml
new file mode 100644
index 000000000..94dcfc0d9
--- /dev/null
+++ b/roles/kubernetes-apps/metrics_server/defaults/main.yml
@@ -0,0 +1,12 @@
+---
+metrics_server_kubelet_insecure_tls: true
+metrics_server_kubelet_preferred_address_types: "InternalIP"
+metrics_server_metric_resolution: 60s
+metrics_server_cpu: 40m
+metrics_server_memory: 35Mi
+metrics_server_memory_per_node: 4Mi
+metrics_server_min_cluster_size: 5
+addon_resizer_limits_cpu: 100m
+addon_resizer_limits_memory: 300Mi
+addon_resizer_requests_cpu: 5m
+addon_resizer_requests_memory: 50Mi
diff --git a/roles/kubernetes-apps/metrics_server/tasks/main.yml b/roles/kubernetes-apps/metrics_server/tasks/main.yml
new file mode 100644
index 000000000..d7dc45443
--- /dev/null
+++ b/roles/kubernetes-apps/metrics_server/tasks/main.yml
@@ -0,0 +1,57 @@
+---
+# True when every master is also in kube-node (order-independent): no master is tainted, so no toleration is needed.
+- name: Check all masters are node or not
+  set_fact:
+    masters_are_not_tainted: "{{ groups['kube-master'] | difference(groups['kube-node']) | length == 0 }}"
+
+- name: Metrics Server | Delete addon dir
+  file:
+    path: "{{ kube_config_dir }}/addons/metrics_server"
+    state: absent
+  when:
+    - inventory_hostname == groups['kube-master'][0]
+  tags:
+    - upgrade
+
+- name: Metrics Server | Create addon dir
+  file:
+    path: "{{ kube_config_dir }}/addons/metrics_server"
+    state: directory
+    owner: root
+    group: root
+    mode: "0755"  # quoted so the octal mode is never reinterpreted as a number
+  when:
+    - inventory_hostname == groups['kube-master'][0]
+
+- name: Metrics Server | Templates list
+  set_fact:
+    metrics_server_templates:
+      - { name: auth-delegator, file: auth-delegator.yaml, type: clusterrolebinding }
+      - { name: auth-reader, file: auth-reader.yaml, type: rolebinding }
+      - { name: metrics-server-cm, file: metrics-server-cm.yaml, type: cm }
+      - { name: metrics-server-sa, file: metrics-server-sa.yaml, type: sa }
+      - { name: metrics-server-deployment, file: metrics-server-deployment.yaml, type: deploy }
+      - { name: metrics-server-service, file: metrics-server-service.yaml, type: service }
+      - { name: metrics-apiservice, file: metrics-apiservice.yaml, type: service }
+      - { name: resource-reader-clusterrolebinding, file: resource-reader-clusterrolebinding.yaml, type: clusterrolebinding }
+      - { name: resource-reader, file: resource-reader.yaml, type: clusterrole }
+
+- name: Metrics Server | Create manifests
+  template:
+    src: "{{ item.file }}.j2"
+    dest: "{{ kube_config_dir }}/addons/metrics_server/{{ item.file }}"
+  with_items: "{{ metrics_server_templates }}"
+  register: metrics_server_manifests
+  when:
+    - inventory_hostname == groups['kube-master'][0]
+
+- name: Metrics Server | Apply manifests
+  kube:
+    name: "{{ item.item.name }}"
+    kubectl: "{{ bin_dir }}/kubectl"
+    resource: "{{ item.item.type }}"
+    filename: "{{ kube_config_dir }}/addons/metrics_server/{{ item.item.file }}"
+    state: "latest"
+  with_items: "{{ metrics_server_manifests.results }}"
+  when:
+    - inventory_hostname == groups['kube-master'][0]
diff --git a/roles/kubernetes-apps/metrics_server/templates/auth-delegator.yaml.j2 b/roles/kubernetes-apps/metrics_server/templates/auth-delegator.yaml.j2
new file mode 100644
index 000000000..cbaa62506
--- /dev/null
+++ b/roles/kubernetes-apps/metrics_server/templates/auth-delegator.yaml.j2
@@ -0,0 +1,15 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: metrics-server:system:auth-delegator
+  labels:
+    kubernetes.io/cluster-service: "true"
+    addonmanager.kubernetes.io/mode: Reconcile
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: system:auth-delegator
+subjects:
+- kind: ServiceAccount
+  name: metrics-server
+  namespace: kube-system
diff --git a/roles/kubernetes-apps/metrics_server/templates/auth-reader.yaml.j2 b/roles/kubernetes-apps/metrics_server/templates/auth-reader.yaml.j2
new file mode 100644
index 000000000..60da052c1
--- /dev/null
+++ b/roles/kubernetes-apps/metrics_server/templates/auth-reader.yaml.j2
@@ -0,0 +1,16 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: metrics-server-auth-reader
+  namespace: kube-system
+  labels:
+    kubernetes.io/cluster-service: "true"
+    addonmanager.kubernetes.io/mode: Reconcile
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: Role
+  name: extension-apiserver-authentication-reader
+subjects:
+- kind: ServiceAccount
+  name: metrics-server
+  namespace: kube-system
diff --git a/roles/kubernetes-apps/metrics_server/templates/metrics-apiservice.yaml.j2 b/roles/kubernetes-apps/metrics_server/templates/metrics-apiservice.yaml.j2
new file mode 100644
index 000000000..51770da68
--- /dev/null
+++ b/roles/kubernetes-apps/metrics_server/templates/metrics-apiservice.yaml.j2
@@ -0,0 +1,16 @@
+apiVersion: apiregistration.k8s.io/v1beta1
+kind: APIService
+metadata:
+  name: v1beta1.metrics.k8s.io
+  labels:
+    kubernetes.io/cluster-service: "true"
+    addonmanager.kubernetes.io/mode: Reconcile
+spec:
+  service:
+    name: metrics-server
+    namespace: kube-system
+  group: metrics.k8s.io
+  version: v1beta1
+  insecureSkipTLSVerify: {{ metrics_server_kubelet_insecure_tls }}
+  groupPriorityMinimum: 100
+  versionPriority: 100
diff --git a/roles/kubernetes-apps/metrics_server/templates/metrics-server-cm.yaml.j2 b/roles/kubernetes-apps/metrics_server/templates/metrics-server-cm.yaml.j2
new file mode 100644
index 000000000..f969ff759
--- /dev/null
+++ b/roles/kubernetes-apps/metrics_server/templates/metrics-server-cm.yaml.j2
@@ -0,0 +1,13 @@
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: metrics-server-config
+  namespace: kube-system
+  labels:
+    kubernetes.io/cluster-service: "true"
+    addonmanager.kubernetes.io/mode: EnsureExists
+data:
+  NannyConfiguration: |-
+    apiVersion: nannyconfig/v1alpha1
+    kind: NannyConfiguration
diff --git a/roles/kubernetes-apps/metrics_server/templates/metrics-server-deployment.yaml.j2 b/roles/kubernetes-apps/metrics_server/templates/metrics-server-deployment.yaml.j2
new file mode 100644
index 000000000..6cb51d025
--- /dev/null
+++ b/roles/kubernetes-apps/metrics_server/templates/metrics-server-deployment.yaml.j2
@@ -0,0 +1,134 @@
+---
+apiVersion: extensions/v1beta1
+kind: Deployment
+metadata:
+  name: metrics-server
+  namespace: kube-system
+  labels:
+    app.kubernetes.io/name: metrics-server
+    kubernetes.io/cluster-service: "true"
+    addonmanager.kubernetes.io/mode: Reconcile
+    version: {{ metrics_server_version }}
+spec:
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: metrics-server
+      version: {{ metrics_server_version }}
+  template:
+    metadata:
+      name: metrics-server
+      labels:
+        app.kubernetes.io/name: metrics-server
+        version: {{ metrics_server_version }}
+      annotations:
+        scheduler.alpha.kubernetes.io/critical-pod: ''
+        seccomp.security.alpha.kubernetes.io/pod: 'docker/default'
+    spec:
+{% if kube_version is version('v1.11.1', '>=') %}
+      priorityClassName: system-cluster-critical
+{% endif %}
+      serviceAccountName: metrics-server
+      containers:
+      - name: metrics-server
+        image: {{ metrics_server_image_repo }}:{{ metrics_server_image_tag }}
+        command:
+        - /metrics-server
+{% if metrics_server_kubelet_preferred_address_types %}
+        - --kubelet-preferred-address-types={{ metrics_server_kubelet_preferred_address_types }}
+{% endif %}
+{% if metrics_server_kubelet_insecure_tls %}
+        - --kubelet-insecure-tls
+{% endif %}
+        - --metric-resolution={{ metrics_server_metric_resolution }}
+        ports:
+        - containerPort: 443
+          name: https
+          protocol: TCP
+        livenessProbe:
+          failureThreshold: 3
+          httpGet:
+            path: /healthz
+            port: https
+            scheme: HTTPS
+          initialDelaySeconds: 30
+          periodSeconds: 30
+          successThreshold: 1
+          timeoutSeconds: 10
+        readinessProbe:
+          failureThreshold: 3
+          httpGet:
+            path: /healthz
+            port: 443
+            scheme: HTTPS
+          initialDelaySeconds: 30
+          periodSeconds: 30
+          successThreshold: 1
+          timeoutSeconds: 10
+        securityContext:
+          # Currently non root is not supported:
+          #   https://github.com/kubernetes-incubator/metrics-server/issues/37
+          #
+          # runAsNonRoot: true
+          # runAsUser: 65534
+          capabilities:
+            drop:
+            - ALL
+            add:
+            - NET_BIND_SERVICE
+      - name: metrics-server-nanny
+        image: {{ addon_resizer_image_repo }}:{{ addon_resizer_image_tag }}
+        resources:
+          limits:
+            cpu: {{ addon_resizer_limits_cpu }}
+            memory: {{ addon_resizer_limits_memory }}
+          requests:
+            cpu: {{ addon_resizer_requests_cpu }}
+            memory: {{ addon_resizer_requests_memory }}
+        env:
+          - name: MY_POD_NAME
+            valueFrom:
+              fieldRef:
+                fieldPath: metadata.name
+          - name: MY_POD_NAMESPACE
+            valueFrom:
+              fieldRef:
+                fieldPath: metadata.namespace
+        volumeMounts:
+        - name: metrics-server-config-volume
+          mountPath: /etc/config
+        command:
+          - /pod_nanny
+          - --config-dir=/etc/config
+          - --cpu={{ metrics_server_cpu }}
+          - --extra-cpu=0.5m
+          - --memory={{ metrics_server_memory }}
+          - --extra-memory={{ metrics_server_memory_per_node }}
+          - --threshold=5
+          - --deployment=metrics-server  # must match this Deployment's metadata.name
+          - --container=metrics-server
+          - --poll-period=300000
+          - --estimator=exponential
+          # Specifies the smallest cluster (defined in number of nodes)
+          # resources will be scaled to.
+          - --minClusterSize={{ metrics_server_min_cluster_size }}
+      volumes:
+        - name: metrics-server-config-volume
+          configMap:
+            name: metrics-server-config
+{% if not masters_are_not_tainted %}
+      tolerations:
+        - key: node-role.kubernetes.io/master
+          effect: NoSchedule
+        - key: "CriticalAddonsOnly"
+          operator: "Exists"
+{% endif %}
+      affinity:
+        nodeAffinity:
+          preferredDuringSchedulingIgnoredDuringExecution:
+          - weight: 100
+            preference:
+              matchExpressions:
+              - key: node-role.kubernetes.io/master
+                operator: In
+                values:
+                - ""
diff --git a/roles/kubernetes-apps/metrics_server/templates/metrics-server-sa.yaml.j2 b/roles/kubernetes-apps/metrics_server/templates/metrics-server-sa.yaml.j2
new file mode 100644
index 000000000..fa79edca4
--- /dev/null
+++ b/roles/kubernetes-apps/metrics_server/templates/metrics-server-sa.yaml.j2
@@ -0,0 +1,9 @@
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: metrics-server
+  namespace: kube-system
+  labels:
+    kubernetes.io/cluster-service: "true"
+    addonmanager.kubernetes.io/mode: Reconcile
diff --git a/roles/kubernetes-apps/metrics_server/templates/metrics-server-service.yaml.j2 b/roles/kubernetes-apps/metrics_server/templates/metrics-server-service.yaml.j2
new file mode 100644
index 000000000..aa01cd645
--- /dev/null
+++ b/roles/kubernetes-apps/metrics_server/templates/metrics-server-service.yaml.j2
@@ -0,0 +1,16 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: metrics-server
+  namespace: kube-system
+  labels:
+    addonmanager.kubernetes.io/mode: Reconcile
+    kubernetes.io/cluster-service: "true"
+    app.kubernetes.io/name: "metrics-server"
+spec:
+  selector:
+    app.kubernetes.io/name: metrics-server
+  ports:
+  - port: 443
+    protocol: TCP
+    targetPort: https
diff --git a/roles/kubernetes-apps/metrics_server/templates/resource-reader-clusterrolebinding.yaml.j2 b/roles/kubernetes-apps/metrics_server/templates/resource-reader-clusterrolebinding.yaml.j2
new file mode 100644
index 000000000..0e59d5ed0
--- /dev/null
+++ b/roles/kubernetes-apps/metrics_server/templates/resource-reader-clusterrolebinding.yaml.j2
@@ -0,0 +1,16 @@
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: system:metrics-server
+  labels:
+    kubernetes.io/cluster-service: "true"
+    addonmanager.kubernetes.io/mode: Reconcile
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: system:metrics-server
+subjects:
+- kind: ServiceAccount
+  name: metrics-server
+  namespace: kube-system
diff --git a/roles/kubernetes-apps/metrics_server/templates/resource-reader.yaml.j2 b/roles/kubernetes-apps/metrics_server/templates/resource-reader.yaml.j2
new file mode 100644
index 000000000..07db929c5
--- /dev/null
+++ b/roles/kubernetes-apps/metrics_server/templates/resource-reader.yaml.j2
@@ -0,0 +1,27 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: system:metrics-server
+  labels:
+    kubernetes.io/cluster-service: "true"
+    addonmanager.kubernetes.io/mode: Reconcile
+rules:
+- apiGroups:
+  - ""
+  resources:
+  - pods
+  - nodes
+  - namespaces
+  verbs:
+  - get
+  - list
+  - watch
+- apiGroups:
+  - "extensions"
+  resources:
+  - deployments
+  verbs:
+  - get
+  - list
+  - update
+  - watch
diff --git a/roles/kubespray-defaults/defaults/main.yaml b/roles/kubespray-defaults/defaults/main.yaml
index c9c8c5575..9039d4c6c 100644
--- a/roles/kubespray-defaults/defaults/main.yaml
+++ b/roles/kubespray-defaults/defaults/main.yaml
@@ -252,6 +252,7 @@ dashboard_enabled: true
 # Addons which can be enabled
 helm_enabled: false
 registry_enabled: false
+metrics_server_enabled: false
 enable_network_policy: true
 local_volume_provisioner_enabled: "{{ local_volumes_enabled | default('false') }}"
 persistent_volumes_enabled: false
diff --git a/tests/files/gce_centos7-flannel-addons.yml b/tests/files/gce_centos7-flannel-addons.yml
index 0a8712a91..3847fbc91 100644
--- a/tests/files/gce_centos7-flannel-addons.yml
+++ b/tests/files/gce_centos7-flannel-addons.yml
@@ -17,3 +17,4 @@ cloud_provider: gce
 kube_encrypt_secret_data: true
 ingress_nginx_enabled: true
 cert_manager_enabled: true
+metrics_server_enabled: true
-- 
GitLab