Commit a56d9de5 authored and committed by Bogdan Dobrelya

Systemd units, limits, and bin path fixes


* Add restart for weave service unit
* Reuse docker_bin_dir everywhere
* Limit systemd managed docker containers by CPU/RAM. Do not configure native
  systemd limits, since the lack of consensus in the kernel community means
  they would require out-of-tree kernel patches (the resulting docker run
  flags are sketched below).
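
In practice the CPU/RAM limits are passed as docker run flags inside the
generated systemd units rather than as native unit properties such as
MemoryLimit= or CPUShares=; a trimmed fragment of the etcd unit template
from this change illustrates the pattern:

  ExecStart={{ docker_bin_dir }}/docker run --restart=on-failure:5 \
    --env-file=/etc/etcd.env \
    --memory={{ etcd_memory_limit|regex_replace('Mi', 'M') }} \
    --cpu-shares={{ etcd_cpu_limit|regex_replace('m', '') }} \
    --name={{ etcd_member_name | default("etcd") }} \
    {{ etcd_image_repo }}:{{ etcd_image_tag }}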

Signed-off-by: Bogdan Dobrelya <bdobrelia@mirantis.com>
parent 6139ee3a
Showing changed files with 114 additions and 20 deletions
@@ -20,5 +20,12 @@ For large scale deployments, consider the following configuration changes:
``dns_cpu_requests``, ``dns_memory_limit``, ``dns_memory_requests``.
Please note that limits must always be greater than or equal to requests.
* Tune CPU/memory limits and requests. These live in the roles' defaults
and are named like ``foo_memory_limit``, ``foo_memory_requests``,
``foo_cpu_limit`` and ``foo_cpu_requests``. Note that K8s-style 'Mi' memory
units are rewritten to 'M' when the values are passed to ``docker run``, and
the trailing 'm' of K8s CPU units is stripped for docker as well, because
docker does not understand K8s units (a worked example follows below).
For example, when deploying 200 nodes, you may want to run ansible with
``--forks=50``, ``--timeout=600`` and define ``retry_stagger: 60``.
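
As a worked example of the unit conversion described above (values taken from
the etcd role defaults added in this change), the Jinja2 filters used in the
unit templates rewrite the K8s-style values before handing them to ``docker run``:

  # role defaults, K8s-style units
  etcd_memory_limit: 512M
  etcd_cpu_limit: 300m

  # template expressions from the etcd systemd unit
  --memory={{ etcd_memory_limit|regex_replace('Mi', 'M') }}    # renders as --memory=512M
  --cpu-shares={{ etcd_cpu_limit|regex_replace('m', '') }}     # renders as --cpu-shares=300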
@@ -188,6 +188,7 @@ docker_daemon_graph: "/var/lib/docker"
## An obvious use case is allowing insecure-registry access
## to self hosted registries like so:
docker_options: "--insecure-registry={{ kube_service_addresses }} --graph={{ docker_daemon_graph }}"
docker_bin_dir: "/usr/bin"
## Uncomment this if you want to force overlay/overlay2 as docker storage driver
## Please note that overlay2 is only supported on newer kernels
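
Because docker_bin_dir is now defined once in the group vars above and reused
by every task and unit template, pointing a deployment at a non-standard
docker binary takes a single inventory override; a hypothetical example
(the path is purely illustrative):

  # inventory group_vars -- hypothetical override, path is illustrative
  docker_bin_dir: "/opt/bin"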
@@ -27,7 +27,7 @@
pause: seconds=10 prompt="Waiting for docker restart"
- name: Docker | wait for docker
command: /usr/bin/docker images
command: "{{ docker_bin_dir }}/docker images"
register: docker_ready
retries: 10
delay: 5
@@ -18,7 +18,7 @@ Environment=GOTRACEBACK=crash
ExecReload=/bin/kill -s HUP $MAINPID
Delegate=yes
KillMode=process
ExecStart=/usr/bin/docker daemon \
ExecStart={{ docker_bin_dir }}/docker daemon \
$DOCKER_OPTS \
$DOCKER_STORAGE_OPTIONS \
$DOCKER_NETWORK_OPTIONS \
@@ -79,7 +79,7 @@
#NOTE(bogdando) this brings no docker-py deps for nodes
- name: Download containers if pull is required or told to always pull
command: "/usr/bin/docker pull {{ pull_args }}"
command: "{{ docker_bin_dir }}/docker pull {{ pull_args }}"
register: pull_task_result
until: pull_task_result|success
retries: 4
@@ -115,7 +115,7 @@
tags: facts
- name: Download | save container images
shell: docker save "{{ pull_args }}" | gzip -{{ download_compress }} > "{{ fname }}"
shell: "{{ docker_bin_dir }}/docker save {{ pull_args }} | gzip -{{ download_compress }} > {{ fname }}"
delegate_to: "{{ download_delegate }}"
register: saved
run_once: true
@@ -145,6 +145,6 @@
tags: [upload, upgrade]
- name: Download | load container images
shell: docker load < "{{ fname }}"
shell: "{{ docker_bin_dir }}/docker load < {{ fname }}"
when: (ansible_os_family != "CoreOS" and inventory_hostname != groups['kube-master'][0] or download_delegate == "localhost") and download_run_once|bool and download.enabled|bool and download.container|bool
tags: [upload, upgrade]
@@ -8,7 +8,7 @@
{%- if pull_by_digest|bool %}{{download.repo}}@sha256:{{download.sha256}}{%- else -%}{{download.repo}}:{{download.tag}}{%- endif -%}
- name: Register docker images info
shell: "{% raw %}/usr/bin/docker images -q | xargs /usr/bin/docker inspect -f '{{.RepoTags}},{{.RepoDigests}}'{% endraw %}"
shell: "{{ docker_bin_dir }}/docker images -q | xargs {{ docker_bin_dir }}/docker inspect -f {% raw %}'{{.RepoTags}},{{.RepoDigests}}'{% endraw %}"
register: docker_images_raw
failed_when: false
when: not download_always_pull|bool
@@ -6,3 +6,7 @@ etcd_cert_dir: "{{ etcd_config_dir }}/ssl"
etcd_cert_group: root
etcd_script_dir: "{{ bin_dir }}/etcd-scripts"
# Limits
etcd_memory_limit: 512M
etcd_cpu_limit: 300m
@@ -12,10 +12,10 @@
#Plan A: no docker-py deps
- name: Install | Copy etcdctl binary from container
command: sh -c "/usr/bin/docker rm -f etcdctl-binarycopy;
/usr/bin/docker create --name etcdctl-binarycopy {{ etcd_image_repo }}:{{ etcd_image_tag }} &&
/usr/bin/docker cp etcdctl-binarycopy:{{ etcd_container_bin_dir }}etcdctl {{ bin_dir }}/etcdctl &&
/usr/bin/docker rm -f etcdctl-binarycopy"
command: sh -c "{{ docker_bin_dir }}/docker rm -f etcdctl-binarycopy;
{{ docker_bin_dir }}/docker create --name etcdctl-binarycopy {{ etcd_image_repo }}:{{ etcd_image_tag }} &&
{{ docker_bin_dir }}/docker cp etcdctl-binarycopy:{{ etcd_container_bin_dir }}etcdctl {{ bin_dir }}/etcdctl &&
{{ docker_bin_dir }}/docker rm -f etcdctl-binarycopy"
when: etcd_deployment_type == "docker"
register: etcd_task_result
until: etcd_task_result.rc == 0
@@ -26,12 +26,12 @@
- /etc/init.d/etcd-proxy
- name: "Pre-upgrade | find etcd-proxy container"
command: docker ps -aq --filter "name=etcd-proxy*"
command: "{{ docker_bin_dir }}/docker ps -aq --filter 'name=etcd-proxy*'"
register: etcd_proxy_container
failed_when: false
- name: "Pre-upgrade | remove etcd-proxy if it exists"
command: "docker rm -f {{item}}"
command: "{{ docker_bin_dir }}/docker rm -f {{item}}"
with_items: "{{etcd_proxy_container.stdout_lines}}"
- name: "Pre-upgrade | check if member list is non-SSL"
@@ -15,7 +15,7 @@ set -a
PATH=/sbin:/usr/sbin:/bin/:/usr/bin
DESC="etcd k/v store"
NAME=etcd
DAEMON={{ docker_bin_dir | default("/usr/bin") }}/docker
DAEMON={{ docker_bin_dir }}/docker
DAEMON_EXEC=`basename $DAEMON`
DAEMON_ARGS="run --restart=on-failure:5 --env-file=/etc/etcd.env \
--net=host \
@@ -50,7 +50,7 @@ do_status()
#
do_start()
{
{{ docker_bin_dir | default("/usr/bin") }}/docker rm -f {{ etcd_member_name | default("etcd-proxy") }} &>/dev/null || true
{{ docker_bin_dir }}/docker rm -f {{ etcd_member_name | default("etcd") }} &>/dev/null || true
sleep 1
start-stop-daemon --background --start --quiet --make-pidfile --pidfile $PID --user $DAEMON_USER --exec $DAEMON -- \
$DAEMON_ARGS \
@@ -6,7 +6,7 @@ After=docker.service
[Service]
User=root
PermissionsStartOnly=true
ExecStart={{ docker_bin_dir | default("/usr/bin") }}/docker run --restart=on-failure:5 \
ExecStart={{ docker_bin_dir }}/docker run --restart=on-failure:5 \
--env-file=/etc/etcd.env \
{# TODO(mattymo): Allow docker IP binding and disable in envfile
-p 2380:2380 -p 2379:2379 #}
@@ -14,14 +14,15 @@ ExecStart={{ docker_bin_dir | default("/usr/bin") }}/docker run --restart=on-fai
-v /etc/ssl/certs:/etc/ssl/certs:ro \
-v {{ etcd_cert_dir }}:{{ etcd_cert_dir }}:ro \
-v /var/lib/etcd:/var/lib/etcd:rw \
--memory={{ etcd_memory_limit|regex_replace('Mi', 'M') }} --cpu-shares={{ etcd_cpu_limit|regex_replace('m', '') }} \
--name={{ etcd_member_name | default("etcd") }} \
{{ etcd_image_repo }}:{{ etcd_image_tag }} \
{% if etcd_after_v3 %}
{{ etcd_container_bin_dir }}etcd
{% endif %}
ExecStartPre=-{{ docker_bin_dir | default("/usr/bin") }}/docker rm -f {{ etcd_member_name | default("etcd-proxy") }}
ExecReload={{ docker_bin_dir | default("/usr/bin") }}/docker restart {{ etcd_member_name | default("etcd-proxy") }}
ExecStop={{ docker_bin_dir | default("/usr/bin") }}/docker stop {{ etcd_member_name | default("etcd-proxy") }}
ExecStartPre=-{{ docker_bin_dir }}/docker rm -f {{ etcd_member_name | default("etcd") }}
ExecReload={{ docker_bin_dir }}/docker restart {{ etcd_member_name | default("etcd") }}
ExecStop={{ docker_bin_dir }}/docker stop {{ etcd_member_name | default("etcd") }}
Restart=always
RestartSec=15s
@@ -20,6 +20,12 @@ exechealthz_image_tag: "{{ exechealthz_version }}"
calico_policy_image_repo: "calico/kube-policy-controller"
calico_policy_image_tag: latest
# Limits for calico apps
calico_policy_controller_cpu_limit: 100m
calico_policy_controller_memory_limit: 256M
calico_policy_controller_cpu_requests: 30m
calico_policy_controller_memory_requests: 128M
# Netchecker
deploy_netchecker: false
netchecker_port: 31081
@@ -29,5 +35,19 @@ agent_img: "quay.io/l23network/mcp-netchecker-agent:v0.1"
server_img: "quay.io/l23network/mcp-netchecker-server:v0.1"
kubectl_image: "gcr.io/google_containers/kubectl:v0.18.0-120-gaeb4ac55ad12b1-dirty"
# Limits for netchecker apps
netchecker_agent_cpu_limit: 30m
netchecker_agent_memory_limit: 100M
netchecker_agent_cpu_requests: 15m
netchecker_agent_memory_requests: 64M
netchecker_server_cpu_limit: 100m
netchecker_server_memory_limit: 256M
netchecker_server_cpu_requests: 50m
netchecker_server_memory_requests: 128M
netchecker_kubectl_cpu_limit: 30m
netchecker_kubectl_memory_limit: 128M
netchecker_kubectl_cpu_requests: 15m
netchecker_kubectl_memory_requests: 64M
# SSL
etcd_cert_dir: "/etc/ssl/etcd/ssl"
@@ -25,6 +25,13 @@ spec:
- name: calico-policy-controller
image: {{ calico_policy_image_repo }}:{{ calico_policy_image_tag }}
imagePullPolicy: {{ k8s_image_pull_policy }}
resources:
limits:
cpu: {{ calico_policy_controller_cpu_limit }}
memory: {{ calico_policy_controller_memory_limit }}
requests:
cpu: {{ calico_policy_controller_cpu_requests }}
memory: {{ calico_policy_controller_memory_requests }}
env:
- name: ETCD_ENDPOINTS
value: "{{ etcd_access_endpoint }}"
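
With the calico defaults added above, the resources block renders as follows
(a sketch substituting the default values from this change):

  resources:
    limits:
      cpu: 100m
      memory: 256M
    requests:
      cpu: 30m
      memory: 128M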
@@ -23,3 +23,10 @@ spec:
- name: REPORT_INTERVAL
value: '{{ agent_report_interval }}'
imagePullPolicy: {{ k8s_image_pull_policy }}
resources:
limits:
cpu: {{ netchecker_agent_cpu_limit }}
memory: {{ netchecker_agent_memory_limit }}
requests:
cpu: {{ netchecker_agent_cpu_requests }}
memory: {{ netchecker_agent_memory_requests }}
@@ -24,3 +24,10 @@ spec:
- name: REPORT_INTERVAL
value: '{{ agent_report_interval }}'
imagePullPolicy: {{ k8s_image_pull_policy }}
resources:
limits:
cpu: {{ netchecker_agent_cpu_limit }}
memory: {{ netchecker_agent_memory_limit }}
requests:
cpu: {{ netchecker_agent_cpu_requests }}
memory: {{ netchecker_agent_memory_requests }}
@@ -11,11 +11,25 @@ spec:
image: "{{ server_img }}"
env:
imagePullPolicy: {{ k8s_image_pull_policy }}
resources:
limits:
cpu: {{ netchecker_server_cpu_limit }}
memory: {{ netchecker_server_memory_limit }}
requests:
cpu: {{ netchecker_server_cpu_requests }}
memory: {{ netchecker_server_memory_requests }}
ports:
- containerPort: 8081
hostPort: 8081
- name: kubectl-proxy
image: "{{ kubectl_image }}"
imagePullPolicy: {{ k8s_image_pull_policy }}
resources:
limits:
cpu: {{ netchecker_kubectl_cpu_limit }}
memory: {{ netchecker_kubectl_memory_limit }}
requests:
cpu: {{ netchecker_kubectl_cpu_requests }}
memory: {{ netchecker_kubectl_memory_requests }}
args:
- proxy
@@ -13,4 +13,16 @@ kube_apiserver_node_port_range: "30000-32767"
etcd_config_dir: /etc/ssl/etcd
etcd_cert_dir: "{{ etcd_config_dir }}/ssl"
# Limits for kube components
kube_controller_memory_limit: 512M
kube_controller_cpu_limit: 250m
kube_controller_memory_requests: 170M
kube_controller_cpu_requests: 100m
kube_scheduler_memory_limit: 512M
kube_scheduler_cpu_limit: 250m
kube_scheduler_memory_requests: 170M
kube_scheduler_cpu_requests: 100m
kube_apiserver_memory_limit: 2000M
kube_apiserver_cpu_limit: 800m
kube_apiserver_memory_requests: 256M
kube_apiserver_cpu_requests: 300m
@@ -3,7 +3,7 @@
tags: k8s-pre-upgrade
- name: Copy kubectl from hyperkube container
command: "/usr/bin/docker run --rm -v {{ bin_dir }}:/systembindir {{ hyperkube_image_repo }}:{{ hyperkube_image_tag }} /bin/cp /hyperkube /systembindir/kubectl"
command: "{{ docker_bin_dir }}/docker run --rm -v {{ bin_dir }}:/systembindir {{ hyperkube_image_repo }}:{{ hyperkube_image_tag }} /bin/cp /hyperkube /systembindir/kubectl"
register: kube_task_result
until: kube_task_result.rc == 0
retries: 4
@@ -12,6 +12,13 @@ spec:
- name: kube-apiserver
image: {{ hyperkube_image_repo }}:{{ hyperkube_image_tag }}
imagePullPolicy: {{ k8s_image_pull_policy }}
resources:
limits:
cpu: {{ kube_apiserver_cpu_limit }}
memory: {{ kube_apiserver_memory_limit }}
requests:
cpu: {{ kube_apiserver_cpu_requests }}
memory: {{ kube_apiserver_memory_requests }}
command:
- /hyperkube
- apiserver
@@ -11,6 +11,13 @@ spec:
- name: kube-controller-manager
image: {{ hyperkube_image_repo }}:{{ hyperkube_image_tag }}
imagePullPolicy: {{ k8s_image_pull_policy }}
resources:
limits:
cpu: {{ kube_controller_cpu_limit }}
memory: {{ kube_controller_memory_limit }}
requests:
cpu: {{ kube_controller_cpu_requests }}
memory: {{ kube_controller_memory_requests }}
command:
- /hyperkube
- controller-manager