From a56d9de502c7bf75d575255de34b995f47639e95 Mon Sep 17 00:00:00 2001
From: Bogdan Dobrelya <bdobrelia@mirantis.com>
Date: Fri, 23 Dec 2016 15:44:44 +0100
Subject: [PATCH] Systemd units, limits, and bin path fixes

* Add restart for weave service unit
* Reuse docker_bin_dir everywhere
* Limit systemd managed docker containers by CPU/RAM. Do not configure native
  systemd limits: due to the lack of consensus in the kernel community, they
  require out-of-tree kernel patches.

Signed-off-by: Bogdan Dobrelya <bdobrelia@mirantis.com>
---
 docs/large-deployments.md                     |  7 +++++++
 inventory/group_vars/all.yml                  |  1 +
 roles/docker/handlers/main.yml                |  2 +-
 roles/docker/templates/docker.service.j2      |  2 +-
 roles/download/tasks/main.yml                 |  6 +++---
 .../download/tasks/set_docker_image_facts.yml |  2 +-
 roles/etcd/defaults/main.yml                  |  4 ++++
 roles/etcd/tasks/install.yml                  |  8 ++++----
 roles/etcd/tasks/pre_upgrade.yml              |  4 ++--
 roles/etcd/templates/deb-etcd-docker.initd.j2 |  4 ++--
 roles/etcd/templates/etcd-docker.service.j2   |  9 +++++----
 .../kubernetes-apps/ansible/defaults/main.yml | 20 +++++++++++++++++++
 .../templates/calico-policy-controller.yml.j2 |  7 +++++++
 .../ansible/templates/netchecker-agent-ds.yml |  7 +++++++
 .../templates/netchecker-agent-hostnet-ds.yml |  7 +++++++
 .../templates/netchecker-server-pod.yml       | 14 +++++++++++++
 roles/kubernetes/master/defaults/main.yml     | 14 ++++++++++++-
 roles/kubernetes/master/tasks/main.yml        |  2 +-
 .../manifests/kube-apiserver.manifest.j2      |  7 +++++++
 .../kube-controller-manager.manifest.j2       |  7 +++++++
 .../manifests/kube-scheduler.manifest.j2      |  7 +++++++
 roles/kubernetes/node/defaults/main.yml       | 12 +++++++++++
 .../node/templates/deb-kubelet.initd.j2       |  2 +-
 .../node/templates/kubelet-container.j2       |  3 ++-
 .../node/templates/kubelet.service.j2         |  4 ++--
 .../manifests/kube-proxy.manifest.j2          |  7 +++++++
 .../manifests/nginx-proxy.manifest.j2         |  7 +++++++
 roles/network_plugin/calico/defaults/main.yml | 14 +++++++++++++
 .../calico/rr/templates/calico-rr.service.j2  |  7 ++++---
 roles/network_plugin/calico/tasks/main.yml    |  4 ++--
 .../calico/templates/calico-node.service.j2   |  7 ++++---
 .../calico/templates/calicoctl-container.j2   |  5 +++--
 roles/network_plugin/canal/defaults/main.yml  | 10 ++++++++++
 roles/network_plugin/canal/tasks/main.yml     |  4 ++--
 .../canal/templates/canal-node.yml.j2         | 14 +++++++++++++
 roles/network_plugin/cloud/tasks/main.yml     |  2 +-
 .../network_plugin/flannel/defaults/main.yml  |  6 ++++++
 .../network_plugin/flannel/handlers/main.yml  |  2 +-
 .../flannel/templates/flannel-pod.yml         | 10 +++++++---
 roles/network_plugin/weave/defaults/main.yml  |  4 ++++
 roles/network_plugin/weave/tasks/main.yml     |  2 +-
 roles/network_plugin/weave/templates/weave.j2 |  1 +
 .../weave/templates/weave.service.j2          |  5 +++--
 .../weave/templates/weaveproxy.service.j2     |  6 +++---
 roles/reset/tasks/main.yml                    |  2 +-
 scripts/collect-info.yaml                     |  6 ++++--
 46 files changed, 237 insertions(+), 50 deletions(-)
 create mode 100644 roles/network_plugin/weave/defaults/main.yml

diff --git a/docs/large-deployments.md b/docs/large-deployments.md
index 473f83954..20bc7fefd 100644
--- a/docs/large-deployments.md
+++ b/docs/large-deployments.md
@@ -20,5 +20,12 @@ For a large scaled deployments, consider the following configuration changes:
   ``dns_cpu_requests``, ``dns_memory_limit``, ``dns_memory_requests``.
   Please note that limits must always be greater than or equal to requests.
 
+* Tune CPU/memory limits and requests. Those are located in roles' defaults
+  and named like ``foo_memory_limit``, ``foo_memory_requests`` and
+  ``foo_cpu_limit``, ``foo_cpu_requests``. Note that when a value is passed to
+  ``docker run``, the K8s memory unit 'Mi' is rewritten as 'M' and the 'm'
+  suffix of K8s CPU units is dropped. This is required because docker does not
+  understand K8s units well.
+
 For example, when deploying 200 nodes, you may want to run ansible with
 ``--forks=50``, ``--timeout=600`` and define the ``retry_stagger: 60``.
diff --git a/inventory/group_vars/all.yml b/inventory/group_vars/all.yml
index cc9a8b874..a73612b0d 100644
--- a/inventory/group_vars/all.yml
+++ b/inventory/group_vars/all.yml
@@ -188,6 +188,7 @@ docker_daemon_graph: "/var/lib/docker"
 ## An obvious use case is allowing insecure-registry access
 ## to self hosted registries like so:
 docker_options: "--insecure-registry={{ kube_service_addresses }} --graph={{ docker_daemon_graph }}"
+docker_bin_dir: "/usr/bin"
 
 ## Uncomment this if you want to force overlay/overlay2 as docker storage driver
 ## Please note that overlay2 is only supported on newer kernels
diff --git a/roles/docker/handlers/main.yml b/roles/docker/handlers/main.yml
index fd866a109..4f4c0c8c0 100644
--- a/roles/docker/handlers/main.yml
+++ b/roles/docker/handlers/main.yml
@@ -27,7 +27,7 @@
   pause: seconds=10 prompt="Waiting for docker restart"
 
 - name: Docker | wait for docker
-  command: /usr/bin/docker images
+  command: "{{ docker_bin_dir }}/docker images"
   register: docker_ready
   retries: 10
   delay: 5
diff --git a/roles/docker/templates/docker.service.j2 b/roles/docker/templates/docker.service.j2
index 39e460e96..6838868cd 100644
--- a/roles/docker/templates/docker.service.j2
+++ b/roles/docker/templates/docker.service.j2
@@ -18,7 +18,7 @@ Environment=GOTRACEBACK=crash
 ExecReload=/bin/kill -s HUP $MAINPID
 Delegate=yes
 KillMode=process
-ExecStart=/usr/bin/docker daemon \
+ExecStart={{ docker_bin_dir }}/docker daemon \
           $DOCKER_OPTS \
           $DOCKER_STORAGE_OPTIONS \
           $DOCKER_NETWORK_OPTIONS \
diff --git a/roles/download/tasks/main.yml b/roles/download/tasks/main.yml
index e1859a1e6..7ac38449d 100644
--- a/roles/download/tasks/main.yml
+++ b/roles/download/tasks/main.yml
@@ -79,7 +79,7 @@
 
 #NOTE(bogdando) this brings no docker-py deps for nodes
 - name: Download containers if pull is required or told to always pull
-  command: "/usr/bin/docker pull {{ pull_args }}"
+  command: "{{ docker_bin_dir }}/docker pull {{ pull_args }}"
   register: pull_task_result
   until: pull_task_result|success
   retries: 4
@@ -115,7 +115,7 @@
   tags: facts
 
 - name: Download | save container images
-  shell: docker save "{{ pull_args }}" | gzip -{{ download_compress }} > "{{ fname }}"
+  shell: "{{ docker_bin_dir }}/docker save {{ pull_args }} | gzip -{{ download_compress }} > {{ fname }}"
   delegate_to: "{{ download_delegate }}"
   register: saved
   run_once: true
@@ -145,6 +145,6 @@
   tags: [upload, upgrade]
 
 - name: Download | load container images
-  shell: docker load < "{{ fname }}"
+  shell: "{{ docker_bin_dir }}/docker load < {{ fname }}"
   when: (ansible_os_family != "CoreOS" and inventory_hostname != groups['kube-master'][0] or download_delegate == "localhost") and download_run_once|bool and download.enabled|bool and download.container|bool
   tags: [upload, upgrade]
diff --git a/roles/download/tasks/set_docker_image_facts.yml b/roles/download/tasks/set_docker_image_facts.yml
index 69048c513..451e0de2b 100644
--- a/roles/download/tasks/set_docker_image_facts.yml
+++ b/roles/download/tasks/set_docker_image_facts.yml
@@ -8,7 +8,7 @@
       {%- if pull_by_digest|bool %}{{download.repo}}@sha256:{{download.sha256}}{%- else -%}{{download.repo}}:{{download.tag}}{%- endif -%}
 
 - name: Register docker images info
-  shell: "{% raw %}/usr/bin/docker images -q | xargs /usr/bin/docker inspect -f '{{.RepoTags}},{{.RepoDigests}}'{% endraw %}"
+  shell: "{{ docker_bin_dir }}/docker images -q | xargs {{ docker_bin_dir }}/docker inspect -f {% raw %}'{{.RepoTags}},{{.RepoDigests}}'{% endraw %}"
   register: docker_images_raw
   failed_when: false
   when: not download_always_pull|bool
diff --git a/roles/etcd/defaults/main.yml b/roles/etcd/defaults/main.yml
index 2df4ba165..e733fe56d 100644
--- a/roles/etcd/defaults/main.yml
+++ b/roles/etcd/defaults/main.yml
@@ -6,3 +6,7 @@ etcd_cert_dir: "{{ etcd_config_dir }}/ssl"
 etcd_cert_group: root
 
 etcd_script_dir: "{{ bin_dir }}/etcd-scripts"
+
+# Limits
+etcd_memory_limit: 512M
+etcd_cpu_limit: 300m
diff --git a/roles/etcd/tasks/install.yml b/roles/etcd/tasks/install.yml
index aa7f32ca3..0ed3f4154 100644
--- a/roles/etcd/tasks/install.yml
+++ b/roles/etcd/tasks/install.yml
@@ -12,10 +12,10 @@
 
 #Plan A: no docker-py deps
 - name: Install | Copy etcdctl binary from container
-  command: sh -c "/usr/bin/docker rm -f etcdctl-binarycopy;
-           /usr/bin/docker create --name etcdctl-binarycopy {{ etcd_image_repo }}:{{ etcd_image_tag }} &&
-           /usr/bin/docker cp etcdctl-binarycopy:{{ etcd_container_bin_dir }}etcdctl {{ bin_dir }}/etcdctl &&
-           /usr/bin/docker rm -f etcdctl-binarycopy"
+  command: sh -c "{{ docker_bin_dir }}/docker rm -f etcdctl-binarycopy;
+           {{ docker_bin_dir }}/docker create --name etcdctl-binarycopy {{ etcd_image_repo }}:{{ etcd_image_tag }} &&
+           {{ docker_bin_dir }}/docker cp etcdctl-binarycopy:{{ etcd_container_bin_dir }}etcdctl {{ bin_dir }}/etcdctl &&
+           {{ docker_bin_dir }}/docker rm -f etcdctl-binarycopy"
   when: etcd_deployment_type == "docker"
   register: etcd_task_result
   until: etcd_task_result.rc == 0
diff --git a/roles/etcd/tasks/pre_upgrade.yml b/roles/etcd/tasks/pre_upgrade.yml
index 081702c4a..eb17e9871 100644
--- a/roles/etcd/tasks/pre_upgrade.yml
+++ b/roles/etcd/tasks/pre_upgrade.yml
@@ -26,12 +26,12 @@
     - /etc/init.d/etcd-proxy
 
 - name: "Pre-upgrade | find etcd-proxy container"
-  command: docker ps -aq --filter "name=etcd-proxy*"
+  command: "{{ docker_bin_dir }}/docker ps -aq --filter 'name=etcd-proxy*'"
   register: etcd_proxy_container
   failed_when: false
 
 - name: "Pre-upgrade | remove etcd-proxy if it exists"
-  command: "docker rm -f {{item}}"
+  command: "{{ docker_bin_dir }}/docker rm -f {{item}}"
   with_items: "{{etcd_proxy_container.stdout_lines}}"
 
 - name: "Pre-upgrade | check if member list is non-SSL"
diff --git a/roles/etcd/templates/deb-etcd-docker.initd.j2 b/roles/etcd/templates/deb-etcd-docker.initd.j2
index 0489cd2f5..b8ae568e9 100644
--- a/roles/etcd/templates/deb-etcd-docker.initd.j2
+++ b/roles/etcd/templates/deb-etcd-docker.initd.j2
@@ -15,7 +15,7 @@ set -a
 PATH=/sbin:/usr/sbin:/bin/:/usr/bin
 DESC="etcd k/v store"
 NAME=etcd
-DAEMON={{ docker_bin_dir | default("/usr/bin") }}/docker
+DAEMON={{ docker_bin_dir }}/docker
 DAEMON_EXEC=`basename $DAEMON`
 DAEMON_ARGS="run --restart=on-failure:5 --env-file=/etc/etcd.env \
 --net=host \
@@ -50,7 +50,7 @@ do_status()
 #
 do_start()
 {
-    {{ docker_bin_dir | default("/usr/bin") }}/docker rm -f {{ etcd_member_name | default("etcd-proxy") }} &>/dev/null || true
+    {{ docker_bin_dir }}/docker rm -f {{ etcd_member_name | default("etcd") }} &>/dev/null || true
     sleep 1
     start-stop-daemon --background --start --quiet --make-pidfile --pidfile $PID --user $DAEMON_USER --exec $DAEMON -- \
         $DAEMON_ARGS \
diff --git a/roles/etcd/templates/etcd-docker.service.j2 b/roles/etcd/templates/etcd-docker.service.j2
index d18a91f42..223d2d842 100644
--- a/roles/etcd/templates/etcd-docker.service.j2
+++ b/roles/etcd/templates/etcd-docker.service.j2
@@ -6,7 +6,7 @@ After=docker.service
 [Service]
 User=root
 PermissionsStartOnly=true
-ExecStart={{ docker_bin_dir | default("/usr/bin") }}/docker run --restart=on-failure:5 \
+ExecStart={{ docker_bin_dir }}/docker run --restart=on-failure:5 \
 --env-file=/etc/etcd.env \
 {# TODO(mattymo): Allow docker IP binding and disable in envfile
    -p 2380:2380 -p 2379:2379 #}
@@ -14,14 +14,15 @@ ExecStart={{ docker_bin_dir | default("/usr/bin") }}/docker run --restart=on-fai
 -v /etc/ssl/certs:/etc/ssl/certs:ro \
 -v {{ etcd_cert_dir }}:{{ etcd_cert_dir }}:ro \
 -v /var/lib/etcd:/var/lib/etcd:rw \
+--memory={{ etcd_memory_limit|regex_replace('Mi', 'M') }} --cpu-shares={{ etcd_cpu_limit|regex_replace('m', '') }} \
 --name={{ etcd_member_name | default("etcd") }} \
 {{ etcd_image_repo }}:{{ etcd_image_tag }} \
 {% if etcd_after_v3 %}
 {{ etcd_container_bin_dir }}etcd
 {% endif %}
-ExecStartPre=-{{ docker_bin_dir | default("/usr/bin") }}/docker rm -f {{ etcd_member_name | default("etcd-proxy") }}
-ExecReload={{ docker_bin_dir | default("/usr/bin") }}/docker restart {{ etcd_member_name | default("etcd-proxy") }}
-ExecStop={{ docker_bin_dir | default("/usr/bin") }}/docker stop {{ etcd_member_name | default("etcd-proxy") }}
+ExecStartPre=-{{ docker_bin_dir }}/docker rm -f {{ etcd_member_name | default("etcd") }}
+ExecReload={{ docker_bin_dir }}/docker restart {{ etcd_member_name | default("etcd") }}
+ExecStop={{ docker_bin_dir }}/docker stop {{ etcd_member_name | default("etcd") }}
 Restart=always
 RestartSec=15s
 
diff --git a/roles/kubernetes-apps/ansible/defaults/main.yml b/roles/kubernetes-apps/ansible/defaults/main.yml
index dd2bd2d8a..90a5702bb 100644
--- a/roles/kubernetes-apps/ansible/defaults/main.yml
+++ b/roles/kubernetes-apps/ansible/defaults/main.yml
@@ -20,6 +20,12 @@ exechealthz_image_tag: "{{ exechealthz_version }}"
 calico_policy_image_repo: "calico/kube-policy-controller"
 calico_policy_image_tag: latest
 
+# Limits for calico apps
+calico_policy_controller_cpu_limit: 100m
+calico_policy_controller_memory_limit: 256M
+calico_policy_controller_cpu_requests: 30m
+calico_policy_controller_memory_requests: 128M
+
 # Netchecker
 deploy_netchecker: false
 netchecker_port: 31081
@@ -29,5 +35,19 @@ agent_img: "quay.io/l23network/mcp-netchecker-agent:v0.1"
 server_img: "quay.io/l23network/mcp-netchecker-server:v0.1"
 kubectl_image: "gcr.io/google_containers/kubectl:v0.18.0-120-gaeb4ac55ad12b1-dirty"
 
+# Limits for netchecker apps
+netchecker_agent_cpu_limit: 30m
+netchecker_agent_memory_limit: 100M
+netchecker_agent_cpu_requests: 15m
+netchecker_agent_memory_requests: 64M
+netchecker_server_cpu_limit: 100m
+netchecker_server_memory_limit: 256M
+netchecker_server_cpu_requests: 50m
+netchecker_server_memory_requests: 128M
+netchecker_kubectl_cpu_limit: 30m
+netchecker_kubectl_memory_limit: 128M
+netchecker_kubectl_cpu_requests: 15m
+netchecker_kubectl_memory_requests: 64M
+
 # SSL
 etcd_cert_dir: "/etc/ssl/etcd/ssl"
diff --git a/roles/kubernetes-apps/ansible/templates/calico-policy-controller.yml.j2 b/roles/kubernetes-apps/ansible/templates/calico-policy-controller.yml.j2
index eb98267f3..c92328f15 100644
--- a/roles/kubernetes-apps/ansible/templates/calico-policy-controller.yml.j2
+++ b/roles/kubernetes-apps/ansible/templates/calico-policy-controller.yml.j2
@@ -25,6 +25,13 @@ spec:
         - name: calico-policy-controller
           image: {{ calico_policy_image_repo }}:{{ calico_policy_image_tag }}
           imagePullPolicy: {{ k8s_image_pull_policy }}
+          resources:
+            limits:
+              cpu: {{ calico_policy_controller_cpu_limit }}
+              memory: {{ calico_policy_controller_memory_limit }}
+            requests:
+              cpu: {{ calico_policy_controller_cpu_requests }}
+              memory: {{ calico_policy_controller_memory_requests }}
           env:
             - name: ETCD_ENDPOINTS
               value: "{{ etcd_access_endpoint }}"
diff --git a/roles/kubernetes-apps/ansible/templates/netchecker-agent-ds.yml b/roles/kubernetes-apps/ansible/templates/netchecker-agent-ds.yml
index a52329e50..41900ab33 100644
--- a/roles/kubernetes-apps/ansible/templates/netchecker-agent-ds.yml
+++ b/roles/kubernetes-apps/ansible/templates/netchecker-agent-ds.yml
@@ -23,3 +23,10 @@ spec:
             - name: REPORT_INTERVAL
               value: '{{ agent_report_interval }}'
           imagePullPolicy: {{ k8s_image_pull_policy }}
+          resources:
+            limits:
+              cpu: {{ netchecker_agent_cpu_limit }}
+              memory: {{ netchecker_agent_memory_limit }}
+            requests:
+              cpu: {{ netchecker_agent_cpu_requests }}
+              memory: {{ netchecker_agent_memory_requests }}
diff --git a/roles/kubernetes-apps/ansible/templates/netchecker-agent-hostnet-ds.yml b/roles/kubernetes-apps/ansible/templates/netchecker-agent-hostnet-ds.yml
index 4fd03e80a..5a6a63f36 100644
--- a/roles/kubernetes-apps/ansible/templates/netchecker-agent-hostnet-ds.yml
+++ b/roles/kubernetes-apps/ansible/templates/netchecker-agent-hostnet-ds.yml
@@ -24,3 +24,10 @@ spec:
             - name: REPORT_INTERVAL
               value: '{{ agent_report_interval }}'
           imagePullPolicy: {{ k8s_image_pull_policy }}
+          resources:
+            limits:
+              cpu: {{ netchecker_agent_cpu_limit }}
+              memory: {{ netchecker_agent_memory_limit }}
+            requests:
+              cpu: {{ netchecker_agent_cpu_requests }}
+              memory: {{ netchecker_agent_memory_requests }}
diff --git a/roles/kubernetes-apps/ansible/templates/netchecker-server-pod.yml b/roles/kubernetes-apps/ansible/templates/netchecker-server-pod.yml
index 6f242bc51..c1d8ddb9f 100644
--- a/roles/kubernetes-apps/ansible/templates/netchecker-server-pod.yml
+++ b/roles/kubernetes-apps/ansible/templates/netchecker-server-pod.yml
@@ -11,11 +11,25 @@ spec:
       image: "{{ server_img }}"
       env:
       imagePullPolicy: {{ k8s_image_pull_policy }}
+      resources:
+        limits:
+          cpu: {{ netchecker_server_cpu_limit }}
+          memory: {{ netchecker_server_memory_limit }}
+        requests:
+          cpu: {{ netchecker_server_cpu_requests }}
+          memory: {{ netchecker_server_memory_requests }}
       ports:
         - containerPort: 8081
           hostPort: 8081
     - name: kubectl-proxy
       image: "{{ kubectl_image }}"
       imagePullPolicy: {{ k8s_image_pull_policy }}
+      resources:
+        limits:
+          cpu: {{ netchecker_kubectl_cpu_limit }}
+          memory: {{ netchecker_kubectl_memory_limit }}
+        requests:
+          cpu: {{ netchecker_kubectl_cpu_requests }}
+          memory: {{ netchecker_kubectl_memory_requests }}
       args:
         - proxy
diff --git a/roles/kubernetes/master/defaults/main.yml b/roles/kubernetes/master/defaults/main.yml
index c1fbbb583..874925adf 100644
--- a/roles/kubernetes/master/defaults/main.yml
+++ b/roles/kubernetes/master/defaults/main.yml
@@ -13,4 +13,16 @@ kube_apiserver_node_port_range: "30000-32767"
 etcd_config_dir: /etc/ssl/etcd
 etcd_cert_dir: "{{ etcd_config_dir }}/ssl"
 
-
+# Limits for kube components
+kube_controller_memory_limit: 512M
+kube_controller_cpu_limit: 250m
+kube_controller_memory_requests: 170M
+kube_controller_cpu_requests: 100m
+kube_scheduler_memory_limit: 512M
+kube_scheduler_cpu_limit: 250m
+kube_scheduler_memory_requests: 170M
+kube_scheduler_cpu_requests: 100m
+kube_apiserver_memory_limit: 2000M
+kube_apiserver_cpu_limit: 800m
+kube_apiserver_memory_requests: 256M
+kube_apiserver_cpu_requests: 300m
diff --git a/roles/kubernetes/master/tasks/main.yml b/roles/kubernetes/master/tasks/main.yml
index e1b5cc5d2..f7b561578 100644
--- a/roles/kubernetes/master/tasks/main.yml
+++ b/roles/kubernetes/master/tasks/main.yml
@@ -3,7 +3,7 @@
   tags: k8s-pre-upgrade
 
 - name: Copy kubectl from hyperkube container
-  command: "/usr/bin/docker run --rm -v {{ bin_dir }}:/systembindir {{ hyperkube_image_repo }}:{{ hyperkube_image_tag }} /bin/cp /hyperkube /systembindir/kubectl"
+  command: "{{ docker_bin_dir }}/docker run --rm -v {{ bin_dir }}:/systembindir {{ hyperkube_image_repo }}:{{ hyperkube_image_tag }} /bin/cp /hyperkube /systembindir/kubectl"
   register: kube_task_result
   until: kube_task_result.rc == 0
   retries: 4
diff --git a/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2 b/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2
index 65505526d..c05030697 100644
--- a/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2
+++ b/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2
@@ -12,6 +12,13 @@ spec:
   - name: kube-apiserver
     image: {{ hyperkube_image_repo }}:{{ hyperkube_image_tag }}
     imagePullPolicy: {{ k8s_image_pull_policy }}
+    resources:
+      limits:
+        cpu: {{ kube_apiserver_cpu_limit }}
+        memory: {{ kube_apiserver_memory_limit }}
+      requests:
+        cpu: {{ kube_apiserver_cpu_requests }}
+        memory: {{ kube_apiserver_memory_requests }}
     command:
     - /hyperkube
     - apiserver
diff --git a/roles/kubernetes/master/templates/manifests/kube-controller-manager.manifest.j2 b/roles/kubernetes/master/templates/manifests/kube-controller-manager.manifest.j2
index 8f7580eb5..49dd05ba8 100644
--- a/roles/kubernetes/master/templates/manifests/kube-controller-manager.manifest.j2
+++ b/roles/kubernetes/master/templates/manifests/kube-controller-manager.manifest.j2
@@ -11,6 +11,13 @@ spec:
   - name: kube-controller-manager
     image: {{ hyperkube_image_repo }}:{{ hyperkube_image_tag }}
     imagePullPolicy: {{ k8s_image_pull_policy }}
+    resources:
+      limits:
+        cpu: {{ kube_controller_cpu_limit }}
+        memory: {{ kube_controller_memory_limit }}
+      requests:
+        cpu: {{ kube_controller_cpu_requests }}
+        memory: {{ kube_controller_memory_requests }}
     command:
     - /hyperkube
     - controller-manager
diff --git a/roles/kubernetes/master/templates/manifests/kube-scheduler.manifest.j2 b/roles/kubernetes/master/templates/manifests/kube-scheduler.manifest.j2
index a2c4c134a..781e38d7b 100644
--- a/roles/kubernetes/master/templates/manifests/kube-scheduler.manifest.j2
+++ b/roles/kubernetes/master/templates/manifests/kube-scheduler.manifest.j2
@@ -11,6 +11,13 @@ spec:
   - name: kube-scheduler
     image: {{ hyperkube_image_repo }}:{{ hyperkube_image_tag }}
     imagePullPolicy: {{ k8s_image_pull_policy }}
+    resources:
+      limits:
+        cpu: {{ kube_scheduler_cpu_limit }}
+        memory: {{ kube_scheduler_memory_limit }}
+      requests:
+        cpu: {{ kube_scheduler_cpu_requests }}
+        memory: {{ kube_scheduler_memory_requests }}
     command:
     - /hyperkube
     - scheduler
diff --git a/roles/kubernetes/node/defaults/main.yml b/roles/kubernetes/node/defaults/main.yml
index b0f73e50d..99ed2bdae 100644
--- a/roles/kubernetes/node/defaults/main.yml
+++ b/roles/kubernetes/node/defaults/main.yml
@@ -9,6 +9,18 @@ kube_proxy_mode: iptables
 # If using the pure iptables proxy, SNAT everything
 kube_proxy_masquerade_all: true
 
+# Limits for kube components and nginx load balancer app
+kubelet_memory_limit: 512M
+kubelet_cpu_limit: 100m
+kube_proxy_memory_limit: 2000M
+kube_proxy_cpu_limit: 500m
+kube_proxy_memory_requests: 256M
+kube_proxy_cpu_requests: 150m
+nginx_memory_limit: 512M
+nginx_cpu_limit: 300m
+nginx_memory_requests: 64M
+nginx_cpu_requests: 50m
+
 # kube_api_runtime_config:
 #   - extensions/v1beta1/daemonsets=true
 #   - extensions/v1beta1/deployments=true
diff --git a/roles/kubernetes/node/templates/deb-kubelet.initd.j2 b/roles/kubernetes/node/templates/deb-kubelet.initd.j2
index 6f349b8f2..194506e89 100644
--- a/roles/kubernetes/node/templates/deb-kubelet.initd.j2
+++ b/roles/kubernetes/node/templates/deb-kubelet.initd.j2
@@ -39,7 +39,7 @@ DAEMON_USER=root
 #
 do_start()
 {
-        /usr/bin/docker rm -f kubelet &>/dev/null || true
+        {{ docker_bin_dir }}/docker rm -f kubelet &>/dev/null || true
         sleep 1
         # Return
         #   0 if daemon has been started
diff --git a/roles/kubernetes/node/templates/kubelet-container.j2 b/roles/kubernetes/node/templates/kubelet-container.j2
index 7d4f536ab..c97c6f0de 100644
--- a/roles/kubernetes/node/templates/kubelet-container.j2
+++ b/roles/kubernetes/node/templates/kubelet-container.j2
@@ -1,5 +1,5 @@
 #!/bin/bash
-/usr/bin/docker run --privileged \
+{{ docker_bin_dir }}/docker run --privileged \
 --net=host --pid=host --name=kubelet --restart=on-failure:5 \
 -v /etc/cni:/etc/cni:ro \
 -v /opt/cni:/opt/cni:ro \
@@ -9,6 +9,7 @@
 -v {{ docker_daemon_graph }}:/var/lib/docker \
 -v /var/run:/var/run \
 -v /var/lib/kubelet:/var/lib/kubelet \
+--memory={{ kubelet_memory_limit|regex_replace('Mi', 'M') }} --cpu-shares={{ kubelet_cpu_limit|regex_replace('m', '')  }} \
 {{ hyperkube_image_repo }}:{{ hyperkube_image_tag}} \
 nsenter --target=1 --mount --wd=. -- \
 ./hyperkube kubelet \
diff --git a/roles/kubernetes/node/templates/kubelet.service.j2 b/roles/kubernetes/node/templates/kubelet.service.j2
index d8d5ec8a8..e3bf40878 100644
--- a/roles/kubernetes/node/templates/kubelet.service.j2
+++ b/roles/kubernetes/node/templates/kubelet.service.j2
@@ -23,8 +23,8 @@ ExecStart={{ bin_dir }}/kubelet \
 		$DOCKER_SOCKET \
 		$KUBELET_NETWORK_PLUGIN \
 		$KUBELET_CLOUDPROVIDER
-ExecStartPre=-/usr/bin/docker rm -f kubelet
-ExecReload=/usr/bin/docker restart kubelet
+ExecStartPre=-{{ docker_bin_dir }}/docker rm -f kubelet
+ExecReload={{ docker_bin_dir }}/docker restart kubelet
 Restart=always
 RestartSec=10s
 
diff --git a/roles/kubernetes/node/templates/manifests/kube-proxy.manifest.j2 b/roles/kubernetes/node/templates/manifests/kube-proxy.manifest.j2
index 694ee1e36..a965ef792 100644
--- a/roles/kubernetes/node/templates/manifests/kube-proxy.manifest.j2
+++ b/roles/kubernetes/node/templates/manifests/kube-proxy.manifest.j2
@@ -11,6 +11,13 @@ spec:
   - name: kube-proxy
     image: {{ hyperkube_image_repo }}:{{ hyperkube_image_tag }}
     imagePullPolicy: {{ k8s_image_pull_policy }}
+    resources:
+      limits:
+        cpu: {{ kube_proxy_cpu_limit }}
+        memory: {{ kube_proxy_memory_limit }}
+      requests:
+        cpu: {{ kube_proxy_cpu_requests }}
+        memory: {{ kube_proxy_memory_requests }}
     command:
     - /hyperkube
     - proxy
diff --git a/roles/kubernetes/node/templates/manifests/nginx-proxy.manifest.j2 b/roles/kubernetes/node/templates/manifests/nginx-proxy.manifest.j2
index db15bd2b9..2d566cad1 100644
--- a/roles/kubernetes/node/templates/manifests/nginx-proxy.manifest.j2
+++ b/roles/kubernetes/node/templates/manifests/nginx-proxy.manifest.j2
@@ -11,6 +11,13 @@ spec:
   - name: nginx-proxy
     image: {{ nginx_image_repo }}:{{ nginx_image_tag }}
     imagePullPolicy: {{ k8s_image_pull_policy }}
+    resources:
+      limits:
+        cpu: {{ nginx_cpu_limit }}
+        memory: {{ nginx_memory_limit }}
+      requests:
+        cpu: {{ nginx_cpu_requests }}
+        memory: {{ nginx_memory_requests }}
     securityContext:
       privileged: true
     volumeMounts:
diff --git a/roles/network_plugin/calico/defaults/main.yml b/roles/network_plugin/calico/defaults/main.yml
index 391e7c53e..6718fdbc5 100644
--- a/roles/network_plugin/calico/defaults/main.yml
+++ b/roles/network_plugin/calico/defaults/main.yml
@@ -19,3 +19,17 @@ global_as_num: "64512"
 # not be specified in calico CNI config, so Calico will use built-in
 # defaults. The value should be a number, not a string.
 # calico_mtu: 1500
+
+# Limits for apps
+calico_rr_memory_limit: 1000M
+calico_rr_cpu_limit: 300m
+calico_rr_memory_requests: 500M
+calico_rr_cpu_requests: 150m
+calico_node_memory_limit: 500M
+calico_node_cpu_limit: 300m
+calico_node_memory_requests: 256M
+calico_node_cpu_requests: 150m
+calicoctl_memory_limit: 170M
+calicoctl_cpu_limit: 100m
+calicoctl_memory_requests: 70M
+calicoctl_cpu_requests: 50m
diff --git a/roles/network_plugin/calico/rr/templates/calico-rr.service.j2 b/roles/network_plugin/calico/rr/templates/calico-rr.service.j2
index 1a4b3e977..f6da04a4d 100644
--- a/roles/network_plugin/calico/rr/templates/calico-rr.service.j2
+++ b/roles/network_plugin/calico/rr/templates/calico-rr.service.j2
@@ -5,8 +5,8 @@ Requires=docker.service
 
 [Service]
 EnvironmentFile=/etc/calico/calico-rr.env
-ExecStartPre=-/usr/bin/docker rm -f calico-rr
-ExecStart=/usr/bin/docker run --net=host --privileged \
+ExecStartPre=-{{ docker_bin_dir }}/docker rm -f calico-rr
+ExecStart={{ docker_bin_dir }}/docker run --net=host --privileged \
  --name=calico-rr \
  -e IP=${IP} \
  -e IP6=${IP6} \
@@ -16,12 +16,13 @@ ExecStart=/usr/bin/docker run --net=host --privileged \
  -e ETCD_KEY_FILE=${ETCD_KEY_FILE} \
  -v /var/log/calico-rr:/var/log/calico \
  -v {{ calico_cert_dir }}:{{ calico_cert_dir }}:ro \
+ --memory={{ calico_rr_memory_limit|regex_replace('Mi', 'M') }} --cpu-shares={{ calico_rr_cpu_limit|regex_replace('m', '') }} \
  {{ calico_rr_image_repo }}:{{ calico_rr_image_tag }}
 
 Restart=always
 RestartSec=10s
 
-ExecStop=-/usr/bin/docker stop calico-rr
+ExecStop=-{{ docker_bin_dir }}/docker stop calico-rr
 
 [Install]
 WantedBy=multi-user.target
diff --git a/roles/network_plugin/calico/tasks/main.yml b/roles/network_plugin/calico/tasks/main.yml
index 81979a910..ae6e0e4d4 100644
--- a/roles/network_plugin/calico/tasks/main.yml
+++ b/roles/network_plugin/calico/tasks/main.yml
@@ -41,7 +41,7 @@
   notify: restart calico-node
 
 - name: Calico | Copy cni plugins from hyperkube
-  command: "/usr/bin/docker run --rm -v /opt/cni/bin:/cnibindir {{ hyperkube_image_repo }}:{{ hyperkube_image_tag }} /usr/bin/rsync -a /opt/cni/bin/ /cnibindir/"
+  command: "{{ docker_bin_dir }}/docker run --rm -v /opt/cni/bin:/cnibindir {{ hyperkube_image_repo }}:{{ hyperkube_image_tag }} /usr/bin/rsync -a /opt/cni/bin/ /cnibindir/"
   register: cni_task_result
   until: cni_task_result.rc == 0
   retries: 4
@@ -50,7 +50,7 @@
   tags: [hyperkube, upgrade]
 
 - name: Calico | Copy cni plugins from calico/cni container
-  command: "/usr/bin/docker run --rm -v /opt/cni/bin:/cnibindir {{ calico_cni_image_repo }}:{{ calico_cni_image_tag }} sh -c 'cp -a /opt/cni/bin/* /cnibindir/'"
+  command: "{{ docker_bin_dir }}/docker run --rm -v /opt/cni/bin:/cnibindir {{ calico_cni_image_repo }}:{{ calico_cni_image_tag }} sh -c 'cp -a /opt/cni/bin/* /cnibindir/'"
   register: cni_task_result
   until: cni_task_result.rc == 0
   retries: 4
diff --git a/roles/network_plugin/calico/templates/calico-node.service.j2 b/roles/network_plugin/calico/templates/calico-node.service.j2
index dc041b054..dfe8c4a3d 100644
--- a/roles/network_plugin/calico/templates/calico-node.service.j2
+++ b/roles/network_plugin/calico/templates/calico-node.service.j2
@@ -5,8 +5,8 @@ Requires=docker.service
 
 [Service]
 EnvironmentFile=/etc/calico/calico.env
-ExecStartPre=-/usr/bin/docker rm -f calico-node
-ExecStart=/usr/bin/docker run --net=host --privileged \
+ExecStartPre=-{{ docker_bin_dir }}/docker rm -f calico-node
+ExecStart={{ docker_bin_dir }}/docker run --net=host --privileged \
  --name=calico-node \
  -e HOSTNAME=${CALICO_HOSTNAME} \
  -e IP=${CALICO_IP} \
@@ -24,12 +24,13 @@ ExecStart=/usr/bin/docker run --net=host --privileged \
  -v /lib/modules:/lib/modules \
  -v /var/run/calico:/var/run/calico \
  -v {{ calico_cert_dir }}:{{ calico_cert_dir }}:ro \
+ --memory={{ calico_node_memory_limit|regex_replace('Mi', 'M') }} --cpu-shares={{ calico_node_cpu_limit|regex_replace('m', '') }} \
  {{ calico_node_image_repo }}:{{ calico_node_image_tag }}
 
 Restart=always
 RestartSec=10s
 
-ExecStop=-/usr/bin/docker stop calico-node
+ExecStop=-{{ docker_bin_dir }}/docker stop calico-node
 
 [Install]
 WantedBy=multi-user.target
diff --git a/roles/network_plugin/calico/templates/calicoctl-container.j2 b/roles/network_plugin/calico/templates/calicoctl-container.j2
index 7be30928a..0ecfba0c1 100644
--- a/roles/network_plugin/calico/templates/calicoctl-container.j2
+++ b/roles/network_plugin/calico/templates/calicoctl-container.j2
@@ -1,13 +1,14 @@
 #!/bin/bash
-/usr/bin/docker run -i --privileged --rm \
+{{ docker_bin_dir }}/docker run -i --privileged --rm \
 --net=host --pid=host \
 -e ETCD_ENDPOINTS={{ etcd_access_endpoint }} \
 -e ETCD_CA_CERT_FILE=/etc/calico/certs/ca_cert.crt \
 -e ETCD_CERT_FILE=/etc/calico/certs/cert.crt \
 -e ETCD_KEY_FILE=/etc/calico/certs/key.pem \
--v /usr/bin/docker:/usr/bin/docker \
+-v {{ docker_bin_dir }}/docker:{{ docker_bin_dir }}/docker \
 -v /var/run/docker.sock:/var/run/docker.sock \
 -v /var/run/calico:/var/run/calico \
 -v /etc/calico/certs:/etc/calico/certs:ro \
+--memory={{ calicoctl_memory_limit|regex_replace('Mi', 'M') }} --cpu-shares={{ calicoctl_cpu_limit|regex_replace('m', '') }} \
 {{ calicoctl_image_repo }}:{{ calicoctl_image_tag}} \
 $@
diff --git a/roles/network_plugin/canal/defaults/main.yml b/roles/network_plugin/canal/defaults/main.yml
index d67d593f5..f8482fb1a 100644
--- a/roles/network_plugin/canal/defaults/main.yml
+++ b/roles/network_plugin/canal/defaults/main.yml
@@ -13,3 +13,13 @@ canal_log_level: "info"
 # Etcd SSL dirs
 canal_cert_dir: /etc/canal/certs
 etcd_cert_dir: /etc/ssl/etcd/ssl
+
+# Resource limits and requests for the calico-node and flannel containers
+calico_node_memory_limit: 500M
+calico_node_cpu_limit: 200m
+calico_node_memory_requests: 256M
+calico_node_cpu_requests: 100m
+flannel_memory_limit: 500M
+flannel_cpu_limit: 200m
+flannel_memory_requests: 256M
+flannel_cpu_requests: 100m
diff --git a/roles/network_plugin/canal/tasks/main.yml b/roles/network_plugin/canal/tasks/main.yml
index fec09cb48..3d3b19bdc 100644
--- a/roles/network_plugin/canal/tasks/main.yml
+++ b/roles/network_plugin/canal/tasks/main.yml
@@ -43,7 +43,7 @@
     dest: "{{kube_config_dir}}/canal-node.yaml"
 
 - name: Canal | Copy cni plugins from hyperkube
-  command: "/usr/bin/docker run --rm -v /opt/cni/bin:/cnibindir {{ hyperkube_image_repo }}:{{ hyperkube_image_tag }} /usr/bin/rsync -a /opt/cni/bin/ /cnibindir/"
+  command: "{{ docker_bin_dir }}/docker run --rm -v /opt/cni/bin:/cnibindir {{ hyperkube_image_repo }}:{{ hyperkube_image_tag }} /usr/bin/rsync -a /opt/cni/bin/ /cnibindir/"
   register: cni_task_result
   until: cni_task_result.rc == 0
   retries: 4
@@ -52,7 +52,7 @@
   tags: [hyperkube, upgrade]
 
 - name: Canal | Copy cni plugins from calico/cni
-  command: "/usr/bin/docker run --rm -v /opt/cni/bin:/cnibindir {{ calico_cni_image_repo }}:{{ calico_cni_image_tag }} sh -c 'cp -a /opt/cni/bin/* /cnibindir/'"
+  command: "{{ docker_bin_dir }}/docker run --rm -v /opt/cni/bin:/cnibindir {{ calico_cni_image_repo }}:{{ calico_cni_image_tag }} sh -c 'cp -a /opt/cni/bin/* /cnibindir/'"
   register: cni_task_result
   until: cni_task_result.rc == 0
   retries: 4
diff --git a/roles/network_plugin/canal/templates/canal-node.yml.j2 b/roles/network_plugin/canal/templates/canal-node.yml.j2
index f73fae9bd..37baf06e0 100644
--- a/roles/network_plugin/canal/templates/canal-node.yml.j2
+++ b/roles/network_plugin/canal/templates/canal-node.yml.j2
@@ -49,6 +49,13 @@ spec:
         - name: flannel
           image: "{{ flannel_image_repo }}:{{ flannel_image_tag }}"
           imagePullPolicy: {{ k8s_image_pull_policy }}
+          resources:
+            limits:
+              cpu: {{ flannel_cpu_limit }}
+              memory: {{ flannel_memory_limit }}
+            requests:
+              cpu: {{ flannel_cpu_requests }}
+              memory: {{ flannel_memory_requests }}
           env:
             # Cluster name
             - name: CLUSTER_NAME
@@ -119,6 +126,13 @@ spec:
         - name: calico-node
           image: "{{ calico_node_image_repo }}:{{ calico_node_image_tag }}"
           imagePullPolicy: {{ k8s_image_pull_policy }}
+          resources:
+            limits:
+              cpu: {{ calico_node_cpu_limit }}
+              memory: {{ calico_node_memory_limit }}
+            requests:
+              cpu: {{ calico_node_cpu_requests }}
+              memory: {{ calico_node_memory_requests }}
           env:
             # The location of the etcd cluster.
             - name: ETCD_ENDPOINTS
diff --git a/roles/network_plugin/cloud/tasks/main.yml b/roles/network_plugin/cloud/tasks/main.yml
index c8ae77830..346a57969 100644
--- a/roles/network_plugin/cloud/tasks/main.yml
+++ b/roles/network_plugin/cloud/tasks/main.yml
@@ -1,7 +1,7 @@
 ---
 
 - name: Cloud | Copy cni plugins from hyperkube
-  command: "/usr/bin/docker run --rm -v /opt/cni/bin:/cnibindir {{ hyperkube_image_repo }}:{{ hyperkube_image_tag }} /bin/cp -r /opt/cni/bin/. /cnibindir/"
+  command: "{{ docker_bin_dir }}/docker run --rm -v /opt/cni/bin:/cnibindir {{ hyperkube_image_repo }}:{{ hyperkube_image_tag }} /bin/cp -r /opt/cni/bin/. /cnibindir/"
   register: cni_task_result
   until: cni_task_result.rc == 0
   retries: 4
diff --git a/roles/network_plugin/flannel/defaults/main.yml b/roles/network_plugin/flannel/defaults/main.yml
index ce00090ec..b6768f1bd 100644
--- a/roles/network_plugin/flannel/defaults/main.yml
+++ b/roles/network_plugin/flannel/defaults/main.yml
@@ -10,3 +10,9 @@ flannel_public_ip: "{{ access_ip|default(ip|default(ansible_default_ipv4.address
 # You can choose what type of flannel backend to use
 # please refer to flannel's docs : https://github.com/coreos/flannel/blob/master/README.md
 flannel_backend_type: "vxlan"
+
+# Resource limits and requests for the flannel container
+flannel_memory_limit: 500M
+flannel_cpu_limit: 300m
+flannel_memory_requests: 256M
+flannel_cpu_requests: 150m
diff --git a/roles/network_plugin/flannel/handlers/main.yml b/roles/network_plugin/flannel/handlers/main.yml
index 0f2734264..e393b6163 100644
--- a/roles/network_plugin/flannel/handlers/main.yml
+++ b/roles/network_plugin/flannel/handlers/main.yml
@@ -32,7 +32,7 @@
   pause: seconds=10 prompt="Waiting for docker restart"
 
 - name: Flannel | wait for docker
-  command: /usr/bin/docker images
+  command: "{{ docker_bin_dir }}/docker images"
   register: docker_ready
   retries: 10
   delay: 5
diff --git a/roles/network_plugin/flannel/templates/flannel-pod.yml b/roles/network_plugin/flannel/templates/flannel-pod.yml
index 2edd9ada1..f9b76ce5f 100644
--- a/roles/network_plugin/flannel/templates/flannel-pod.yml
+++ b/roles/network_plugin/flannel/templates/flannel-pod.yml
@@ -19,6 +19,13 @@
       - name: "flannel-container"
         image: "{{ flannel_image_repo }}:{{ flannel_image_tag }}"
         imagePullPolicy: {{ k8s_image_pull_policy }}
+        resources:
+          limits:
+            cpu: {{ flannel_cpu_limit }}
+            memory: {{ flannel_memory_limit }}
+          requests:
+            cpu: {{ flannel_cpu_requests }}
+            memory: {{ flannel_memory_requests }}
         command:
           - "/bin/sh"
           - "-c"
@@ -26,9 +33,6 @@
         ports:
           - hostPort: 10253
             containerPort: 10253
-        resources:
-          limits:
-            cpu: "100m"
         volumeMounts:
           - name: "subnetenv"
             mountPath: "/run/flannel"
diff --git a/roles/network_plugin/weave/defaults/main.yml b/roles/network_plugin/weave/defaults/main.yml
new file mode 100644
index 000000000..4aabcac6f
--- /dev/null
+++ b/roles/network_plugin/weave/defaults/main.yml
@@ -0,0 +1,4 @@
+---
+# Memory/CPU limits for weave, rendered into WEAVE_DOCKER_ARGS
+weave_memory_limit: 500M
+weave_cpu_limit: 300m
diff --git a/roles/network_plugin/weave/tasks/main.yml b/roles/network_plugin/weave/tasks/main.yml
index e74c1c334..9609ea141 100644
--- a/roles/network_plugin/weave/tasks/main.yml
+++ b/roles/network_plugin/weave/tasks/main.yml
@@ -1,6 +1,6 @@
 ---
 - name: Weave | Copy cni plugins from hyperkube
-  command: "/usr/bin/docker run --rm -v /opt/cni/bin:/cnibindir {{ hyperkube_image_repo }}:{{ hyperkube_image_tag }} /bin/cp -r /opt/cni/bin/. /cnibindir/"
+  command: "{{ docker_bin_dir }}/docker run --rm -v /opt/cni/bin:/cnibindir {{ hyperkube_image_repo }}:{{ hyperkube_image_tag }} /bin/cp -r /opt/cni/bin/. /cnibindir/"
   register: cni_task_result
   until: cni_task_result.rc == 0
   retries: 4
diff --git a/roles/network_plugin/weave/templates/weave.j2 b/roles/network_plugin/weave/templates/weave.j2
index 865eb96a7..f1e92c25c 100644
--- a/roles/network_plugin/weave/templates/weave.j2
+++ b/roles/network_plugin/weave/templates/weave.j2
@@ -1,3 +1,4 @@
+WEAVE_DOCKER_ARGS="--memory={{ weave_memory_limit|regex_replace('Mi', 'M') }} --cpu-shares={{ weave_cpu_limit|regex_replace('m', '') }}"
 WEAVE_PEERS="{% for host in groups['k8s-cluster'] %}{{ hostvars[host]['access_ip'] | default(hostvars[host]['ip'] | default(hostvars[host]['ansible_default_ipv4']['address'])) }}{% if not loop.last %} {% endif %}{% endfor %}"
 WEAVEPROXY_ARGS="--rewrite-inspect --without-dns"
 WEAVE_SUBNET="--ipalloc-range {{ kube_pods_subnet }}"
diff --git a/roles/network_plugin/weave/templates/weave.service.j2 b/roles/network_plugin/weave/templates/weave.service.j2
index 2df0cb989..6c2aad249 100644
--- a/roles/network_plugin/weave/templates/weave.service.j2
+++ b/roles/network_plugin/weave/templates/weave.service.j2
@@ -6,12 +6,13 @@ After=docker.service docker.socket
 
 [Service]
 EnvironmentFile=-/etc/weave.env
-ExecStartPre=-/usr/bin/docker rm -f weave
+ExecStartPre=-{{ docker_bin_dir }}/docker rm -f weave
 ExecStartPre={{ bin_dir }}/weave launch-router \
             $WEAVE_SUBNET \
             $WEAVE_PEERS
-ExecStart=/usr/bin/docker attach weave
+ExecStart={{ docker_bin_dir }}/docker attach weave
 ExecStop={{ bin_dir }}/weave stop
+Restart=on-failure
 
 [Install]
 WantedBy=multi-user.target
diff --git a/roles/network_plugin/weave/templates/weaveproxy.service.j2 b/roles/network_plugin/weave/templates/weaveproxy.service.j2
index 9b2a522ba..5b3f4f86d 100644
--- a/roles/network_plugin/weave/templates/weaveproxy.service.j2
+++ b/roles/network_plugin/weave/templates/weaveproxy.service.j2
@@ -7,11 +7,11 @@ After=docker.service docker.socket
 [Service]
 EnvironmentFile=-/etc/weave.%H.env
 EnvironmentFile=-/etc/weave.env
-ExecStartPre=-/usr/bin/docker rm -f weaveproxy
+ExecStartPre=-{{ docker_bin_dir }}/docker rm -f weaveproxy
 ExecStartPre={{ bin_dir }}/weave launch-proxy $WEAVEPROXY_ARGS
-ExecStart=/usr/bin/docker attach weaveproxy
+ExecStart={{ docker_bin_dir }}/docker attach weaveproxy
 Restart=on-failure
-ExecStop=/opt/bin/weave stop-proxy
+ExecStop={{ bin_dir }}/weave stop-proxy
 
 [Install]
 WantedBy=weave-network.target
diff --git a/roles/reset/tasks/main.yml b/roles/reset/tasks/main.yml
index 8678a8e44..217ce6729 100644
--- a/roles/reset/tasks/main.yml
+++ b/roles/reset/tasks/main.yml
@@ -21,7 +21,7 @@
   when: ansible_service_mgr == "systemd" and services_removed.changed
 
 - name: reset | remove all containers
-  shell: docker ps -aq | xargs -r docker rm -fv
+  shell: "{{ docker_bin_dir }}/docker ps -aq | xargs -r {{ docker_bin_dir }}/docker rm -fv"
 
 - name: reset | gather mounted kubelet dirs
   shell: mount | grep /var/lib/kubelet | awk '{print $3}' | tac
diff --git a/scripts/collect-info.yaml b/scripts/collect-info.yaml
index 570c358f3..b258284ee 100644
--- a/scripts/collect-info.yaml
+++ b/scripts/collect-info.yaml
@@ -10,7 +10,7 @@
       - name: kernel_info
         cmd: uname -r
       - name: docker_info
-        cmd: docker info
+        cmd: "{{ docker_bin_dir }}/docker info"
       - name: ip_info
         cmd: ip -4 -o a
       - name: route_info
@@ -34,9 +34,11 @@
       - name: weave_info
         cmd: weave report
       - name: weave_logs
-        cmd: docker logs weave
+        cmd: "{{ docker_bin_dir }}/docker logs weave"
       - name: kube_describe_all
         cmd: kubectl describe all --all-namespaces
+      - name: kube_describe_nodes
+        cmd: kubectl describe nodes
       - name: kubelet_logs
         cmd: journalctl -u kubelet --no-pager
       - name: kubedns_logs
-- 
GitLab