Skip to content
Snippets Groups Projects
Commit 7760f75a authored by Smaine Kahlouch's avatar Smaine Kahlouch Committed by GitHub
Browse files

Merge pull request #488 from bogdando/issue/480

Distribute container images across nodes
parents 76c43f62 390764c2
No related branches found
No related tags found
No related merge requests found
Showing
with 153 additions and 47 deletions
Large deployments of K8s
========================
For a large scaled deployments, consider the following configuration changes:
* Tune [ansible settings](http://docs.ansible.com/ansible/intro_configuration.html)
for `forks` and `timeout` vars to fit large numbers of nodes being deployed.
* Override containers' `foo_image_repo` vars to point to intranet registry.
* Override the ``download_run_once: true`` to download binaries and container
images only once then push to nodes in batches.
* Adjust the `retry_stagger` global var as appropriate. It should provide sane
load on a delegate (the first K8s master node) then retrying failed
push or download operations.
For example, when deploying 200 nodes, you may want to run ansible with
``--forks=50``, ``--timeout=600`` and define the ``retry_stagger: 60``.
......@@ -7,6 +7,8 @@ bin_dir: /usr/local/bin
# Where the binaries will be downloaded.
# Note: ensure that you've enough disk space (about 1G)
local_release_dir: "/tmp/releases"
# Random shifts for retrying failed ops like pushing/downloading
retry_stagger: 5
# Uncomment this line for CoreOS only.
# Directory where python binary is installed
......
......@@ -30,7 +30,7 @@
register: keyserver_task_result
until: keyserver_task_result|success
retries: 4
delay: "{{ 20 | random + 3 }}"
delay: "{{ retry_stagger | random + 3 }}"
with_items: "{{ docker_repo_key_info.repo_keys }}"
when: ansible_os_family != "CoreOS"
......@@ -58,7 +58,7 @@
register: docker_task_result
until: docker_task_result|success
retries: 4
delay: "{{ 20 | random + 3 }}"
delay: "{{ retry_stagger | random + 3 }}"
with_items: "{{ docker_package_info.pkgs }}"
when: (ansible_os_family != "CoreOS") and (docker_package_info.pkgs|length > 0)
......
......@@ -13,6 +13,8 @@ etcd_version: v3.0.6
calico_version: v0.20.0
calico_cni_version: v1.3.1
weave_version: v1.6.1
flannel_version: 0.5.5
flannel_server_helper_version: 0.1
# Download URL's
etcd_download_url: "https://storage.googleapis.com/kargo/{{etcd_version}}_etcd"
......@@ -26,6 +28,22 @@ calico_cni_ipam_checksum: "3df6951a30749c279229e7e318e74ac4e41263996125be65257db
weave_checksum: "9bf9d6e5a839e7bcbb28cc00c7acae9d09284faa3e7a3720ca9c2b9e93c68580"
etcd_checksum: "385afd518f93e3005510b7aaa04d38ee4a39f06f5152cd33bb86d4f0c94c7485"
# Containers
# Possible values: host, docker
etcd_deployment_type: "docker"
etcd_image_repo: "quay.io/coreos/etcd"
etcd_image_tag: "{{ etcd_version }}"
flannel_server_helper_image_repo: "gcr.io/google_containers/flannel-server-helper"
flannel_server_helper_image_tag: "{{ flannel_server_helper_version }}"
flannel_image_repo: "quay.io/coreos/flannel"
flannel_image_tag: "{{ flannel_version }}"
calicoctl_image_repo: "calico/ctl"
calicoctl_image_tag: "{{ calico_version }}"
calico_node_image_repo: "calico/node"
calico_node_image_tag: "{{ calico_version }}"
hyperkube_image_repo: "quay.io/coreos/hyperkube"
hyperkube_image_tag: "{{ kube_version }}_coreos.0"
downloads:
calico_cni_plugin:
dest: calico/bin/calico
......@@ -35,6 +53,7 @@ downloads:
url: "{{ calico_cni_download_url }}"
owner: "root"
mode: "0755"
enabled: "{{ kube_network_plugin == 'calico' }}"
calico_cni_plugin_ipam:
dest: calico/bin/calico-ipam
version: "{{calico_cni_version}}"
......@@ -43,6 +62,7 @@ downloads:
url: "{{ calico_cni_ipam_download_url }}"
owner: "root"
mode: "0755"
enabled: "{{ kube_network_plugin == 'calico' }}"
weave:
dest: weave/bin/weave
version: "{{weave_version}}"
......@@ -51,6 +71,7 @@ downloads:
sha256: "{{ weave_checksum }}"
owner: "root"
mode: "0755"
enabled: "{{ kube_network_plugin == 'weave' }}"
etcd:
version: "{{etcd_version}}"
dest: "etcd/etcd-{{ etcd_version }}-linux-amd64.tar.gz"
......@@ -60,10 +81,38 @@ downloads:
unarchive: true
owner: "etcd"
mode: "0755"
nothing:
enabled: false
container: "{{ etcd_deployment_type == 'docker' }}"
repo: "{{ etcd_image_repo }}"
tag: "{{ etcd_image_tag }}"
hyperkube:
container: true
repo: "{{ hyperkube_image_repo }}"
tag: "{{ hyperkube_image_tag }}"
flannel:
container: true
repo: "{{ flannel_image_repo }}"
tag: "{{ flannel_image_tag }}"
enabled: "{{ kube_network_plugin == 'flannel' }}"
flannel_server_helper:
container: true
repo: "{{ flannel_server_helper_image_repo }}"
tag: "{{ flannel_server_helper_image_tag }}"
enabled: "{{ kube_network_plugin == 'flannel' }}"
calicoctl:
container: true
repo: "{{ calicoctl_image_repo }}"
tag: "{{ calicoctl_image_tag }}"
enabled: "{{ kube_network_plugin == 'calico' }}"
calico_node:
container: true
repo: "{{ calico_node_image_repo }}"
tag: "{{ calico_node_image_tag }}"
enabled: "{{ kube_network_plugin == 'calico' }}"
download:
container: "{{ file.container|default('false') }}"
repo: "{{ file.repo|default(None) }}"
tag: "{{ file.tag|default(None) }}"
enabled: "{{ file.enabled|default('true') }}"
dest: "{{ file.dest|default(None) }}"
version: "{{ file.version|default(None) }}"
......
......@@ -4,11 +4,12 @@
- name: downloading...
debug:
msg: "{{ download.url }}"
when: "{{ download.enabled|bool }}"
when: "{{ download.enabled|bool and not download.container|bool }}"
- name: Create dest directories
file: path={{local_release_dir}}/{{download.dest|dirname}} state=directory recurse=yes
when: "{{ download.enabled|bool }}"
when: "{{ download.enabled|bool and not download.container|bool }}"
delegate_to: "{{ groups['kube-master'][0] if download_run_once|bool else omit }}"
run_once: "{{ download_run_once|bool }}"
- name: Download items
......@@ -18,7 +19,12 @@
sha256sum: "{{download.sha256 | default(omit)}}"
owner: "{{ download.owner|default(omit) }}"
mode: "{{ download.mode|default(omit) }}"
when: "{{ download.enabled|bool }}"
register: get_url_result
until: "'OK' in get_url_result.msg or 'file already exists' in get_url_result.msg"
retries: 4
delay: "{{ retry_stagger | random + 3 }}"
when: "{{ download.enabled|bool and not download.container|bool }}"
delegate_to: "{{ groups['kube-master'][0] if download_run_once|bool else omit }}"
run_once: "{{ download_run_once|bool }}"
- name: Extract archives
......@@ -28,7 +34,8 @@
owner: "{{ download.owner|default(omit) }}"
mode: "{{ download.mode|default(omit) }}"
copy: no
when: "{{ download.enabled|bool }} and ({{download.unarchive is defined and download.unarchive == True}})"
when: "{{ download.enabled|bool and not download.container|bool and download.unarchive is defined and download.unarchive == True }}"
delegate_to: "{{ groups['kube-master'][0] if download_run_once|bool else omit }}"
run_once: "{{ download_run_once|bool }}"
- name: Fix permissions
......@@ -37,5 +44,50 @@
path: "{{local_release_dir}}/{{download.dest}}"
owner: "{{ download.owner|default(omit) }}"
mode: "{{ download.mode|default(omit) }}"
when: "{{ download.enabled|bool }} and ({{download.unarchive is not defined or download.unarchive == False}})"
when: "{{ download.enabled|bool and not download.container|bool and (download.unarchive is not defined or download.unarchive == False) }}"
delegate_to: "{{ groups['kube-master'][0] if download_run_once|bool else omit }}"
run_once: "{{ download_run_once|bool }}"
- name: pulling...
debug:
msg: "{{ download.repo }}:{{ download.tag }}"
when: "{{ download.enabled|bool and download.container|bool }}"
- name: Create dest directory for saved/loaded container images
file: path="{{local_release_dir}}/containers" state=directory recurse=yes
when: "{{ download.enabled|bool and download.container|bool }}"
#NOTE(bogdando) this brings no docker-py deps for nodes
- name: Download containers
command: "/usr/bin/docker pull {{ download.repo }}:{{ download.tag }}"
register: pull_task_result
until: pull_task_result.rc == 0
retries: 4
delay: "{{ retry_stagger | random + 3 }}"
when: "{{ download.enabled|bool and download.container|bool }}"
delegate_to: "{{ groups['kube-master'][0] if download_run_once|bool else omit }}"
run_once: "{{ download_run_once|bool }}"
- set_fact:
fname: "{{local_release_dir}}/containers/{{download.repo|regex_replace('/|\0|:', '_')}}:{{download.tag|regex_replace('/|\0|:', '_')}}.tar"
- name: Download | save container images
shell: docker save "{{ download.repo }}:{{ download.tag }}" > "{{ fname }}"
delegate_to: "{{groups['kube-master'][0]}}"
run_once: true
when: ansible_os_family != "CoreOS" and download_run_once|bool
- name: Download | get container images
synchronize:
src: "{{ fname }}"
dest: "{{local_release_dir}}/containers"
mode: push
register: get_task
until: get_task|success
retries: 4
delay: "{{ retry_stagger | random + 3 }}"
when: ansible_os_family != "CoreOS" and inventory_hostname != groups['kube-master'][0] and download_run_once|bool
- name: Download | load container images
shell: docker load < "{{ fname }}"
when: ansible_os_family != "CoreOS" and inventory_hostname != groups['kube-master'][0] and download_run_once|bool
---
etcd_version: v3.0.6
etcd_bin_dir: "{{ local_release_dir }}/etcd/etcd-{{ etcd_version }}-linux-amd64/"
# Possible values: host, docker
etcd_deployment_type: "docker"
etcd_image_repo: "quay.io/coreos/etcd"
etcd_image_tag: "{{ etcd_version }}"
......@@ -3,8 +3,7 @@ dependencies:
- role: adduser
user: "{{ addusers.etcd }}"
when: ansible_os_family != 'CoreOS'
- role: download
file: "{{ downloads.etcd }}"
when: etcd_deployment_type == "host"
- role: docker
when: (ansible_os_family != "CoreOS" and etcd_deployment_type == "docker" or inventory_hostname in groups['k8s-cluster'])
- role: download
file: "{{ downloads.etcd }}"
......@@ -20,7 +20,7 @@
register: etcd_task_result
until: etcd_task_result.rc == 0
retries: 4
delay: "{{ 20 | random + 3 }}"
delay: "{{ retry_stagger | random + 3 }}"
changed_when: false
#Plan B: looks nicer, but requires docker-py on all hosts:
......
......@@ -10,6 +10,3 @@ kube_users_dir: "{{ kube_config_dir }}/users"
# An experimental dev/test only dynamic volumes provisioner,
# for PetSets. Works for kube>=v1.3 only.
kube_hostpath_dynamic_provisioner: "false"
hyperkube_image_repo: "quay.io/coreos/hyperkube"
hyperkube_image_tag: "{{ kube_version }}_coreos.0"
---
dependencies:
- role: download # For kube_version variable
file: "{{ downloads.nothing }}"
- role: download
file: "{{ downloads.hyperkube }}"
......@@ -12,7 +12,7 @@
register: kube_task_result
until: kube_task_result.rc == 0
retries: 4
delay: "{{ 20 | random + 3 }}"
delay: "{{ retry_stagger | random + 3 }}"
changed_when: false
- name: Write kube-apiserver manifest
......
......@@ -8,9 +8,6 @@ kube_resolv_conf: "/etc/resolv.conf"
kube_proxy_mode: iptables
hyperkube_image_repo: "quay.io/coreos/hyperkube"
hyperkube_image_tag: "{{ kube_version }}_coreos.0"
# IP address of the DNS server.
# Kubernetes will create a pod with several containers, serving as the DNS
# server and expose it under this IP address. The IP address must be from
......
---
dependencies:
- role: download #For kube_version
file: "{{ downloads.nothing }}"
- role: download
file: "{{ downloads.hyperkube }}"
- role: kubernetes/secrets
......@@ -104,7 +104,7 @@
register: pkgs_task_result
until: pkgs_task_result|success
retries: 4
delay: "{{ 20 | random + 3 }}"
delay: "{{ retry_stagger | random + 3 }}"
with_items: "{{required_pkgs | default([]) | union(common_required_pkgs|default([]))}}"
when: ansible_os_family != "CoreOS"
......
......@@ -7,9 +7,3 @@ ipip: false
# cloud_provider can only be set to 'gce' or 'aws'
# cloud_provider:
calicoctl_image_repo: calico/ctl
calicoctl_image_tag: "{{ calico_version }}"
calico_node_image_repo: calico/node
calico_node_image_tag: "{{ calico_version }}"
......@@ -4,3 +4,9 @@ dependencies:
file: "{{ downloads.calico_cni_plugin }}"
- role: download
file: "{{ downloads.calico_cni_plugin_ipam }}"
- role: download
file: "{{ downloads.calico_node }}"
- role: download
file: "{{ downloads.calicoctl }}"
- role: download
file: "{{ downloads.hyperkube }}"
......@@ -48,7 +48,7 @@
register: cni_task_result
until: cni_task_result.rc == 0
retries: 4
delay: "{{ 20 | random + 3 }}"
delay: "{{ retry_stagger | random + 3 }}"
changed_when: false
when: use_hyperkube_cni
......
......@@ -10,10 +10,3 @@ flannel_public_ip: "{{ access_ip|default(ip|default(ansible_default_ipv4.address
# You can choose what type of flannel backend to use
# please refer to flannel's docs : https://github.com/coreos/flannel/blob/master/README.md
flannel_backend_type: "vxlan"
flannel_server_helper_image_repo: "gcr.io/google_containers/flannel-server-helper"
flannel_server_helper_image_tag: "0.1"
flannel_image_repo: "quay.io/coreos/flannel"
flannel_image_tag: "0.5.5"
---
dependencies:
- role: download
file: "{{ downloads.flannel_server_helper }}"
- role: download
file: "{{ downloads.flannel }}"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment