From b8bc8eee41b8dbf47139d444048cb5da98a2725c Mon Sep 17 00:00:00 2001
From: Bogdan Dobrelya <bdobrelia@mirantis.com>
Date: Mon, 19 Dec 2016 15:50:04 +0100
Subject: [PATCH] Add download_always_pull check and sha256 for docker images

Signed-off-by: Bogdan Dobrelya <bdobrelia@mirantis.com>
---
 docs/downloads.md                             | 42 +++++++++++++++++++
 docs/large-deployments.md                     | 10 +----
 roles/download/defaults/main.yml              | 23 +++++++++-
 roles/download/tasks/main.yml                 | 33 +++++++++------
 .../download/tasks/set_docker_image_facts.yml | 27 ++++++++++++
 5 files changed, 113 insertions(+), 22 deletions(-)
 create mode 100644 docs/downloads.md
 create mode 100644 roles/download/tasks/set_docker_image_facts.yml

diff --git a/docs/downloads.md b/docs/downloads.md
new file mode 100644
index 000000000..2c3f3085f
--- /dev/null
+++ b/docs/downloads.md
@@ -0,0 +1,42 @@
+Downloading binaries and containers
+===================================
+
+Kargo supports several download/upload modes. The default is:
+
+* Each node downloads binaries and container images on its own, which is
+  ``download_run_once: False``.
+* For K8s apps, pull policy is ``k8s_image_pull_policy: IfNotPresent``.
+* For system managed containers, like kubelet or etcd, pull policy is
+  ``download_always_pull: False``, which means an image is pulled only if the
+  wanted repo and tag/sha256 digest differ from what the host already has.
+
+There is also a "pull once, push many" mode:
+
+* Override the ``download_run_once: True`` to download container images only once
+  then push to cluster nodes in batches. The default delegate node
+  for pushing images is the first `kube-master`.
+* If your ansible runner node (aka the admin node) has password-less sudo and
+  docker enabled, you may want to define the ``download_localhost: True``, which
+  makes that node a delegate for pushing images while running the deployment with
+  ansible. This may be the case if cluster nodes cannot access each other via ssh
+  or you want to use local docker images as a cache for multiple clusters.
+
+Container images and binary files are described by the vars like ``foo_version``,
+``foo_download_url``, ``foo_checksum`` for binaries and ``foo_image_repo``,
+``foo_image_tag`` or optional ``foo_digest_checksum`` for containers.
+
+A container image may be defined by its repo and tag, for example:
+`andyshinn/dnsmasq:2.72`, or by its repo, tag and sha256 digest:
+`andyshinn/dnsmasq@sha256:7c883354f6ea9876d176fe1d30132515478b2859d6fc0cbf9223ffdc09168193`.
+
+Note, the sha256 digest and the image tag must both be specified and correspond
+to each other. The given example above is represented by the following vars:
+```
+dnsmasq_digest_checksum: 7c883354f6ea9876d176fe1d30132515478b2859d6fc0cbf9223ffdc09168193
+dnsmasq_image_repo: andyshinn/dnsmasq
+dnsmasq_image_tag: '2.72'
+```
+The full list of available vars may be found in the download ansible role defaults.
+Those also allow specifying custom urls and local repositories for binaries and
+container images. See also the DNS stack docs for the related intranet configuration,
+so the hosts can resolve those urls and repos.
diff --git a/docs/large-deployments.md b/docs/large-deployments.md
index fd62f35e3..473f83954 100644
--- a/docs/large-deployments.md
+++ b/docs/large-deployments.md
@@ -8,14 +8,8 @@ For a large scaled deployments, consider the following configuration changes:
 
 * Override containers' `foo_image_repo` vars to point to intranet registry.
 
-* Override the ``download_run_once: true`` to download container images only once
-  then push to cluster nodes in batches. The default delegate node
-  for pushing images is the first kube-master. Note, if you have passwordless sudo
-  and docker enabled on the separate admin node, you may want to define the
-  ``download_localhost: true``, which makes that node a delegate for pushing images
-  while running the deployment with ansible. This maybe the case if cluster nodes
-  cannot access each over via ssh or you want to use local docker images as a cache
-  for multiple clusters.
+* Override the ``download_run_once: true`` and/or ``download_localhost: true``.
+  See [download modes](downloads.md) for details.
 
 * Adjust the `retry_stagger` global var as appropriate. It should provide sane
   load on a delegate (the first K8s master node) then retrying failed
diff --git a/roles/download/defaults/main.yml b/roles/download/defaults/main.yml
index 165c7963c..df9bd75c7 100644
--- a/roles/download/defaults/main.yml
+++ b/roles/download/defaults/main.yml
@@ -14,6 +14,9 @@ download_compress: 9
 # in the download_run_once mode.
 download_localhost: False
 
+# Always pull images if set to True. Otherwise pull only when the wanted repo:tag or repo@sha256 digest is absent on the host.
+download_always_pull: False
+
 # Versions
 etcd_version: v3.0.6
 #TODO(mattymo): Move calico versions to roles/network_plugins/calico/defaults
@@ -85,16 +88,19 @@ downloads:
     container: true
     repo: "{{ netcheck_server_img_repo }}"
     tag: "{{ netcheck_tag }}"
+    sha256: "{{ netcheck_server_digest_checksum|default(None) }}"
     enabled: "{{ deploy_netchecker|bool }}"
   netcheck_agent:
     container: true
     repo: "{{ netcheck_agent_img_repo }}"
     tag: "{{ netcheck_tag }}"
+    sha256: "{{ netcheck_agent_digest_checksum|default(None) }}"
     enabled: "{{ deploy_netchecker|bool }}"
   netcheck_kubectl:
     container: true
     repo: "{{ netcheck_kubectl_img_repo }}"
     tag: "{{ netcheck_kubectl_tag }}"
+    sha256: "{{ netcheck_kubectl_digest_checksum|default(None) }}"
     enabled: "{{ deploy_netchecker|bool }}"
   weave:
     dest: weave/bin/weave
@@ -108,7 +114,8 @@ downloads:
   etcd:
     version: "{{etcd_version}}"
     dest: "etcd/etcd-{{ etcd_version }}-linux-amd64.tar.gz"
-    sha256: "{{ etcd_checksum }}"
+    sha256: >-
+      {%- if etcd_deployment_type == 'docker' -%}{{etcd_digest_checksum|default(None)}}{%- else -%}{{etcd_checksum}}{%- endif -%}
     source_url: "{{ etcd_download_url }}"
     url: "{{ etcd_download_url }}"
     unarchive: true
@@ -121,64 +128,78 @@ downloads:
     container: true
     repo: "{{ hyperkube_image_repo }}"
     tag: "{{ hyperkube_image_tag }}"
+    sha256: "{{ hyperkube_digest_checksum|default(None) }}"
   flannel:
     container: true
     repo: "{{ flannel_image_repo }}"
     tag: "{{ flannel_image_tag }}"
+    sha256: "{{ flannel_digest_checksum|default(None) }}"
     enabled: "{{ kube_network_plugin == 'flannel' or kube_network_plugin == 'canal' }}"
   calicoctl:
     container: true
     repo: "{{ calicoctl_image_repo }}"
     tag: "{{ calicoctl_image_tag }}"
+    sha256: "{{ calicoctl_digest_checksum|default(None) }}"
     enabled: "{{ kube_network_plugin == 'calico' or kube_network_plugin == 'canal' }}"
   calico_node:
     container: true
     repo: "{{ calico_node_image_repo }}"
     tag: "{{ calico_node_image_tag }}"
+    sha256: "{{ calico_node_digest_checksum|default(None) }}"
     enabled: "{{ kube_network_plugin == 'calico' or kube_network_plugin == 'canal' }}"
   calico_cni:
     container: true
     repo: "{{ calico_cni_image_repo }}"
     tag: "{{ calico_cni_image_tag }}"
+    sha256: "{{ calico_cni_digest_checksum|default(None) }}"
     enabled: "{{ kube_network_plugin == 'calico' or kube_network_plugin == 'canal' }}"
   calico_policy:
     container: true
     repo: "{{ calico_policy_image_repo }}"
     tag: "{{ calico_policy_image_tag }}"
+    sha256: "{{ calico_policy_digest_checksum|default(None) }}"
     enabled: "{{ kube_network_plugin == 'canal' }}"
   calico_rr:
     container: true
     repo: "{{ calico_rr_image_repo }}"
     tag: "{{ calico_rr_image_tag }}"
+    sha256: "{{ calico_rr_digest_checksum|default(None) }}"
     enabled: "{{ peer_with_calico_rr is defined and peer_with_calico_rr}} and kube_network_plugin == 'calico'"
   pod_infra:
     container: true
     repo: "{{ pod_infra_image_repo }}"
     tag: "{{ pod_infra_image_tag }}"
+    sha256: "{{ pod_infra_digest_checksum|default(None) }}"
   nginx:
     container: true
     repo: "{{ nginx_image_repo }}"
     tag: "{{ nginx_image_tag }}"
+    sha256: "{{ nginx_digest_checksum|default(None) }}"
   dnsmasq:
     container: true
     repo: "{{ dnsmasq_image_repo }}"
     tag: "{{ dnsmasq_image_tag }}"
+    sha256: "{{ dnsmasq_digest_checksum|default(None) }}"
   kubednsmasq:
     container: true
     repo: "{{ kubednsmasq_image_repo }}"
     tag: "{{ kubednsmasq_image_tag }}"
+    sha256: "{{ kubednsmasq_digest_checksum|default(None) }}"
   kubedns:
     container: true
     repo: "{{ kubedns_image_repo }}"
     tag: "{{ kubedns_image_tag }}"
+    sha256: "{{ kubedns_digest_checksum|default(None) }}"
   testbox:
     container: true
     repo: "{{ test_image_repo }}"
     tag: "{{ test_image_tag }}"
+    sha256: "{{ testbox_digest_checksum|default(None) }}"
   exechealthz:
     container: true
     repo: "{{ exechealthz_image_repo }}"
     tag: "{{ exechealthz_image_tag }}"
+    sha256: "{{ exechealthz_digest_checksum|default(None) }}"
 
 download:
   container: "{{ file.container|default('false') }}"
diff --git a/roles/download/tasks/main.yml b/roles/download/tasks/main.yml
index aa2572ee7..8592ed223 100644
--- a/roles/download/tasks/main.yml
+++ b/roles/download/tasks/main.yml
@@ -39,11 +39,6 @@
     mode: "{{ download.mode|default(omit) }}"
   when: "{{ download.enabled|bool and not download.container|bool and (download.unarchive is not defined or download.unarchive == False) }}"
 
-- name: pulling...
-  debug:
-    msg: "{{ download.repo }}:{{ download.tag }}"
-  when: "{{ download.enabled|bool and download.container|bool }}"
-
 - set_fact:
     download_delegate: "{% if download_localhost %}localhost{% else %}{{groups['kube-master'][0]}}{% endif %}"
   tags: facts
@@ -70,29 +65,41 @@
   when: "{{ download_run_once|bool and download.enabled|bool and download.container|bool and download_delegate == 'localhost' }}"
   tags: localhost
 
+- name: Make download decision if pull is required by tag or sha256
+  include: set_docker_image_facts.yml  # sets pull_args and, unless download_always_pull, pull_required
+  when: "{{ download.enabled|bool and download.container|bool }}"
+  delegate_to: "{{ download_delegate if download_run_once|bool else inventory_hostname }}"
+  run_once: "{{ download_run_once|bool }}"
+  tags: facts
+
+- name: pulling...
+  debug:
+    msg: "{{ pull_args }}"  # image reference: repo@sha256:digest when a digest is set, else repo:tag
+  when: "{{ download.enabled|bool and download.container|bool }}"
+
 #NOTE(bogdando) this brings no docker-py deps for nodes
-- name: Download containers
-  command: "/usr/bin/docker pull {{ download.repo }}:{{ download.tag }}"
+- name: Download containers if pull is required or told to always pull
+  command: "/usr/bin/docker pull {{ pull_args }}"
   register: pull_task_result
   until: pull_task_result|success
   retries: 4
   delay: "{{ retry_stagger | random + 3 }}"
-  when: "{{ download.enabled|bool and download.container|bool }}"
+  when: "{{ download.enabled|bool and download.container|bool and pull_required|default(download_always_pull)|bool }}"  # default() before bool, so an unset pull_required falls back to download_always_pull
   delegate_to: "{{ download_delegate if download_run_once|bool else inventory_hostname }}"
   run_once: "{{ download_run_once|bool }}"
 
 - set_fact:
-    fname: "{{local_release_dir}}/containers/{{download.repo|regex_replace('/|\0|:', '_')}}:{{download.tag|regex_replace('/|\0|:', '_')}}.tar"
+    fname: "{{local_release_dir}}/containers/{{download.repo|regex_replace('/|\0|:', '_')}}:{{download.tag|default(download.sha256)|regex_replace('/|\0|:', '_')}}.tar"  # digest stands in when tag is undefined
   tags: facts
 
 - name: "Set default value for 'container_changed' to false"
   set_fact:
-    container_changed: false
+    container_changed: "{{pull_required|default(false)|bool}}"  # default() before bool, so an unset pull_required yields false
 
 - name: "Update the 'container_changed' fact"
   set_fact:
-    container_changed: "{{ not 'up to date' in pull_task_result.stdout }}"
-  when: "{{ download.enabled|bool and download.container|bool }}"
+    container_changed: "{{ pull_required|default(false)|bool or not 'up to date' in pull_task_result.stdout }}"
+  when: "{{ download.enabled|bool and download.container|bool and pull_required|default(download_always_pull)|bool }}"  # same guard as the pull task, so pull_task_result.stdout exists here
   delegate_to: "{{ download_delegate if download_run_once|bool else inventory_hostname }}"
   run_once: "{{ download_run_once|bool }}"
   tags: facts
@@ -108,7 +115,7 @@
   tags: facts
 
 - name: Download | save container images
-  shell: docker save "{{ download.repo }}:{{ download.tag }}" | gzip -{{ download_compress }} > "{{ fname }}"
+  shell: docker save "{{ pull_args }}" | gzip -{{ download_compress }} > "{{ fname }}"
   delegate_to: "{{ download_delegate }}"
   register: saved
   run_once: true
diff --git a/roles/download/tasks/set_docker_image_facts.yml b/roles/download/tasks/set_docker_image_facts.yml
new file mode 100644
index 000000000..7014487a5
--- /dev/null
+++ b/roles/download/tasks/set_docker_image_facts.yml
@@ -0,0 +1,27 @@
+---
+# Sets pull_by_digest and pull_args; unless download_always_pull, also docker_images and pull_required.
+- set_fact:
+    pull_by_digest: >-
+      {%- if download.sha256 is defined and download.sha256 != '' -%}true{%- else -%}false{%- endif -%}
+- set_fact:
+    pull_args: >-
+      {%- if pull_by_digest|bool -%}{{download.repo}}@sha256:{{download.sha256}}{%- else -%}{{download.repo}}:{{download.tag}}{%- endif -%}
+# Collect tags and digests of the local images as one comma separated list; an inspect failure is ignored, not fatal.
+- name: Register docker images info
+  shell: "{% raw %}/usr/bin/docker images -q | xargs /usr/bin/docker inspect -f '{{.RepoTags}},{{.RepoDigests}}'{% endraw %}"
+  register: docker_images_raw
+  ignore_errors: true
+  when: not download_always_pull|bool
+- set_fact:
+    docker_images: >-
+      {{ (docker_images_raw.stdout|replace('[', '')|replace(']', '')).split()|join(',') }}
+  when: not download_always_pull|bool
+- set_fact:
+    pull_required: >-
+      {%- if pull_args in docker_images.split(',') -%}false{%- else -%}true{%- endif -%}
+  when: not download_always_pull|bool
+# Build repo:tag inside the expression: a rendered, unquoted repo:tag is not a valid conditional.
+- name: Check the local digest sha256 corresponds to the given image tag
+  assert:
+    that: "(download.repo ~ ':' ~ download.tag) in docker_images.split(',')"
+  when: not download_always_pull|bool and not pull_required|bool and pull_by_digest|bool
-- 
GitLab