From d7bb4d954af3c25e51132a7c26b4eccb382322f7 Mon Sep 17 00:00:00 2001
From: Kuldip Madnani <k.madnani84@gmail.com>
Date: Fri, 12 Oct 2018 12:29:51 -0500
Subject: [PATCH] Handling docker clean up during docker upgrade and docker
 config changes. (#3321)

* Added changes to clean up orphan containers and reload docker & kubelet directories.

* Added new files for cleaning up orphans and docker & kubelet directories

* Added new lines at the end of these files

* removed the trailing whitespaces from main.yml and clean-up.yml

* Updated as per the review comments

* Updated as per the review comments

* Removed service_facts and package_facts because they are not supported in ansible 2.4.0

* Corrected yaml syntax errors

* Removed the use of json_query filter and utilized selectattr

* Removed trailing spaces

* Changed the default value of docker_clean_up to false

* Added Changes to only include cleanup-docker-orphans.sh

* Reverted back changes done inside handler.

* Removed trailing spaces and made default value of docker_orphan_clean_up as true

* Reverted the default value of docker_orphan_clean_up as false

* Made the docker clean up as drop in

* Made the docker clean up as drop in

* Reverted the value of boolean docker_orphan_clean_up to false
---
 .../container-engine/docker/defaults/main.yml |  3 ++
 .../docker/files/cleanup-docker-orphans.sh    | 38 +++++++++++++++++++
 .../container-engine/docker/tasks/systemd.yml | 14 +++++++
 .../templates/docker-orphan-cleanup.conf.j2   |  2 +
 4 files changed, 57 insertions(+)
 create mode 100644 roles/container-engine/docker/files/cleanup-docker-orphans.sh
 create mode 100644 roles/container-engine/docker/templates/docker-orphan-cleanup.conf.j2

diff --git a/roles/container-engine/docker/defaults/main.yml b/roles/container-engine/docker/defaults/main.yml
index ec819b24a..fb719878d 100644
--- a/roles/container-engine/docker/defaults/main.yml
+++ b/roles/container-engine/docker/defaults/main.yml
@@ -44,3 +44,6 @@ dockerproject_rh_repo_gpgkey: 'https://yum.dockerproject.org/gpg'
 dockerproject_apt_repo_base_url: 'https://apt.dockerproject.org/repo'
 dockerproject_apt_repo_gpgkey: 'https://apt.dockerproject.org/gpg'
 docker_bin_dir: "/usr/bin"
+
+# flag to enable/disable docker cleanup
+docker_orphan_clean_up: false
diff --git a/roles/container-engine/docker/files/cleanup-docker-orphans.sh b/roles/container-engine/docker/files/cleanup-docker-orphans.sh
new file mode 100644
index 000000000..5db82f88b
--- /dev/null
+++ b/roles/container-engine/docker/files/cleanup-docker-orphans.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+list_descendants ()
+{
+  local children=$(ps -o pid= --ppid "$1")
+  for pid in $children
+  do
+    list_descendants "$pid"
+  done
+  [[ -n "$children" ]] && echo "$children"
+}
+
+count_shim_processes=$(pgrep -f ^docker-containerd-shim | wc -l)
+live_restore=$(docker info --format {{.LiveRestoreEnabled}} 2>/dev/null)
+
+if [ ${count_shim_processes} -gt 0 ] && [ -n "${live_restore}" -a "${live_restore}" == "true" ]; then
+        # Find all container pids from shims
+        orphans=$(pgrep -P $(pgrep -d ',' -f ^docker-containerd-shim) |\
+        # Filter out valid docker pids, leaving the orphans
+        egrep -v $(docker ps -q | xargs docker inspect --format '{{.State.Pid}}' | awk '{printf "%s%s",sep,$1; sep="|"}'))
+
+        if [[ -n "$orphans" ]]
+        then
+                # Get shim pids of orphans
+                orphan_shim_pids=$(ps -o pid= $(ps -o ppid= $orphans))
+
+                # Find all orphaned container PIDs
+                orphan_container_pids=$(for pid in $orphan_shim_pids; do list_descendants $pid; done)
+
+                # Recursively kill all child PIDs of orphan shims
+                echo -e "Killing orphan container PIDs and descendants: \n$(ps -O ppid= $orphan_container_pids)"
+                #kill -9 $orphan_container_pids || true
+
+        else
+                echo "No orphaned containers found"
+        fi
+else
+        echo "Either live-restore is turned off or the node doesn't have any shim processes."
+fi
\ No newline at end of file
diff --git a/roles/container-engine/docker/tasks/systemd.yml b/roles/container-engine/docker/tasks/systemd.yml
index 78cec33cc..e37d7cc47 100644
--- a/roles/container-engine/docker/tasks/systemd.yml
+++ b/roles/container-engine/docker/tasks/systemd.yml
@@ -38,4 +38,18 @@
   notify: restart docker
   when: dns_mode != 'none' and resolvconf_mode == 'docker_dns'
 
+- name: Copy docker orphan clean up script to the node
+  copy:
+    src: cleanup-docker-orphans.sh
+    dest: "{{ bin_dir }}/cleanup-docker-orphans.sh"
+    mode: 0755
+  when: docker_orphan_clean_up | bool
+
+- name: Write docker orphan clean up systemd drop-in
+  template:
+    src: docker-orphan-cleanup.conf.j2
+    dest: "/etc/systemd/system/docker.service.d/docker-orphan-cleanup.conf"
+  notify: restart docker
+  when: docker_orphan_clean_up | bool
+
 - meta: flush_handlers
diff --git a/roles/container-engine/docker/templates/docker-orphan-cleanup.conf.j2 b/roles/container-engine/docker/templates/docker-orphan-cleanup.conf.j2
new file mode 100644
index 000000000..70754ac57
--- /dev/null
+++ b/roles/container-engine/docker/templates/docker-orphan-cleanup.conf.j2
@@ -0,0 +1,2 @@
+[Service]
+ExecStop=-{{ bin_dir }}/cleanup-docker-orphans.sh
\ No newline at end of file
-- 
GitLab