From fc5937e9480a93b3d2920aee677920e99ea54c10 Mon Sep 17 00:00:00 2001
From: Ashish Singh Dev <ashishsinghdev@gmail.com>
Date: Mon, 12 Jun 2023 09:15:47 +0530
Subject: [PATCH] fix gce-pd-csi driver (#10208)

* fix gce-pd-csi driver

* Read the replicas value from defaults.yml and correct the gcp-pd-csi driver version in README.md
---
 README.md                                     |   2 +-
 roles/download/defaults/main.yml              |   2 +-
 .../csi_driver/gcp_pd/tasks/main.yml          |   2 +
 .../templates/gcp-pd-csi-controller.yml.j2    | 100 +++++++++++-
 .../gcp_pd/templates/gcp-pd-csi-node.yml.j2   |   3 +-
 .../templates/gcp-pd-csi-sc-regional.yml.j2   |   9 ++
 .../templates/gcp-pd-csi-sc-zonal.yml.j2      |   8 +
 .../gcp_pd/templates/gcp-pd-csi-setup.yml.j2  | 147 ++++++++++++++----
 8 files changed, 237 insertions(+), 36 deletions(-)
 create mode 100644 roles/kubernetes-apps/csi_driver/gcp_pd/templates/gcp-pd-csi-sc-regional.yml.j2
 create mode 100644 roles/kubernetes-apps/csi_driver/gcp_pd/templates/gcp-pd-csi-sc-zonal.yml.j2

diff --git a/README.md b/README.md
index 6c110f20b..cea84d04b 100644
--- a/README.md
+++ b/README.md
@@ -191,7 +191,7 @@ Note: Upstart/SysV init based OS types are not supported.
   - [aws-ebs-csi-plugin](https://github.com/kubernetes-sigs/aws-ebs-csi-driver) v0.5.0
   - [azure-csi-plugin](https://github.com/kubernetes-sigs/azuredisk-csi-driver) v1.10.0
   - [cinder-csi-plugin](https://github.com/kubernetes/cloud-provider-openstack/blob/master/docs/cinder-csi-plugin/using-cinder-csi-plugin.md) v1.22.0
-  - [gcp-pd-csi-plugin](https://github.com/kubernetes-sigs/gcp-compute-persistent-disk-csi-driver) v1.4.0
+  - [gcp-pd-csi-plugin](https://github.com/kubernetes-sigs/gcp-compute-persistent-disk-csi-driver) v1.9.2
   - [local-path-provisioner](https://github.com/rancher/local-path-provisioner) v0.0.23
   - [local-volume-provisioner](https://github.com/kubernetes-sigs/sig-storage-local-static-provisioner) v2.5.0
 
diff --git a/roles/download/defaults/main.yml b/roles/download/defaults/main.yml
index f07110a87..fb75c2036 100644
--- a/roles/download/defaults/main.yml
+++ b/roles/download/defaults/main.yml
@@ -1107,7 +1107,7 @@ aws_ebs_csi_plugin_version: "v0.5.0"
 aws_ebs_csi_plugin_image_repo: "{{ docker_image_repo }}/amazon/aws-ebs-csi-driver"
 aws_ebs_csi_plugin_image_tag: "{{ aws_ebs_csi_plugin_version }}"
 
-gcp_pd_csi_plugin_version: "v1.4.0"
+gcp_pd_csi_plugin_version: "v1.9.2"
 gcp_pd_csi_plugin_image_repo: "{{ kube_image_repo }}/cloud-provider-gcp/gcp-compute-persistent-disk-csi-driver"
 gcp_pd_csi_plugin_image_tag: "{{ gcp_pd_csi_plugin_version }}"
 
diff --git a/roles/kubernetes-apps/csi_driver/gcp_pd/tasks/main.yml b/roles/kubernetes-apps/csi_driver/gcp_pd/tasks/main.yml
index 59a99f74e..be511caa4 100644
--- a/roles/kubernetes-apps/csi_driver/gcp_pd/tasks/main.yml
+++ b/roles/kubernetes-apps/csi_driver/gcp_pd/tasks/main.yml
@@ -28,6 +28,8 @@
     - {name: gcp-pd-csi-setup, file: gcp-pd-csi-setup.yml}
     - {name: gcp-pd-csi-controller, file: gcp-pd-csi-controller.yml}
     - {name: gcp-pd-csi-node, file: gcp-pd-csi-node.yml}
+    - {name: gcp-pd-csi-sc-regional, file: gcp-pd-csi-sc-regional.yml}
+    - {name: gcp-pd-csi-sc-zonal, file: gcp-pd-csi-sc-zonal.yml}
   register: gcp_pd_csi_manifests
   when: inventory_hostname == groups['kube_control_plane'][0]
 
diff --git a/roles/kubernetes-apps/csi_driver/gcp_pd/templates/gcp-pd-csi-controller.yml.j2 b/roles/kubernetes-apps/csi_driver/gcp_pd/templates/gcp-pd-csi-controller.yml.j2
index 4762093dc..61157d8fc 100644
--- a/roles/kubernetes-apps/csi_driver/gcp_pd/templates/gcp-pd-csi-controller.yml.j2
+++ b/roles/kubernetes-apps/csi_driver/gcp_pd/templates/gcp-pd-csi-controller.yml.j2
@@ -1,10 +1,9 @@
-kind: StatefulSet
+kind: Deployment
 apiVersion: apps/v1
 metadata:
   name: csi-gce-pd-controller
   namespace: kube-system
 spec:
-  serviceName: "csi-gce-pd"
   replicas: {{ gcp_pd_csi_controller_replicas }}
   selector:
     matchLabels:
@@ -19,6 +18,8 @@ spec:
       # this requirement when issue is resolved and before any exposure of
       # metrics ports
       hostNetwork: true
+      nodeSelector:
+        kubernetes.io/os: linux
       serviceAccountName: csi-gce-pd-controller-sa
       priorityClassName: csi-gce-pd-controller
       containers:
@@ -28,9 +29,32 @@ spec:
             - "--v=5"
             - "--csi-address=/csi/csi.sock"
             - "--feature-gates=Topology=true"
+            - "--http-endpoint=:22011"
+            - "--leader-election-namespace=$(PDCSI_NAMESPACE)"
+            - "--timeout=250s"
+            - "--extra-create-metadata"
+          # - "--run-controller-service=false"  # disable the controller service of the CSI driver
+          # - "--run-node-service=false"        # disable the node service of the CSI driver
+            - "--leader-election"
             - "--default-fstype=ext4"
-          # - "--run-controller-service=false" # disable the controller service of the CSI driver
-          # - "--run-node-service=false"       # disable the node service of the CSI driver
+            - "--controller-publish-readonly=true"
+          env:
+            - name: PDCSI_NAMESPACE
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.namespace
+          ports:
+            - containerPort: 22011
+              name: http-endpoint
+              protocol: TCP
+          livenessProbe:
+            failureThreshold: 1
+            httpGet:
+              path: /healthz/leader-election
+              port: http-endpoint
+            initialDelaySeconds: 10
+            timeoutSeconds: 10
+            periodSeconds: 20
           volumeMounts:
             - name: socket-dir
               mountPath: /csi
@@ -39,6 +63,27 @@ spec:
           args:
             - "--v=5"
             - "--csi-address=/csi/csi.sock"
+            - "--http-endpoint=:22012"
+            - "--leader-election"
+            - "--leader-election-namespace=$(PDCSI_NAMESPACE)"
+            - "--timeout=250s"
+          env:
+            - name: PDCSI_NAMESPACE
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.namespace
+          ports:
+            - containerPort: 22012
+              name: http-endpoint
+              protocol: TCP
+          livenessProbe:
+            failureThreshold: 1
+            httpGet:
+              path: /healthz/leader-election
+              port: http-endpoint
+            initialDelaySeconds: 10
+            timeoutSeconds: 10
+            periodSeconds: 20
           volumeMounts:
             - name: socket-dir
               mountPath: /csi
@@ -47,6 +92,44 @@ spec:
           args:
             - "--v=5"
             - "--csi-address=/csi/csi.sock"
+            - "--http-endpoint=:22013"
+            - "--leader-election"
+            - "--leader-election-namespace=$(PDCSI_NAMESPACE)"
+            - "--handle-volume-inuse-error=false"
+          env:
+            - name: PDCSI_NAMESPACE
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.namespace
+          ports:
+            - containerPort: 22013
+              name: http-endpoint
+              protocol: TCP
+          livenessProbe:
+            failureThreshold: 1
+            httpGet:
+              path: /healthz/leader-election
+              port: http-endpoint
+            initialDelaySeconds: 10
+            timeoutSeconds: 10
+            periodSeconds: 20
+          volumeMounts:
+            - name: socket-dir
+              mountPath: /csi
+        - name: csi-snapshotter
+          image: {{ csi_snapshotter_image_repo }}:{{ csi_snapshotter_image_tag }}
+          args:
+            - "--v=5"
+            - "--csi-address=/csi/csi.sock"
+            - "--metrics-address=:22014"  # NOTE(review): sidecars added above use --http-endpoint with a named port and livenessProbe; none is declared here — confirm intended
+            - "--leader-election"
+            - "--leader-election-namespace=$(PDCSI_NAMESPACE)"  # PDCSI_NAMESPACE is set from metadata.namespace below
+            - "--timeout=300s"
+          env:
+            - name: PDCSI_NAMESPACE
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.namespace
           volumeMounts:
             - name: socket-dir
               mountPath: /csi
@@ -72,4 +155,11 @@ spec:
         - name: cloud-sa-volume
           secret:
             secretName: cloud-sa
-  volumeClaimTemplates: []
+---
+apiVersion: storage.k8s.io/v1
+kind: CSIDriver
+metadata:
+  name: pd.csi.storage.gke.io  # the StorageClass templates in this patch use this name as their provisioner
+spec:
+  attachRequired: true
+  podInfoOnMount: false
diff --git a/roles/kubernetes-apps/csi_driver/gcp_pd/templates/gcp-pd-csi-node.yml.j2 b/roles/kubernetes-apps/csi_driver/gcp_pd/templates/gcp-pd-csi-node.yml.j2
index 204ff972e..9aad62069 100644
--- a/roles/kubernetes-apps/csi_driver/gcp_pd/templates/gcp-pd-csi-node.yml.j2
+++ b/roles/kubernetes-apps/csi_driver/gcp_pd/templates/gcp-pd-csi-node.yml.j2
@@ -49,6 +49,7 @@ spec:
           args:
             - "--v=5"
             - "--endpoint=unix:/csi/csi.sock"
+            - "--run-controller-service=false"
           volumeMounts:
             - name: kubelet-dir
               mountPath: /var/lib/kubelet
@@ -108,4 +109,4 @@ spec:
       # See "special case". This will tolerate everything. Node component should
       # be scheduled on all nodes.
       tolerations:
-      - operator: Exists
+      - operator: Exists
diff --git a/roles/kubernetes-apps/csi_driver/gcp_pd/templates/gcp-pd-csi-sc-regional.yml.j2 b/roles/kubernetes-apps/csi_driver/gcp_pd/templates/gcp-pd-csi-sc-regional.yml.j2
new file mode 100644
index 000000000..57a8675e4
--- /dev/null
+++ b/roles/kubernetes-apps/csi_driver/gcp_pd/templates/gcp-pd-csi-sc-regional.yml.j2
@@ -0,0 +1,9 @@
+apiVersion: storage.k8s.io/v1
+kind: StorageClass
+metadata:
+  name: csi-gce-pd-regional
+provisioner: pd.csi.storage.gke.io  # same name as the CSIDriver object in gcp-pd-csi-controller.yml.j2
+parameters:
+  type: pd-balanced
+  replication-type: regional-pd  # the only parameter that differs from the zonal StorageClass template
+volumeBindingMode: WaitForFirstConsumer
diff --git a/roles/kubernetes-apps/csi_driver/gcp_pd/templates/gcp-pd-csi-sc-zonal.yml.j2 b/roles/kubernetes-apps/csi_driver/gcp_pd/templates/gcp-pd-csi-sc-zonal.yml.j2
new file mode 100644
index 000000000..e9bedaf83
--- /dev/null
+++ b/roles/kubernetes-apps/csi_driver/gcp_pd/templates/gcp-pd-csi-sc-zonal.yml.j2
@@ -0,0 +1,8 @@
+apiVersion: storage.k8s.io/v1
+kind: StorageClass
+metadata:
+  name: csi-gce-pd-zonal
+provisioner: pd.csi.storage.gke.io  # same name as the CSIDriver object in gcp-pd-csi-controller.yml.j2
+parameters:
+  type: pd-balanced  # no replication-type is set here, unlike the regional StorageClass template
+volumeBindingMode: WaitForFirstConsumer
diff --git a/roles/kubernetes-apps/csi_driver/gcp_pd/templates/gcp-pd-csi-setup.yml.j2 b/roles/kubernetes-apps/csi_driver/gcp_pd/templates/gcp-pd-csi-setup.yml.j2
index 4c693b3fd..610baf33b 100644
--- a/roles/kubernetes-apps/csi_driver/gcp_pd/templates/gcp-pd-csi-setup.yml.j2
+++ b/roles/kubernetes-apps/csi_driver/gcp_pd/templates/gcp-pd-csi-setup.yml.j2
@@ -38,8 +38,21 @@ rules:
   - apiGroups: [""]
     resources: ["nodes"]
     verbs: ["get", "list", "watch"]
-
+  - apiGroups: ["snapshot.storage.k8s.io"]
+    resources: ["volumesnapshots"]
+    verbs: ["get", "list"]
+  - apiGroups: ["snapshot.storage.k8s.io"]
+    resources: ["volumesnapshotcontents"]
+    verbs: ["get", "list"]
+  # Access to volumeattachments is only needed when the CSI driver
+  # has the PUBLISH_UNPUBLISH_VOLUME controller capability.
+  # In that case, external-provisioner will watch volumeattachments
+  # to determine when it is safe to delete a volume.
+  - apiGroups: ["storage.k8s.io"]
+    resources: ["volumeattachments"]
+    verbs: ["get", "list", "watch"]
 ---
+
 kind: ClusterRoleBinding
 apiVersion: rbac.authorization.k8s.io/v1
 metadata:
@@ -130,6 +143,10 @@ rules:
   - apiGroups: [""]
     resources: ["events"]
     verbs: ["list", "watch", "create", "update", "patch"]
+  # If handle-volume-inuse-error=true, the pod specific rbac is needed
+  - apiGroups: [""]
+    resources: ["pods"]
+    verbs: ["get", "list", "watch"]
 
 ---
 kind: ClusterRoleBinding
@@ -144,34 +161,30 @@ roleRef:
   kind: ClusterRole
   name: csi-gce-pd-resizer-role
   apiGroup: rbac.authorization.k8s.io
-
 ---
-apiVersion: policy/v1beta1
-kind: PodSecurityPolicy
-metadata:
-  name: csi-gce-pd-node-psp
-spec:
-  seLinux:
-    rule: RunAsAny
-  supplementalGroups:
-    rule: RunAsAny
-  runAsUser:
-    rule: RunAsAny
-  fsGroup:
-    rule: RunAsAny
-  privileged: true
-  volumes:
-  - '*'
-  hostNetwork: true
-  allowedHostPaths:
-  - pathPrefix: "/var/lib/kubelet/plugins_registry/"
-  - pathPrefix: "/var/lib/kubelet"
-  - pathPrefix: "/var/lib/kubelet/plugins/pd.csi.storage.gke.io/"
-  - pathPrefix: "/dev"
-  - pathPrefix: "/etc/udev"
-  - pathPrefix: "/lib/udev"
-  - pathPrefix: "/run/udev"
-  - pathPrefix: "/sys"
+kind: ClusterRole
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  name: csi-gce-pd-controller-deploy
+rules:
+  - apiGroups: ["policy"]
+    resources: ["podsecuritypolicies"]
+    verbs: ["use"]
+    resourceNames:
+      - csi-gce-pd-controller-psp  # NOTE(review): no PodSecurityPolicy of this name is visible in this patch — confirm it is defined elsewhere
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: csi-gce-pd-controller-deploy
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: csi-gce-pd-controller-deploy
+subjects:
+  - kind: ServiceAccount
+    name: csi-gce-pd-controller-sa  # the controller service account receives the role above
+    namespace: kube-system
 ---
 
 kind: ClusterRole
@@ -198,3 +211,81 @@ subjects:
 - kind: ServiceAccount
   name: csi-gce-pd-node-sa
   namespace: kube-system
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: csi-gce-pd-controller
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: csi-gce-pd-node-deploy  # NOTE(review): binds the controller SA to the *node* deploy role — confirm intended
+subjects:
+- kind: ServiceAccount
+  name: csi-gce-pd-controller-sa
+  namespace: kube-system
+
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: csi-gce-pd-snapshotter-role  # referenced by csi-gce-pd-controller-snapshotter-binding
+rules:
+  - apiGroups: [""]
+    resources: ["events"]
+    verbs: ["list", "watch", "create", "update", "patch"]
+  # Secrets resource omitted since GCE PD snapshots do not require them
+  - apiGroups: ["snapshot.storage.k8s.io"]
+    resources: ["volumesnapshotclasses"]
+    verbs: ["get", "list", "watch"]
+  - apiGroups: ["snapshot.storage.k8s.io"]
+    resources: ["volumesnapshotcontents"]
+    verbs: ["create", "get", "list", "watch", "update", "delete", "patch"]
+  - apiGroups: ["snapshot.storage.k8s.io"]
+    resources: ["volumesnapshotcontents/status"]
+    verbs: ["update", "patch"]
+---
+
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: csi-gce-pd-controller-snapshotter-binding
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: csi-gce-pd-snapshotter-role
+subjects:
+  - kind: ServiceAccount
+    name: csi-gce-pd-controller-sa  # subject that receives csi-gce-pd-snapshotter-role
+    namespace: kube-system
+---
+
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  labels:
+    k8s-app: gcp-compute-persistent-disk-csi-driver
+  name: csi-gce-pd-leaderelection-role
+  namespace: kube-system
+rules:
+  - apiGroups: ["coordination.k8s.io"]
+    resources: ["leases"]  # the only resource this Role covers
+    verbs: ["get", "watch", "list", "delete", "update", "create"]
+
+---
+
+kind: RoleBinding
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  name: csi-gce-pd-controller-leaderelection-binding
+  namespace: kube-system
+  labels:
+    k8s-app: gcp-compute-persistent-disk-csi-driver
+subjects:
+- kind: ServiceAccount
+  name: csi-gce-pd-controller-sa
+  namespace: kube-system
+roleRef:
+  kind: Role
+  name: csi-gce-pd-leaderelection-role  # the Role covering coordination.k8s.io leases
+  apiGroup: rbac.authorization.k8s.io
-- 
GitLab