diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 77a9fd41a067c971842edf141a09c42a8d23babb..4fc4582394f4b90e5f9d0a2af88d9d4c6dd60e16 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -296,10 +296,18 @@ before_script: # stage: deploy-special MOVED_TO_GROUP_VARS: "true" +.centos7_kube_router_variables: &centos7_kube_router_variables +# stage: deploy-special + MOVED_TO_GROUP_VARS: "true" + .coreos_alpha_weave_ha_variables: &coreos_alpha_weave_ha_variables # stage: deploy-special MOVED_TO_GROUP_VARS: "true" +.coreos_kube_router_variables: &coreos_kube_router_variables +# stage: deploy-special + MOVED_TO_GROUP_VARS: "true" + .ubuntu_rkt_sep_variables: &ubuntu_rkt_sep_variables # stage: deploy-part1 MOVED_TO_GROUP_VARS: "true" @@ -316,6 +324,10 @@ before_script: # stage: deploy-special MOVED_TO_GROUP_VARS: "true" +.ubuntu_kube_router_variables: &ubuntu_kube_router_variables +# stage: deploy-special + MOVED_TO_GROUP_VARS: "true" + .opensuse_canal_variables: &opensuse_canal_variables # stage: deploy-part2 MOVED_TO_GROUP_VARS: "true" @@ -615,6 +627,17 @@ gce_centos7-calico-ha-triggers: when: on_success only: ['triggers'] +gce_centos7-kube-router: + stage: deploy-special + <<: *job + <<: *gce + variables: + <<: *gce_variables + <<: *centos7_kube_router_variables + when: manual + except: ['triggers'] + only: ['master', /^pr-.*$/] + gce_opensuse-canal: stage: deploy-part2 <<: *job @@ -638,6 +661,17 @@ gce_coreos-alpha-weave-ha: except: ['triggers'] only: ['master', /^pr-.*$/] +gce_coreos-kube-router: + stage: deploy-special + <<: *job + <<: *gce + variables: + <<: *gce_variables + <<: *coreos_kube_router_variables + when: manual + except: ['triggers'] + only: ['master', /^pr-.*$/] + gce_ubuntu-rkt-sep: stage: deploy-part2 <<: *job @@ -682,6 +716,17 @@ gce_ubuntu-flannel-sep: except: ['triggers'] only: ['master', /^pr-.*$/] +gce_ubuntu-kube-router-sep: + stage: deploy-special + <<: *job + <<: *gce + variables: + <<: *gce_variables + <<: *ubuntu_kube_router_variables + when: 
manual + except: ['triggers'] + only: ['master', /^pr-.*$/] + # Premoderated with manual actions ci-authorized: <<: *job diff --git a/README.md b/README.md index 99a24c1ad73aa627292ad3c5feb30e64823ed1b2..0d2cd5913d41c61a3b0735d4529049e383bf57e0 100644 --- a/README.md +++ b/README.md @@ -117,6 +117,7 @@ Supported Components - [contiv](https://github.com/contiv/install) v1.1.7 - [flanneld](https://github.com/coreos/flannel) v0.10.0 - [weave](https://github.com/weaveworks/weave) v2.4.1 + - [kube-router](https://github.com/cloudnativelabs/kube-router) v0.2.0 - Application - [cephfs-provisioner](https://github.com/kubernetes-incubator/external-storage) v2.1.0-k8s1.11 - [cert-manager](https://github.com/jetstack/cert-manager) v0.5.0 @@ -164,6 +165,11 @@ You can choose between 6 network plugins. (default: `calico`, except Vagrant use - [weave](docs/weave.md): Weave is a lightweight container overlay network that doesn't require an external K/V database cluster. (Please refer to `weave` [troubleshooting documentation](http://docs.weave.works/weave/latest_release/troubleshooting.html)). +- [kube-router](docs/kube-router.md): Kube-router is a L3 CNI for Kubernetes networking aiming to provide operational + simplicity and high performance: it uses IPVS to provide Kube Services Proxy (if set up to replace kube-proxy), + iptables for network policies, and BGP for Pods L3 networking (with optional BGP peering with out-of-cluster BGP peers). + It can also optionally advertise routes to Kubernetes cluster Pods CIDRs, ClusterIPs, ExternalIPs and LoadBalancerIPs. + The choice is defined with the variable `kube_network_plugin`. There is also an option to leverage built-in cloud provider networking instead. See also [Network checker](docs/netcheck.md). 
diff --git a/docs/kube-router.md b/docs/kube-router.md new file mode 100644 index 0000000000000000000000000000000000000000..dca7490236fe02c32461a66ddbe4fa8c8a4d0e34 --- /dev/null +++ b/docs/kube-router.md @@ -0,0 +1,91 @@ +Kube-router +=========== + +Kube-router is a L3 CNI provider, as such it will setup IPv4 routing between +nodes to provide Pods' networks reachability. + +See [kube-router documentation](https://www.kube-router.io/). + +## Verifying kube-router install + +Kube-router runs its pods as a `DaemonSet` in the `kube-system` namespace: + +* Check the status of kube-router pods + +``` +# From the CLI +kubectl get pod --namespace=kube-system -l k8s-app=kube-router -owide + +# output +NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE +kube-router-4f679 1/1 Running 0 2d 192.168.186.4 mykube-k8s-node-nf-2 <none> +kube-router-5slf8 1/1 Running 0 2d 192.168.186.11 mykube-k8s-node-nf-3 <none> +kube-router-lb6k2 1/1 Running 0 20h 192.168.186.14 mykube-k8s-node-nf-6 <none> +kube-router-rzvrb 1/1 Running 0 20h 192.168.186.17 mykube-k8s-node-nf-4 <none> +kube-router-v6n56 1/1 Running 0 2d 192.168.186.6 mykube-k8s-node-nf-1 <none> +kube-router-wwhg8 1/1 Running 0 20h 192.168.186.16 mykube-k8s-node-nf-5 <none> +kube-router-x2xs7 1/1 Running 0 2d 192.168.186.10 mykube-k8s-master-1 <none> +``` + +* Peek at kube-router container logs: + +``` +# From the CLI +kubectl logs --namespace=kube-system -l k8s-app=kube-router | grep Peer.Up + +# output +time="2018-09-17T16:47:14Z" level=info msg="Peer Up" Key=192.168.186.6 State=BGP_FSM_OPENCONFIRM Topic=Peer +time="2018-09-17T16:47:16Z" level=info msg="Peer Up" Key=192.168.186.11 State=BGP_FSM_OPENCONFIRM Topic=Peer +time="2018-09-17T16:47:46Z" level=info msg="Peer Up" Key=192.168.186.10 State=BGP_FSM_OPENCONFIRM Topic=Peer +time="2018-09-18T19:12:24Z" level=info msg="Peer Up" Key=192.168.186.14 State=BGP_FSM_OPENCONFIRM Topic=Peer +time="2018-09-18T19:12:28Z" level=info msg="Peer Up" Key=192.168.186.17 
State=BGP_FSM_OPENCONFIRM Topic=Peer +time="2018-09-18T19:12:38Z" level=info msg="Peer Up" Key=192.168.186.16 State=BGP_FSM_OPENCONFIRM Topic=Peer +[...] +``` + +## Gathering kube-router state + +Kube-router Pods come bundled with a "Pod Toolbox" which provides very +useful internal state views for: + +* IPVS: via `ipvsadm` +* BGP peering and routing info: via `gobgp` + +You need to `kubectl exec -it ...` into a kube-router container to use these, see +<https://www.kube-router.io/docs/pod-toolbox/> for details. + +## Kube-router configuration + + +You can change the default configuration by overriding `kube_router_...` variables +(as found at `roles/network_plugin/kube-router/defaults/main.yml`), +these are named to follow `kube-router` command-line options as per +<https://www.kube-router.io/docs/user-guide/#try-kube-router-with-cluster-installers>. + +## Caveats + +### kubeadm_enabled: true + +If you want to set `kube-router` to replace `kube-proxy` +(`--run-service-proxy=true`) while using `kubeadm_enabled`, +then `kube-proxy` DaemonSet will be removed *after* kubeadm finishes +running, as it's not possible to skip kube-proxy install in kubeadm flags +and/or config, see https://github.com/kubernetes/kubeadm/issues/776. + +Given above, if `--run-service-proxy=true` is needed it would be +better to avoid `kubeadm_enabled` i.e. set: + +``` +kubeadm_enabled: false +kube_router_run_service_proxy: true + +``` + +If for some reason you do want/need to set `kubeadm_enabled`, removing +it afterwards behaves better if kube-proxy is set to ipvs mode, i.e. 
set: + +``` +kubeadm_enabled: true +kube_router_run_service_proxy: true +kube_proxy_mode: ipvs +``` diff --git a/docs/openstack.md b/docs/openstack.md index ef2d1dcf8f07ef15bf8b4f2a4953ec588ca1580b..5d07fb33f2dd67fcaff3c77ee1ae6ac315b94c07 100644 --- a/docs/openstack.md +++ b/docs/openstack.md @@ -8,15 +8,15 @@ After that make sure to source in your OpenStack credentials like you would do w The next step is to make sure the hostnames in your `inventory` file are identical to your instance names in OpenStack. Otherwise [cinder](https://wiki.openstack.org/wiki/Cinder) won't work as expected. -Unless you are using calico you can now run the playbook. +Unless you are using calico or kube-router you can now run the playbook. -**Additional step needed when using calico:** +**Additional step needed when using calico or kube-router:** -Calico does not encapsulate all packages with the hosts' ip addresses. Instead the packages will be routed with the PODs ip addresses directly. +Being L3 CNI, calico and kube-router do not encapsulate all packets with the hosts' ip addresses. Instead the packets will be routed with the PODs ip addresses directly. -OpenStack will filter and drop all packages from ips it does not know to prevent spoofing. +OpenStack will filter and drop all packets from ips it does not know to prevent spoofing. -In order to make calico work on OpenStack you will need to tell OpenStack to allow calico's packages by allowing the network it uses. +In order to make L3 CNIs work on OpenStack you will need to tell OpenStack to allow pods' packets by allowing the network they use. 
First you will need the ids of your OpenStack instances that will run kubernetes: @@ -36,10 +36,14 @@ Then you can use the instance ids to find the connected [neutron](https://wiki.o | 5662a4e0-e646-47f0-bf88-d80fbd2d99ef | e1f48aad-df96-4bce-bf61-62ae12bf3f95 | | e5ae2045-a1e1-4e99-9aac-4353889449a7 | 725cd548-6ea3-426b-baaa-e7306d3c8052 | -Given the port ids on the left, you can set the two `allowed_address`(es) in OpenStack. Note that you have to allow both `kube_service_addresses` (default `10.233.0.0/18`) and `kube_pods_subnet` (default `10.233.64.0/18`.) +Given the port ids on the left, you can set the two `allowed-address`(es) in OpenStack. Note that you have to allow both `kube_service_addresses` (default `10.233.0.0/18`) and `kube_pods_subnet` (default `10.233.64.0/18`.) # allow kube_service_addresses and kube_pods_subnet network openstack port set 5662a4e0-e646-47f0-bf88-d80fbd2d99ef --allowed-address ip-address=10.233.0.0/18 --allowed-address ip-address=10.233.64.0/18 openstack port set e5ae2045-a1e1-4e99-9aac-4353889449a7 --allowed-address ip-address=10.233.0.0/18 --allowed-address ip-address=10.233.64.0/18 +If all the VMs in the tenant correspond to kubespray deployment, you can "sweep run" above with: + + openstack port list --device-owner=compute:nova -c ID -f value | xargs -tI@ openstack port set @ --allowed-address ip-address=10.233.0.0/18 --allowed-address ip-address=10.233.64.0/18 + Now you can finally run the playbook. 
diff --git a/inventory/sample/group_vars/k8s-cluster/k8s-net-kube-router.yml b/inventory/sample/group_vars/k8s-cluster/k8s-net-kube-router.yml new file mode 100644 index 0000000000000000000000000000000000000000..3d5cb725602a06bcbb00ad46e0d2c9d8d8ec43ba --- /dev/null +++ b/inventory/sample/group_vars/k8s-cluster/k8s-net-kube-router.yml @@ -0,0 +1,37 @@ +# See roles/network_plugin/kube-router/defaults/main.yml + +# Enables Pod Networking -- Advertises and learns the routes to Pods via iBGP +# kube_router_run_router: true + +# Enables Network Policy -- sets up iptables to provide ingress firewall for pods +# kube_router_run_firewall: true + +# Enables Service Proxy -- sets up IPVS for Kubernetes Services +# see docs/kube-router.md "Caveats" section +# kube_router_run_service_proxy: false + +# Add Cluster IP of the service to the RIB so that it gets advertised to the BGP peers. +# kube_router_advertise_cluster_ip: false + +# Add External IP of service to the RIB so that it gets advertised to the BGP peers. +# kube_router_advertise_external_ip: false + +# Add LoadBalancer IP of service status as set by the LB provider to the RIB so that it gets advertised to the BGP peers. +# kube_router_advertise_loadbalancer_ip: false + +# Array of arbitrary extra arguments to kube-router, see +# https://github.com/cloudnativelabs/kube-router/blob/master/docs/user-guide.md +# kube_router_extra_args: [] + +# ASN numbers of the BGP peer to which cluster nodes will advertise cluster ip and node's pod cidr. +# kube_router_peer_router_asns: ~ + +# The ip address of the external router to which all nodes will peer and advertise the cluster ip and pod cidr's. +# kube_router_peer_router_ips: ~ + +# The remote port of the external BGP to which all nodes will peer. If not set, default BGP port (179) will be used. 
+# kube_router_peer_router_ports: ~ + +# Setups node CNI to allow hairpin mode, requires node reboots, see +# https://github.com/cloudnativelabs/kube-router/blob/master/docs/user-guide.md#hairpin-mode +# kube_router_support_hairpin_mode: false diff --git a/roles/download/defaults/main.yml b/roles/download/defaults/main.yml index 854aa81f613cbdd9e8317c5ba4441e5c94cf3234..918f3eab69f08961431ac26db1f3dd1dc45ce554 100644 --- a/roles/download/defaults/main.yml +++ b/roles/download/defaults/main.yml @@ -55,6 +55,7 @@ weave_version: "2.4.1" pod_infra_version: 3.1 contiv_version: 1.2.1 cilium_version: "v1.2.0" +kube_router_version: "v0.2.0" # Download URLs kubeadm_download_url: "https://storage.googleapis.com/kubernetes-release/release/{{ kubeadm_version }}/bin/linux/{{ image_arch }}/kubeadm" @@ -152,6 +153,8 @@ contiv_ovs_image_repo: "contiv/ovs" contiv_ovs_image_tag: "latest" cilium_image_repo: "docker.io/cilium/cilium" cilium_image_tag: "{{ cilium_version }}" +kube_router_image_repo: "cloudnativelabs/kube-router" +kube_router_image_tag: "{{ kube_router_version }}" nginx_image_repo: nginx nginx_image_tag: 1.13 dnsmasq_version: 2.78 @@ -178,6 +181,8 @@ kubednsautoscaler_image_repo: "gcr.io/google_containers/cluster-proportional-aut kubednsautoscaler_image_tag: "{{ kubednsautoscaler_version }}" test_image_repo: busybox test_image_tag: latest +busybox_image_repo: busybox +busybox_image_tag: 1.29.2 helm_version: "v2.9.1" helm_image_repo: "lachlanevenson/k8s-helm" helm_image_tag: "{{ helm_version }}" @@ -391,6 +396,15 @@ downloads: groups: - k8s-cluster + kube_router: + enabled: "{{ kube_network_plugin == 'kube-router' }}" + container: true + repo: "{{ kube_router_image_repo }}" + tag: "{{ kube_router_image_tag }}" + sha256: "{{ kube_router_digest_checksum|default(None) }}" + groups: + - k8s-cluster + pod_infra: enabled: true container: true @@ -472,6 +486,15 @@ downloads: groups: - kube-node + busybox: + enabled: "{{ kube_network_plugin in ['kube-router'] }}" + container: 
true + repo: "{{ busybox_image_repo }}" + tag: "{{ busybox_image_tag }}" + sha256: "{{ busybox_digest_checksum|default(None) }}" + groups: + - k8s-cluster + testbox: enabled: false container: true diff --git a/roles/kubernetes-apps/network_plugin/kube-router/tasks/main.yml b/roles/kubernetes-apps/network_plugin/kube-router/tasks/main.yml new file mode 100644 index 0000000000000000000000000000000000000000..77f5b8bae8ea4327e02f5964bd635ea37dc6a1c7 --- /dev/null +++ b/roles/kubernetes-apps/network_plugin/kube-router/tasks/main.yml @@ -0,0 +1,22 @@ +--- + +- name: kube-router | Start Resources + kube: + name: "kube-router" + kubectl: "{{ bin_dir }}/kubectl" + filename: "{{ kube_config_dir }}/kube-router.yml" + resource: "ds" + namespace: "kube-system" + state: "latest" + when: + - inventory_hostname == groups['kube-master'][0] + +- name: kube-router | Wait for kube-router pods to be ready + command: "{{bin_dir}}/kubectl -n kube-system get pods -l k8s-app=kube-router -o jsonpath='{.items[?(@.status.containerStatuses[0].ready==false)].metadata.name}'" + register: pods_not_ready + until: pods_not_ready.stdout.find("kube-router")==-1 + retries: 30 + delay: 10 + ignore_errors: yes + when: + - inventory_hostname == groups['kube-master'][0] diff --git a/roles/kubernetes-apps/network_plugin/meta/main.yml b/roles/kubernetes-apps/network_plugin/meta/main.yml index 7876401b8feac90065f3dfe97e7255e71b579721..c88dbf0153a1a2d35f045d6e31b9bd8686c6c4e4 100644 --- a/roles/kubernetes-apps/network_plugin/meta/main.yml +++ b/roles/kubernetes-apps/network_plugin/meta/main.yml @@ -29,3 +29,8 @@ dependencies: when: kube_network_plugin == 'weave' tags: - weave + + - role: kubernetes-apps/network_plugin/kube-router + when: kube_network_plugin == 'kube-router' + tags: + - kube-router diff --git a/roles/kubernetes/kubeadm/tasks/main.yml b/roles/kubernetes/kubeadm/tasks/main.yml index 55dbf29a96543c7f6628a6c2322cf5138d306e42..c15b0699b25fa2fb98fa77bdab7d1438d12e516d 100644 --- 
a/roles/kubernetes/kubeadm/tasks/main.yml +++ b/roles/kubernetes/kubeadm/tasks/main.yml @@ -96,6 +96,9 @@ - kubeadm_config_api_fqdn is not defined - is_kube_master - kubeadm_discovery_address != kube_apiserver_endpoint + - not kube_proxy_remove + tags: + - kube-proxy # FIXME(mattymo): Reconcile kubelet kubeconfig filename for both deploy modes - name: Symlink kubelet kubeconfig for calico/canal @@ -114,3 +117,19 @@ - kubeadm_config_api_fqdn is not defined - is_kube_master - kubeadm_discovery_address != kube_apiserver_endpoint + - not kube_proxy_remove + tags: + - kube-proxy + +# FIXME(jjo): need to post-remove kube-proxy until https://github.com/kubernetes/kubeadm/issues/776 +# is fixed +- name: Delete kube-proxy daemonset if kube_proxy_remove set, e.g. kube_network_plugin providing proxy services + shell: "{{ bin_dir }}/kubectl delete daemonset -n kube-system kube-proxy" + delegate_to: "{{groups['kube-master']|first}}" + run_once: true + when: + - kube_proxy_remove + - is_kube_master + - kubeadm_discovery_address != kube_apiserver_endpoint + tags: + - kube-proxy diff --git a/roles/kubernetes/master/templates/manifests/kube-controller-manager.manifest.j2 b/roles/kubernetes/master/templates/manifests/kube-controller-manager.manifest.j2 index 23a690ce43a2c5a251b9ac9413cbe4216ac50876..c2208a9e012e882178db6e877c23c50e8d477697 100644 --- a/roles/kubernetes/master/templates/manifests/kube-controller-manager.manifest.j2 +++ b/roles/kubernetes/master/templates/manifests/kube-controller-manager.manifest.j2 @@ -55,7 +55,7 @@ spec: {% if kube_network_plugin is defined and kube_network_plugin == 'cloud' %} - --configure-cloud-routes=true {% endif %} -{% if kube_network_plugin is defined and kube_network_plugin in ["cloud", "flannel", "canal", "cilium"] %} +{% if kube_network_plugin is defined and kube_network_plugin in ["cloud", "flannel", "canal", "cilium", "kube-router"] %} - --allocate-node-cidrs=true - --cluster-cidr={{ kube_pods_subnet }} - --service-cluster-ip-range={{ 
kube_service_addresses }} diff --git a/roles/kubernetes/node/tasks/main.yml b/roles/kubernetes/node/tasks/main.yml index 785849f20a8faba2dda70d2950580a063f1d879c..83454f0c7a8e22c8693afbb4e0efbd0df70cc282 100644 --- a/roles/kubernetes/node/tasks/main.yml +++ b/roles/kubernetes/node/tasks/main.yml @@ -146,15 +146,26 @@ template: src: manifests/kube-proxy.manifest.j2 dest: "{{ kube_manifest_dir }}/kube-proxy.manifest" - when: not kubeadm_enabled + when: + - not (kubeadm_enabled or kube_proxy_remove) tags: - kube-proxy -- name: Purge proxy manifest for kubeadm +- name: Purge proxy manifest for kubeadm or if proxy services being provided by other means, e.g. network_plugin file: path: "{{ kube_manifest_dir }}/kube-proxy.manifest" state: absent - when: kubeadm_enabled + when: + - kubeadm_enabled or kube_proxy_remove + tags: + - kube-proxy + +- name: Cleanup kube-proxy leftovers from node + command: "{{ docker_bin_dir }}/docker run --rm --privileged -v /lib/modules:/lib/modules --net=host {{ hyperkube_image_repo }}:{{ hyperkube_image_tag }} kube-proxy --cleanup" + when: + - kube_proxy_remove + # `kube-proxy --cleanup`, being Ok as per shown WARNING, still returns 255 from above run (?) 
+ ignore_errors: true tags: - kube-proxy diff --git a/roles/kubernetes/node/templates/kubelet.kubeadm.env.j2 b/roles/kubernetes/node/templates/kubelet.kubeadm.env.j2 index 903544b601fbd7a8aaa9021d87bd2529a5d87f67..96ed45fde3eb01b2229f91c47d43ccab6155a020 100644 --- a/roles/kubernetes/node/templates/kubelet.kubeadm.env.j2 +++ b/roles/kubernetes/node/templates/kubelet.kubeadm.env.j2 @@ -97,7 +97,7 @@ KUBELET_HOSTNAME="--hostname-override={{ kube_override_hostname }}" {% set all_node_labels = role_node_labels + inventory_node_labels %} KUBELET_ARGS="{{ kubelet_args_base }} {{ kubelet_args_dns }} {{ kube_reserved }} --node-labels={{ all_node_labels | join(',') }} {% if kubelet_custom_flags is string %} {{kubelet_custom_flags}} {% else %}{% for flag in kubelet_custom_flags %} {{flag}} {% endfor %}{% endif %}{% if inventory_hostname in groups['kube-node'] %}{% if kubelet_node_custom_flags is string %} {{kubelet_node_custom_flags}} {% else %}{% for flag in kubelet_node_custom_flags %} {{flag}} {% endfor %}{% endif %}{% endif %}" -{% if kube_network_plugin is defined and kube_network_plugin in ["calico", "canal", "flannel", "weave", "contiv", "cilium"] %} +{% if kube_network_plugin is defined and kube_network_plugin in ["calico", "canal", "flannel", "weave", "contiv", "cilium", "kube-router"] %} KUBELET_NETWORK_PLUGIN="--network-plugin=cni --cni-conf-dir=/etc/cni/net.d --cni-bin-dir=/opt/cni/bin" {% elif kube_network_plugin is defined and kube_network_plugin == "cloud" %} KUBELET_NETWORK_PLUGIN="--hairpin-mode=promiscuous-bridge --network-plugin=kubenet" diff --git a/roles/kubernetes/node/templates/kubelet.rkt.service.j2 b/roles/kubernetes/node/templates/kubelet.rkt.service.j2 index ee1eaa1b1afdb2bb706893cd1f56bcb31fe6265c..ec1dc49753a59a535bbcee6f8395b88f645a369e 100644 --- a/roles/kubernetes/node/templates/kubelet.rkt.service.j2 +++ b/roles/kubernetes/node/templates/kubelet.rkt.service.j2 @@ -33,7 +33,7 @@ ExecStart=/usr/bin/rkt run \ --volume 
var-lib-docker,kind=host,source={{ docker_daemon_graph }},readOnly=false \ --volume var-lib-kubelet,kind=host,source=/var/lib/kubelet,readOnly=false,recursive=true \ --volume var-log,kind=host,source=/var/log \ -{% if kube_network_plugin in ["calico", "weave", "canal", "flannel", "contiv", "cilium"] %} +{% if kube_network_plugin in ["calico", "weave", "canal", "flannel", "contiv", "cilium", "kube-router"] %} --volume etc-cni,kind=host,source=/etc/cni,readOnly=true \ --volume opt-cni,kind=host,source=/opt/cni,readOnly=true \ --volume var-lib-cni,kind=host,source=/var/lib/cni,readOnly=false \ diff --git a/roles/kubernetes/node/templates/kubelet.standard.env.j2 b/roles/kubernetes/node/templates/kubelet.standard.env.j2 index f649859fe711e1a3ca2ce4e92c46122708dbd9d8..3af478344fd758dce038d593d16d0c7722612c83 100644 --- a/roles/kubernetes/node/templates/kubelet.standard.env.j2 +++ b/roles/kubernetes/node/templates/kubelet.standard.env.j2 @@ -124,7 +124,7 @@ KUBELET_HOSTNAME="--hostname-override={{ kube_override_hostname }}" KUBELET_ARGS="{{ kubelet_args_base }} {{ kubelet_args_dns }} {{ kubelet_args_kubeconfig }} {{ kube_reserved }} --node-labels={{ all_node_labels | join(',') }} {% if kube_feature_gates %} --feature-gates={{ kube_feature_gates|join(',') }} {% endif %} {% if kubelet_custom_flags is string %} {{kubelet_custom_flags}} {% else %}{% for flag in kubelet_custom_flags %} {{flag}} {% endfor %}{% endif %}{% if inventory_hostname in groups['kube-node'] %}{% if kubelet_node_custom_flags is string %} {{kubelet_node_custom_flags}} {% else %}{% for flag in kubelet_node_custom_flags %} {{flag}} {% endfor %}{% endif %}{% endif %}" -{% if kube_network_plugin is defined and kube_network_plugin in ["calico", "canal", "flannel", "weave", "contiv", "cilium"] %} +{% if kube_network_plugin is defined and kube_network_plugin in ["calico", "canal", "flannel", "weave", "contiv", "cilium", "kube-router"] %} KUBELET_NETWORK_PLUGIN="--network-plugin=cni --cni-conf-dir=/etc/cni/net.d 
--cni-bin-dir=/opt/cni/bin" {% elif kube_network_plugin is defined and kube_network_plugin == "weave" %} DOCKER_SOCKET="--docker-endpoint=unix:/var/run/weave/weave.sock" diff --git a/roles/kubernetes/preinstall/tasks/0020-verify-settings.yml b/roles/kubernetes/preinstall/tasks/0020-verify-settings.yml index 9b16442fedec717388ad550abf84c2b6f4a59b45..72f2bf5283c571adf965fcf26a5ba04efc466cab 100644 --- a/roles/kubernetes/preinstall/tasks/0020-verify-settings.yml +++ b/roles/kubernetes/preinstall/tasks/0020-verify-settings.yml @@ -26,7 +26,7 @@ - name: Stop if unknown network plugin assert: - that: kube_network_plugin in ['calico', 'canal', 'flannel', 'weave', 'cloud', 'cilium', 'contiv'] + that: kube_network_plugin in ['calico', 'canal', 'flannel', 'weave', 'cloud', 'cilium', 'contiv', 'kube-router'] when: kube_network_plugin is defined ignore_errors: "{{ ignore_assert_errors }}" diff --git a/roles/kubernetes/preinstall/tasks/0040-set_facts.yml b/roles/kubernetes/preinstall/tasks/0040-set_facts.yml index 1fddb7de508d0489439a781f183cfd529452377b..f0d3001de5481d3c3d4937db8bd9a0d79a686aa2 100644 --- a/roles/kubernetes/preinstall/tasks/0040-set_facts.yml +++ b/roles/kubernetes/preinstall/tasks/0040-set_facts.yml @@ -158,3 +158,20 @@ paths: - ../vars skip: true + +- name: force kube_proxy removal if proxy services are replaced by other means + set_fact: + kube_proxy_remove: "{{ (kube_network_plugin == 'kube-router') and (kube_router_run_service_proxy is defined and kube_router_run_service_proxy)| bool }}" + tags: + - facts + - kube-proxy + +- name: override kube_proxy_mode to ipvs if kube_proxy_remove is set, as ipvs won't require kube-proxy cleanup when kube-proxy daemonset gets deleted + set_fact: + kube_proxy_mode: 'ipvs' + when: + - kubeadm_enabled + - kube_proxy_remove + tags: + - facts + - kube-proxy diff --git a/roles/kubernetes/preinstall/tasks/0050-create_directories.yml b/roles/kubernetes/preinstall/tasks/0050-create_directories.yml index 
30711603325be79dd765d4fe0893d2c74b3f5527..c508af4c998653336a3dfc42eea7ee247909f0e1 100644 --- a/roles/kubernetes/preinstall/tasks/0050-create_directories.yml +++ b/roles/kubernetes/preinstall/tasks/0050-create_directories.yml @@ -33,7 +33,7 @@ - "/opt/cni/bin" - "/var/lib/calico" when: - - kube_network_plugin in ["calico", "weave", "canal", "flannel", "contiv", "cilium"] + - kube_network_plugin in ["calico", "weave", "canal", "flannel", "contiv", "cilium", "kube-router"] - inventory_hostname in groups['k8s-cluster'] tags: - network @@ -42,6 +42,7 @@ - weave - canal - contiv + - kube-router - bootstrap-os - name: Create local volume provisioner directories diff --git a/roles/kubernetes/secrets/tasks/main.yml b/roles/kubernetes/secrets/tasks/main.yml index d36c3a057289c92632842e5883eb77c509e6c747..232474f67a849f8a7bb087c0cf266cb55da6a5ef 100644 --- a/roles/kubernetes/secrets/tasks/main.yml +++ b/roles/kubernetes/secrets/tasks/main.yml @@ -113,7 +113,11 @@ with_items: - "node-{{ inventory_hostname }}.pem" - "kube-proxy-{{ inventory_hostname }}.pem" - when: inventory_hostname in groups['k8s-cluster'] + when: + - inventory_hostname in groups['k8s-cluster'] + tags: + - node + - kube-proxy - name: "Gen_certs | set kube node certificate serial facts" set_fact: @@ -123,6 +127,7 @@ tags: - kubelet - node + - kube-proxy - import_tasks: gen_tokens.yml tags: diff --git a/roles/network_plugin/kube-router/defaults/main.yml b/roles/network_plugin/kube-router/defaults/main.yml new file mode 100644 index 0000000000000000000000000000000000000000..5aacbc2b0b97b670370da95fc6677ff782031a9c --- /dev/null +++ b/roles/network_plugin/kube-router/defaults/main.yml @@ -0,0 +1,36 @@ +--- +# Enables Pod Networking -- Advertises and learns the routes to Pods via iBGP +kube_router_run_router: true + +# Enables Network Policy -- sets up iptables to provide ingress firewall for pods +kube_router_run_firewall: true + +# Enables Service Proxy -- sets up IPVS for Kubernetes Services +# see 
docs/kube-router.md "Caveats" section +kube_router_run_service_proxy: false + +# Add Cluster IP of the service to the RIB so that it gets advertised to the BGP peers. +kube_router_advertise_cluster_ip: false + +# Add External IP of service to the RIB so that it gets advertised to the BGP peers. +kube_router_advertise_external_ip: false + +# Add LoadBalancer IP of service status as set by the LB provider to the RIB so that it gets advertised to the BGP peers. +kube_router_advertise_loadbalancer_ip: false + +# Array of arbitrary extra arguments to kube-router, see +# https://github.com/cloudnativelabs/kube-router/blob/master/docs/user-guide.md +kube_router_extra_args: [] + +# ASN numbers of the BGP peer to which cluster nodes will advertise cluster ip and node's pod cidr. +kube_router_peer_router_asns: ~ + +# The ip address of the external router to which all nodes will peer and advertise the cluster ip and pod cidr's. +kube_router_peer_router_ips: ~ + +# The remote port of the external BGP to which all nodes will peer. If not set, default BGP port (179) will be used. +kube_router_peer_router_ports: ~ + +# Sets up node CNI to allow hairpin mode, requires node reboots, see +# https://github.com/cloudnativelabs/kube-router/blob/master/docs/user-guide.md#hairpin-mode +kube_router_support_hairpin_mode: false diff --git a/roles/network_plugin/kube-router/tasks/main.yml b/roles/network_plugin/kube-router/tasks/main.yml new file mode 100644 index 0000000000000000000000000000000000000000..9fc5b0a7df39cbeaf5103f27cba411893665009f --- /dev/null +++ b/roles/network_plugin/kube-router/tasks/main.yml @@ -0,0 +1,17 @@ +--- + +- name: kube-router | Copy cni plugins from hyperkube + command: "{{ docker_bin_dir }}/docker run --rm -v /opt/cni/bin:/cnibindir {{ hyperkube_image_repo }}:{{ hyperkube_image_tag }} /bin/cp -prf /opt/cni/bin/. 
/cnibindir/" + register: cni_task_result + until: cni_task_result.rc == 0 + retries: 4 + delay: "{{ retry_stagger | random + 3 }}" + changed_when: false + tags: + - hyperkube + - upgrade + +- name: kube-router | Create manifest + template: + src: kube-router.yml.j2 + dest: "{{ kube_config_dir }}/kube-router.yml" diff --git a/roles/network_plugin/kube-router/tasks/reset.yml b/roles/network_plugin/kube-router/tasks/reset.yml new file mode 100644 index 0000000000000000000000000000000000000000..7c3fa53471697dce8d4123bf95d534e5307c22b0 --- /dev/null +++ b/roles/network_plugin/kube-router/tasks/reset.yml @@ -0,0 +1,9 @@ +--- +- name: reset | check kube-dummy-if network device + stat: + path: /sys/class/net/kube-dummy-if + register: kube_dummy_if + +- name: reset | remove the network device created by kube-router + command: ip link del kube-dummy-if + when: kube_dummy_if.stat.exists diff --git a/roles/network_plugin/kube-router/templates/kube-router.yml.j2 b/roles/network_plugin/kube-router/templates/kube-router.yml.j2 new file mode 100644 index 0000000000000000000000000000000000000000..eb150daf14a781f4ccdc365989f45803d7fc82c3 --- /dev/null +++ b/roles/network_plugin/kube-router/templates/kube-router.yml.j2 @@ -0,0 +1,225 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: kube-router-cfg + namespace: kube-system + labels: + tier: node + k8s-app: kube-router +data: + cni-conf.json: | + { + "name":"kubernetes", + "type":"bridge", + "bridge":"kube-bridge", + "isDefaultGateway":true, +{% if kube_router_support_hairpin_mode %} + "hairpinMode":true, +{% endif %} + "ipam": { + "type":"host-local" + } + } + kubeconfig: | + apiVersion: v1 + kind: Config + clusterCIDR: {{ kube_pods_subnet }} + clusters: + - name: cluster + cluster: + certificate-authority: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + server: {{ kube_apiserver_endpoint }} + users: + - name: kube-router + user: + tokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + contexts: + - context: + 
cluster: cluster + user: kube-router + name: kube-router-context + current-context: kube-router-context + +--- +apiVersion: extensions/v1beta1 +kind: DaemonSet +metadata: + labels: + k8s-app: kube-router + tier: node + name: kube-router + namespace: kube-system +spec: + minReadySeconds: 3 + updateStrategy: + rollingUpdate: + maxUnavailable: 1 + type: RollingUpdate + template: + metadata: + labels: + k8s-app: kube-router + tier: node + annotations: + scheduler.alpha.kubernetes.io/critical-pod: '' + spec: +{% if kube_version|version_compare('v1.11.1', '>=') %} + priorityClassName: system-cluster-critical +{% endif %} + serviceAccountName: kube-router + containers: + - name: kube-router + image: {{ kube_router_image_repo }}:{{ kube_router_image_tag }} + imagePullPolicy: IfNotPresent + args: + - --run-router={{ kube_router_run_router | bool }} + - --run-firewall={{ kube_router_run_firewall | bool }} + - --run-service-proxy={{ kube_router_run_service_proxy | bool }} + - --kubeconfig=/var/lib/kube-router/kubeconfig +{% if kube_router_advertise_cluster_ip %} + - --advertise-cluster-ip +{% endif %} +{% if kube_router_advertise_external_ip %} + - --advertise-external-ip +{% endif %} +{% if kube_router_advertise_loadbalancer_ip %} + - --advertise-loadbalancer-ip +{% endif %} +{% if kube_router_peer_router_asns %} + - --peer-router-asns={{ kube_router_peer_router_asns }} +{% endif %} +{% if kube_router_peer_router_ips %} + - --peer-router-ips={{ kube_router_peer_router_ips }} +{% endif %} +{% if kube_router_peer_router_ports %} + - --peer-router-ports={{ kube_router_peer_router_ports }} +{% endif %} +{% for arg in kube_router_extra_args %} + - "{{ arg }}" +{% endfor %} + env: + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + livenessProbe: + httpGet: + path: /healthz + port: 20244 + initialDelaySeconds: 10 + periodSeconds: 3 + resources: + requests: + cpu: 250m + memory: 250Mi + securityContext: + privileged: true + volumeMounts: + - name: lib-modules 
+ mountPath: /lib/modules + readOnly: true + - name: cni-conf-dir + mountPath: /etc/cni/net.d + - name: kubeconfig + mountPath: /var/lib/kube-router + readOnly: true + initContainers: + - name: install-cni + image: {{ busybox_image_repo }}:{{ busybox_image_tag }} + imagePullPolicy: IfNotPresent + command: + - /bin/sh + - -c + - set -e -x; + if [ ! -f /etc/cni/net.d/10-kuberouter.conf ]; then + TMP=/etc/cni/net.d/.tmp-kuberouter-cfg; + cp /etc/kube-router/cni-conf.json ${TMP}; + mv ${TMP} /etc/cni/net.d/10-kuberouter.conf; + fi; + if [ ! -f /var/lib/kube-router/kubeconfig ]; then + TMP=/var/lib/kube-router/.tmp-kubeconfig; + cp /etc/kube-router/kubeconfig ${TMP}; + mv ${TMP} /var/lib/kube-router/kubeconfig; + fi + volumeMounts: + - mountPath: /etc/cni/net.d + name: cni-conf-dir + - mountPath: /etc/kube-router + name: kube-router-cfg + - name: kubeconfig + mountPath: /var/lib/kube-router + hostNetwork: true + tolerations: + - operator: Exists + # Mark pod as critical for rescheduling (Will have no effect starting with kubernetes 1.12) + - key: CriticalAddonsOnly + operator: "Exists" + volumes: + - name: lib-modules + hostPath: + path: /lib/modules + - name: cni-conf-dir + hostPath: + path: /etc/cni/net.d + - name: kube-router-cfg + configMap: + name: kube-router-cfg + - name: kubeconfig + hostPath: + path: /var/lib/kube-router + +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: kube-router + namespace: kube-system + +--- +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1beta1 +metadata: + name: kube-router + namespace: kube-system +rules: + - apiGroups: + - "" + resources: + - namespaces + - pods + - services + - nodes + - endpoints + verbs: + - list + - get + - watch + - apiGroups: + - "networking.k8s.io" + resources: + - networkpolicies + verbs: + - list + - get + - watch + - apiGroups: + - extensions + resources: + - networkpolicies + verbs: + - get + - list + - watch +--- +kind: ClusterRoleBinding +apiVersion: 
rbac.authorization.k8s.io/v1beta1 +metadata: + name: kube-router +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: kube-router +subjects: +- kind: ServiceAccount + name: kube-router + namespace: kube-system diff --git a/roles/network_plugin/meta/main.yml b/roles/network_plugin/meta/main.yml index 4a1a7306267081244c6da53bb2f4eef67db08a7a..a0fae72076ad1a97e7c648875e5c477b3b4904f9 100644 --- a/roles/network_plugin/meta/main.yml +++ b/roles/network_plugin/meta/main.yml @@ -30,5 +30,10 @@ dependencies: tags: - contiv + - role: network_plugin/kube-router + when: kube_network_plugin == 'kube-router' + tags: + - kube-router + - role: network_plugin/cloud when: kube_network_plugin == 'cloud' diff --git a/roles/reset/tasks/main.yml b/roles/reset/tasks/main.yml index 8122ada082e6e33ca9d4923c082216eac2bb78d0..1fff14a27bce2187eb447eb957c84045c3d258a8 100644 --- a/roles/reset/tasks/main.yml +++ b/roles/reset/tasks/main.yml @@ -165,6 +165,7 @@ - /run/contiv - /etc/openvswitch - /run/openvswitch + - /var/lib/kube-router ignore_errors: yes tags: - files @@ -196,7 +197,7 @@ - name: reset | include file with reset tasks specific to the network_plugin if exists include_tasks: "{{ (role_path + '/../network_plugin/' + kube_network_plugin + '/tasks/reset.yml') | realpath }}" when: - - kube_network_plugin in ['flannel', 'cilium', 'contiv'] + - kube_network_plugin in ['flannel', 'cilium', 'contiv', 'kube-router'] tags: - network diff --git a/roles/win_nodes/kubernetes_patch/tasks/main.yml b/roles/win_nodes/kubernetes_patch/tasks/main.yml index a6af1fd906da645f34b764eeafa879fecca0eae0..5e08d88bf0bbb96d53049ecd6b795811acd118f7 100644 --- a/roles/win_nodes/kubernetes_patch/tasks/main.yml +++ b/roles/win_nodes/kubernetes_patch/tasks/main.yml @@ -32,3 +32,5 @@ - debug: msg={{ patch_kube_proxy_state.stderr_lines }} when: patch_kube_proxy_state is not skipped tags: init + when: + - not kube_proxy_remove diff --git a/tests/files/gce_centos7-kube-router.yml 
b/tests/files/gce_centos7-kube-router.yml new file mode 100644 index 0000000000000000000000000000000000000000..c210d853f53765da285ffeaa3656b5602791a1a8 --- /dev/null +++ b/tests/files/gce_centos7-kube-router.yml @@ -0,0 +1,12 @@ +# Instance settings +cloud_image_family: centos-7 +cloud_region: us-central1-c +cloud_machine_type: "n1-standard-1" +mode: default + +# Deployment settings +kube_network_plugin: kube-router +deploy_netchecker: true +enable_network_policy: true +kubedns_min_replicas: 1 +cloud_provider: gce diff --git a/tests/files/gce_coreos-kube-router.yml b/tests/files/gce_coreos-kube-router.yml new file mode 100644 index 0000000000000000000000000000000000000000..655ca2dd58122b6c8ceab64d290ff5fe4a6417b4 --- /dev/null +++ b/tests/files/gce_coreos-kube-router.yml @@ -0,0 +1,13 @@ +# Instance settings +cloud_image_family: coreos-stable +cloud_region: us-central1-c +mode: default +startup_script: 'systemctl disable locksmithd && systemctl stop locksmithd' + +# Deployment settings +kube_network_plugin: kube-router +bootstrap_os: coreos +resolvconf_mode: host_resolvconf # this is required as long as the coreos stable channel uses docker < 1.12 +deploy_netchecker: true +kubedns_min_replicas: 1 +cloud_provider: gce diff --git a/tests/files/gce_ubuntu-kube-router-sep.yml b/tests/files/gce_ubuntu-kube-router-sep.yml new file mode 100644 index 0000000000000000000000000000000000000000..fde781ff2c6d809845e15379a4714e1711bac7b1 --- /dev/null +++ b/tests/files/gce_ubuntu-kube-router-sep.yml @@ -0,0 +1,11 @@ +# Instance settings +cloud_image_family: ubuntu-1604-lts +cloud_region: us-central1-c +mode: separate + +# Deployment settings +bootstrap_os: ubuntu +kube_network_plugin: kube-router +deploy_netchecker: true +kubedns_min_replicas: 1 +cloud_provider: gce