diff --git a/.travis.yml b/.travis.yml index 863374c26eaa5cd81e36269513c5f16e7b5bce2c..e2a9f9f07c4d7fea08086907c83d8276c338b485 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,6 +10,8 @@ env: TEST_ID=$TRAVIS_JOB_NUMBER CONTAINER_ENGINE=docker PRIVATE_KEY=$GCE_PRIVATE_KEY + GS_ACCESS_KEY_ID=$GS_KEY + GS_SECRET_ACCESS_KEY=$GS_SECRET ANSIBLE_KEEP_REMOTE_FILES=1 CLUSTER_MODE=default matrix: @@ -101,11 +103,11 @@ env: before_install: # Install Ansible. - - pip install --user boto -U - pip install --user ansible - pip install --user netaddr # W/A https://github.com/ansible/ansible-modules-core/issues/5196#issuecomment-253766186 - pip install --user apache-libcloud==0.20.1 + - pip install --user boto==2.9.0 -U cache: - directories: @@ -122,8 +124,6 @@ before_script: - $HOME/.local/bin/ansible-playbook --version - cp tests/ansible.cfg . # - "echo $HOME/.local/bin/ansible-playbook -i inventory.ini -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root -e '{\"cloud_provider\": true}' $LOG_LEVEL -e kube_network_plugin=${KUBE_NETWORK_PLUGIN} setup-kubernetes/cluster.yml" - ## Configure ansible deployment logs to be collected as an artifact. Enable when GCS configured, see https://docs.travis-ci.com/user/deployment/gcs -# - $HOME/.local/bin/ansible-playbook -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root scripts/configure-logs.yaml script: - > @@ -147,8 +147,21 @@ script: - $HOME/.local/bin/ansible-playbook -i inventory/inventory.ini -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root tests/testcases/020_check-create-pod.yml $LOG_LEVEL ## Ping the between 2 pod - $HOME/.local/bin/ansible-playbook -i inventory/inventory.ini -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root tests/testcases/030_check-network.yml $LOG_LEVEL - ## Collect env info, enable it once GCS configured, see https://docs.travis-ci.com/user/deployment/gcs -# - $HOME/.local/bin/ansible-playbook -i inventory/inventory.ini -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root scripts/collect-info.yaml + +after_failure: + - > + $HOME/.local/bin/ansible-playbook -i inventory/inventory.ini -u $SSH_USER + -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root -e dir=$HOME + scripts/collect-info.yaml + - > + $HOME/.local/bin/ansible-playbook tests/cloud_playbooks/upload-logs-gcs.yml -i "localhost," -c local + -e kube_network_plugin=${KUBE_NETWORK_PLUGIN} + -e gce_project_id=${GCE_PROJECT_ID} + -e gs_key=${GS_ACCESS_KEY_ID} + -e gs_skey=${GS_SECRET_ACCESS_KEY} + -e ostype=${CLOUD_IMAGE} + -e commit=${TRAVIS_COMMIT} + -e dir=${HOME} after_script: - > diff --git a/cluster.yml b/cluster.yml index 295bb668a6ae554ef274db691d64f7faa5598790..12c0901693ca3eccd8c80cf2ba0628d045ce6ac5 100644 --- a/cluster.yml +++ b/cluster.yml @@ -27,6 +27,8 @@ - hosts: kube-master roles: - { role: kubernetes/master, tags: master } + - { role: kubernetes-apps/lib, tags: apps } + - { role: kubernetes-apps/network_plugin, tags: network } - hosts: k8s-cluster roles: @@ -34,4 +36,5 @@ - hosts: kube-master[0] roles: + - { role: kubernetes-apps/lib, tags: apps } - { role: kubernetes-apps, tags: apps } diff --git a/contrib/terraform/openstack/README.md b/contrib/terraform/openstack/README.md index ec611a4996c19d7d1ba6481d0758f3918266ba78..2840bde9c9f47cc9a16ba1791610c757f3eca2af 100644 --- a/contrib/terraform/openstack/README.md +++ b/contrib/terraform/openstack/README.md @@ -5,14 +5,13 @@ Openstack. ## Status -This will install a Kubernetes cluster on an Openstack Cloud. It is tested on a -OpenStack Cloud provided by [BlueBox](https://www.blueboxcloud.com/) and -should work on most modern installs of OpenStack that support the basic +This will install a Kubernetes cluster on an Openstack Cloud. It has been tested on a +OpenStack Cloud provided by [BlueBox](https://www.blueboxcloud.com/) and on OpenStack at [EMBL-EBI's](http://www.ebi.ac.uk/) [EMBASSY Cloud](http://www.embassycloud.org/). This should work on most modern installs of OpenStack that support the basic services. There are some assumptions made to try and ensure it will work on your openstack cluster. -* floating-ips are used for access +* floating-ips are used for access, but you can have masters and nodes that don't use floating-ips if needed. You need currently at least 1 floating ip, which we would suggest is used on a master. * you already have a suitable OS image in glance * you already have both an internal network and a floating-ip pool created * you have security-groups enabled @@ -24,16 +23,14 @@ There are some assumptions made to try and ensure it will work on your openstack ## Terraform -Terraform will be used to provision all of the OpenStack resources required to -run Docker Swarm. It is also used to deploy and provision the software +Terraform will be used to provision all of the OpenStack resources. It is also used to deploy and provision the software requirements. ### Prep #### OpenStack -Ensure your OpenStack credentials are loaded in environment variables. This is -how I do it: +Ensure your OpenStack credentials are loaded in environment variables. This can be done by downloading a credentials .rc file from your OpenStack dashboard and sourcing it: ``` $ source ~/.stackrc @@ -46,7 +43,7 @@ differences between OpenStack installs the Terraform does not attempt to create these for you. By default Terraform will expect that your networks are called `internal` and -`external`. You can change this by altering the Terraform variables `network_name` and `floatingip_pool`. +`external`. You can change this by altering the Terraform variables `network_name` and `floatingip_pool`. This can be done on a new variables file or through environment variables. A full list of variables you can change can be found at [variables.tf](variables.tf). @@ -76,8 +73,21 @@ $ echo Setting up Terraform creds && \ export TF_VAR_auth_url=${OS_AUTH_URL} ``` +If you want to provision master or node VMs that don't use floating ips, write on a `my-terraform-vars.tfvars` file, for example: + +``` +number_of_k8s_masters = "1" +number_of_k8s_masters_no_floating_ip = "2" +number_of_k8s_nodes_no_floating_ip = "1" +number_of_k8s_nodes = "0" +``` +This will provision one VM as master using a floating ip, two additional masters using no floating ips (these will only have private ips inside your tenancy) and one VM as node, again without a floating ip. + + + # Provision a Kubernetes Cluster on OpenStack +If not using a tfvars file for your setup, then execute: ``` terraform apply -state=contrib/terraform/openstack/terraform.tfstate contrib/terraform/openstack openstack_compute_secgroup_v2.k8s_master: Creating... @@ -96,6 +106,13 @@ use the `terraform show` command. State path: contrib/terraform/openstack/terraform.tfstate ``` +Alternatively, if you wrote your terraform variables on a file `my-terraform-vars.tfvars`, your command would look like: +``` +terraform apply -state=contrib/terraform/openstack/terraform.tfstate -var-file=my-terraform-vars.tfvars contrib/terraform/openstack +``` + +if you choose to add masters or nodes without floating ips (only internal ips on your OpenStack tenancy), this script will create as well a file `contrib/terraform/openstack/k8s-cluster.yml` with an ssh command for ansible to be able to access your machines tunneling through the first floating ip used. If you want to manually handling the ssh tunneling to these machines, please delete or move that file. If you want to use this, just leave it there, as ansible will pick it up automatically. + Make sure you can connect to the hosts: ``` @@ -114,6 +131,8 @@ example-k8s-master-1 | SUCCESS => { } ``` +if you are deploying a system that needs bootstrapping, like CoreOS, these might have a state `FAILED` due to CoreOS not having python. As long as the state is not `UNREACHABLE`, this is fine. + if it fails try to connect manually via SSH ... it could be somthing as simple as a stale host key. Deploy kubernetes: diff --git a/contrib/terraform/openstack/ansible_bastion_template.txt b/contrib/terraform/openstack/ansible_bastion_template.txt new file mode 100644 index 0000000000000000000000000000000000000000..cdf0120668a54795d850369882e8d85705bd842d --- /dev/null +++ b/contrib/terraform/openstack/ansible_bastion_template.txt @@ -0,0 +1 @@ +ansible_ssh_common_args: '-o ProxyCommand="ssh -o StrictHostKeyChecking=no -W %h:%p -q USER@BASTION_ADDRESS"' diff --git a/contrib/terraform/openstack/group_vars/all.yml b/contrib/terraform/openstack/group_vars/all.yml index b73fb66b2dfa73f81d8dc115b4399e416e86c2db..8b0cd2bcdb87182efef15f2c3f91f176b3b9bf3a 100644 --- a/contrib/terraform/openstack/group_vars/all.yml +++ b/contrib/terraform/openstack/group_vars/all.yml @@ -1,9 +1,14 @@ +# Valid bootstrap options (required): xenial, coreos, none +bootstrap_os: "none" + # Directory where the binaries will be installed bin_dir: /usr/local/bin # Where the binaries will be downloaded. # Note: ensure that you've enough disk space (about 1G) local_release_dir: "/tmp/releases" +# Random shifts for retrying failed ops like pushing/downloading +retry_stagger: 5 # Uncomment this line for CoreOS only. # Directory where python binary is installed @@ -28,6 +33,8 @@ kube_users: # Kubernetes cluster name, also will be used as DNS domain cluster_name: cluster.local +# Subdomains of DNS domain to be resolved via /etc/resolv.conf +ndots: 5 # For some environments, each node has a pubilcally accessible # address and an address it should bind services to. These are @@ -51,6 +58,16 @@ cluster_name: cluster.local # but don't know about that address themselves. # access_ip: 1.1.1.1 +# Etcd access modes: +# Enable multiaccess to configure clients to access all of the etcd members directly +# as the "http://hostX:port, http://hostY:port, ..." and ignore the proxy loadbalancers. +# This may be the case if clients support and loadbalance multiple etcd servers natively. +etcd_multiaccess: false + +# Assume there are no internal loadbalancers for apiservers exist and listen on +# kube_apiserver_port (default 443) +loadbalancer_apiserver_localhost: true + # Choose network plugin (calico, weave or flannel) kube_network_plugin: flannel @@ -89,10 +106,12 @@ kube_apiserver_insecure_port: 8080 # (http) # You still must manually configure all your containers to use this DNS server, # Kubernetes won't do this for you (yet). +# Do not install additional dnsmasq +skip_dnsmasq: false # Upstream dns servers used by dnsmasq -upstream_dns_servers: - - 8.8.8.8 - - 8.8.4.4 +#upstream_dns_servers: +# - 8.8.8.8 +# - 8.8.4.4 # # # Use dns server : https://github.com/ansibl8s/k8s-skydns/blob/master/skydns-README.md dns_setup: true @@ -109,21 +128,6 @@ dns_server: "{{ kube_service_addresses|ipaddr('net')|ipaddr(2)|ipaddr('address') # like you would do when using nova-client before starting the playbook. # cloud_provider: -# For multi masters architecture: -# kube-proxy doesn't support multiple apiservers for the time being so you'll need to configure your own loadbalancer -# This domain name will be inserted into the /etc/hosts file of all servers -# configuration example with haproxy : -# listen kubernetes-apiserver-https -# bind 10.99.0.21:8383 -# option ssl-hello-chk -# mode tcp -# timeout client 3h -# timeout server 3h -# server master1 10.99.0.26:443 -# server master2 10.99.0.27:443 -# balance roundrobin -# apiserver_loadbalancer_domain_name: "lb-apiserver.kubernetes.local" - ## Set these proxy values in order to update docker daemon to use proxies # http_proxy: "" # https_proxy: "" @@ -134,3 +138,7 @@ dns_server: "{{ kube_service_addresses|ipaddr('net')|ipaddr(2)|ipaddr('address') ## An obvious use case is allowing insecure-registry access ## to self hosted registries like so: docker_options: "--insecure-registry={{ kube_service_addresses }}" + +# default packages to install within the cluster +kpm_packages: [] +# - name: kube-system/grafana diff --git a/contrib/terraform/openstack/kubespray.tf b/contrib/terraform/openstack/kubespray.tf index 27217d08bd55190c4698784021273cf4dc673686..ba526b3e0ac20e1709eebc6d8134282510ff3943 100644 --- a/contrib/terraform/openstack/kubespray.tf +++ b/contrib/terraform/openstack/kubespray.tf @@ -70,6 +70,28 @@ resource "openstack_compute_instance_v2" "k8s_master" { ssh_user = "${var.ssh_user}" kubespray_groups = "etcd,kube-master,kube-node,k8s-cluster" } + +} + + +resource "openstack_compute_instance_v2" "k8s_master_no_floating_ip" { + name = "${var.cluster_name}-k8s-master-nf-${count.index+1}" + count = "${var.number_of_k8s_masters_no_floating_ip}" + image_name = "${var.image}" + flavor_id = "${var.flavor_k8s_master}" + key_pair = "${openstack_compute_keypair_v2.k8s.name}" + network { + name = "${var.network_name}" + } + security_groups = [ "${openstack_compute_secgroup_v2.k8s_master.name}", + "${openstack_compute_secgroup_v2.k8s.name}" ] + metadata = { + ssh_user = "${var.ssh_user}" + kubespray_groups = "etcd,kube-master,kube-node,k8s-cluster" + } + provisioner "local-exec" { + command = "sed s/USER/${var.ssh_user}/ contrib/terraform/openstack/ansible_bastion_template.txt | sed s/BASTION_ADDRESS/${element(openstack_networking_floatingip_v2.k8s_master.*.address, 0)}/ > contrib/terraform/openstack/group_vars/k8s-cluster.yml" + } } resource "openstack_compute_instance_v2" "k8s_node" { @@ -89,6 +111,28 @@ resource "openstack_compute_instance_v2" "k8s_node" { } } +resource "openstack_compute_instance_v2" "k8s_node_no_floating_ip" { + name = "${var.cluster_name}-k8s-node-nf-${count.index+1}" + count = "${var.number_of_k8s_nodes_no_floating_ip}" + image_name = "${var.image}" + flavor_id = "${var.flavor_k8s_node}" + key_pair = "${openstack_compute_keypair_v2.k8s.name}" + network { + name = "${var.network_name}" + } + security_groups = ["${openstack_compute_secgroup_v2.k8s.name}" ] + metadata = { + ssh_user = "${var.ssh_user}" + kubespray_groups = "kube-node,k8s-cluster" + } + provisioner "local-exec" { + command = "sed s/USER/${var.ssh_user}/ contrib/terraform/openstack/ansible_bastion_template.txt | sed s/BASTION_ADDRESS/${element(openstack_networking_floatingip_v2.k8s_master.*.address, 0)}/ > contrib/terraform/openstack/group_vars/k8s-cluster.yml" + } +} + + + + #output "msg" { # value = "Your hosts are ready to go!\nYour ssh hosts are: ${join(", ", openstack_networking_floatingip_v2.k8s_master.*.address )}" #} diff --git a/contrib/terraform/openstack/variables.tf b/contrib/terraform/openstack/variables.tf index 6c1fc767d743922542c64505d2ba35b64f89222b..8be38aed503762991705fed42ce7772eac870382 100644 --- a/contrib/terraform/openstack/variables.tf +++ b/contrib/terraform/openstack/variables.tf @@ -6,10 +6,18 @@ variable "number_of_k8s_masters" { default = 2 } +variable "number_of_k8s_masters_no_floating_ip" { + default = 2 +} + variable "number_of_k8s_nodes" { default = 1 } +variable "number_of_k8s_nodes_no_floating_ip" { + default = 1 +} + variable "public_key_path" { description = "The path of the ssh pub key" default = "~/.ssh/id_rsa.pub" diff --git a/docs/calico.md b/docs/calico.md index 50744f63f4c7747ac7cf80584ae5dbde9752bddc..a8bffc0dbfb79a33f35e5bb70e54df477c4fbcc9 100644 --- a/docs/calico.md +++ b/docs/calico.md @@ -10,18 +10,42 @@ docker ps | grep calico The **calicoctl** command allows to check the status of the network workloads. * Check the status of Calico nodes +``` +calicoctl node status +``` + +or for versions prior *v1.0.0*: + ``` calicoctl status ``` * Show the configured network subnet for containers +``` + calicoctl get ippool -o wide +``` + +or for versions prior *v1.0.0*: + ``` calicoctl pool show ``` * Show the workloads (ip addresses of containers and their located) +``` +calicoctl get workloadEndpoint -o wide +``` + +and + +``` +calicoctl get hostEndpoint -o wide +``` + +or for versions prior *v1.0.0*: + ``` calicoctl endpoint show --detail ``` diff --git a/docs/ha-mode.md b/docs/ha-mode.md index 792c18a19fb6f1a4d1b1855c96c3815046ae66ea..8ec5c93a169d0048bb9dcc0394ac675ef6726321 100644 --- a/docs/ha-mode.md +++ b/docs/ha-mode.md @@ -5,10 +5,6 @@ The following components require a highly available endpoints: * etcd cluster, * kube-apiserver service instances. -The former provides the -[etcd-proxy](https://coreos.com/etcd/docs/latest/proxy.html) service to access -the cluster members in HA fashion. - The latter relies on a 3rd side reverse proxies, like Nginx or HAProxy, to achieve the same goal. @@ -57,7 +53,7 @@ type. The following diagram shows how traffic to the apiserver is directed. A user may opt to use an external loadbalancer (LB) instead. An external LB provides access for external clients, while the internal LB accepts client -connections only to the localhost, similarly to the etcd-proxy HA endpoints. +connections only to the localhost. Given a frontend `VIP` address and `IP1, IP2` addresses of backends, here is an example configuration for a HAProxy service acting as an external LB: ``` diff --git a/inventory/group_vars/all.yml b/inventory/group_vars/all.yml index cbf2e63a2b6d9422be2da212811973ba5563a36c..f72276ae6e9437aaa52a170959d4326ae3094866 100644 --- a/inventory/group_vars/all.yml +++ b/inventory/group_vars/all.yml @@ -62,7 +62,7 @@ ndots: 5 # Enable multiaccess to configure clients to access all of the etcd members directly # as the "http://hostX:port, http://hostY:port, ..." and ignore the proxy loadbalancers. # This may be the case if clients support and loadbalance multiple etcd servers natively. -etcd_multiaccess: false +etcd_multiaccess: true # Assume there are no internal loadbalancers for apiservers exist and listen on # kube_apiserver_port (default 443) diff --git a/roles/download/defaults/main.yml b/roles/download/defaults/main.yml index cbe053fa0a4c877af85a2a65f713e0e76ced9b8e..1ea220fd1bf61afc44ca28aa53b47b71c80f6388 100644 --- a/roles/download/defaults/main.yml +++ b/roles/download/defaults/main.yml @@ -10,7 +10,7 @@ kube_version: v1.4.3 etcd_version: v3.0.6 #TODO(mattymo): Move calico versions to roles/network_plugins/calico/defaults # after migration to container download -calico_version: v0.22.0 +calico_version: v1.0.0-beta calico_cni_version: v1.4.2 weave_version: v1.6.1 flannel_version: v0.6.2 @@ -39,9 +39,13 @@ flannel_server_helper_image_tag: "{{ flannel_server_helper_version }}" flannel_image_repo: "quay.io/coreos/flannel" flannel_image_tag: "{{ flannel_version }}" calicoctl_image_repo: "calico/ctl" -calicoctl_image_tag: "{{ calico_version }}" +# TODO(apanchenko): v1.0.0-beta can't execute `node run` from Docker container +# for details see https://github.com/projectcalico/calico-containers/issues/1291 +calicoctl_image_tag: "v0.22.0" calico_node_image_repo: "calico/node" calico_node_image_tag: "{{ calico_version }}" +calico_cni_image_repo: "calico/cni" +calico_cni_image_tag: "{{ calico_cni_version }}" hyperkube_image_repo: "quay.io/coreos/hyperkube" hyperkube_image_tag: "{{ kube_version }}_coreos.0" pod_infra_image_repo: "gcr.io/google_containers/pause-amd64" @@ -56,7 +60,7 @@ downloads: url: "{{ calico_cni_download_url }}" owner: "root" mode: "0755" - enabled: "{{ kube_network_plugin == 'calico' }}" + enabled: "{{ kube_network_plugin == 'calico' or kube_network_plugin == 'canal' }}" calico_cni_plugin_ipam: dest: calico/bin/calico-ipam version: "{{calico_cni_version}}" @@ -95,22 +99,27 @@ downloads: container: true repo: "{{ flannel_image_repo }}" tag: "{{ flannel_image_tag }}" - enabled: "{{ kube_network_plugin == 'flannel' }}" + enabled: "{{ kube_network_plugin == 'flannel' or kube_network_plugin == 'canal' }}" flannel_server_helper: container: true repo: "{{ flannel_server_helper_image_repo }}" tag: "{{ flannel_server_helper_image_tag }}" - enabled: "{{ kube_network_plugin == 'flannel' }}" + enabled: "{{ kube_network_plugin == 'flannel' or kube_network_plugin == 'canal' }}" calicoctl: container: true repo: "{{ calicoctl_image_repo }}" tag: "{{ calicoctl_image_tag }}" - enabled: "{{ kube_network_plugin == 'calico' }}" + enabled: "{{ kube_network_plugin == 'calico' or kube_network_plugin == 'canal' }}" calico_node: container: true repo: "{{ calico_node_image_repo }}" tag: "{{ calico_node_image_tag }}" - enabled: "{{ kube_network_plugin == 'calico' }}" + enabled: "{{ kube_network_plugin == 'calico' or kube_network_plugin == 'canal' }}" + calico_cni: + container: true + repo: "{{ calico_cni_image_repo }}" + tag: "{{ calico_cni_image_tag }}" + enabled: "{{ kube_network_plugin == 'canal' }}" pod_infra: container: true repo: "{{ pod_infra_image_repo }}" diff --git a/roles/etcd/defaults/main.yml b/roles/etcd/defaults/main.yml index 02234a2fe773c09bb957ef7d066bd3399fa59867..2df4ba165a912c8386e101890651003bbe9b2954 100644 --- a/roles/etcd/defaults/main.yml +++ b/roles/etcd/defaults/main.yml @@ -1,2 +1,8 @@ --- etcd_bin_dir: "{{ local_release_dir }}/etcd/etcd-{{ etcd_version }}-linux-amd64/" + +etcd_config_dir: /etc/ssl/etcd +etcd_cert_dir: "{{ etcd_config_dir }}/ssl" +etcd_cert_group: root + +etcd_script_dir: "{{ bin_dir }}/etcd-scripts" diff --git a/roles/etcd/files/make-ssl-etcd.sh b/roles/etcd/files/make-ssl-etcd.sh new file mode 100755 index 0000000000000000000000000000000000000000..4c7db9430791b36d74867e3332c876b6427cd978 --- /dev/null +++ b/roles/etcd/files/make-ssl-etcd.sh @@ -0,0 +1,80 @@ +#!/bin/bash + +# Author: Smana smainklh@gmail.com +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -o errexit +set -o pipefail + +usage() +{ + cat << EOF +Create self signed certificates + +Usage : $(basename $0) -f <config> [-d <ssldir>] + -h | --help : Show this message + -f | --config : Openssl configuration file + -d | --ssldir : Directory where the certificates will be installed + + ex : + $(basename $0) -f openssl.conf -d /srv/ssl +EOF +} + +# Options parsing +while (($#)); do + case "$1" in + -h | --help) usage; exit 0;; + -f | --config) CONFIG=${2}; shift 2;; + -d | --ssldir) SSLDIR="${2}"; shift 2;; + *) + usage + echo "ERROR : Unknown option" + exit 3 + ;; + esac +done + +if [ -z ${CONFIG} ]; then + echo "ERROR: the openssl configuration file is missing. option -f" + exit 1 +fi +if [ -z ${SSLDIR} ]; then + SSLDIR="/etc/ssl/etcd" +fi + +tmpdir=$(mktemp -d /tmp/etcd_cacert.XXXXXX) +trap 'rm -rf "${tmpdir}"' EXIT +cd "${tmpdir}" + +mkdir -p "${SSLDIR}" + +# Root CA +openssl genrsa -out ca-key.pem 2048 > /dev/null 2>&1 +openssl req -x509 -new -nodes -key ca-key.pem -days 10000 -out ca.pem -subj "/CN=etcd-ca" > /dev/null 2>&1 + +# ETCD member +openssl genrsa -out member-key.pem 2048 > /dev/null 2>&1 +openssl req -new -key member-key.pem -out member.csr -subj "/CN=etcd-member" -config ${CONFIG} > /dev/null 2>&1 +openssl x509 -req -in member.csr -CA ca.pem -CAkey ca-key.pem -CAcreateserial -out member.pem -days 365 -extensions ssl_client -extfile ${CONFIG} > /dev/null 2>&1 + +# Nodes and Admin +for i in node admin; do + openssl genrsa -out ${i}-key.pem 2048 > /dev/null 2>&1 + openssl req -new -key ${i}-key.pem -out ${i}.csr -subj "/CN=kube-${i}" > /dev/null 2>&1 + openssl x509 -req -in ${i}.csr -CA ca.pem -CAkey ca-key.pem -CAcreateserial -out ${i}.pem -days 365 -extensions ssl_client -extfile ${CONFIG} > /dev/null 2>&1 +done + +# Install certs +mv *.pem ${SSLDIR}/ diff --git a/roles/etcd/handlers/main.yml b/roles/etcd/handlers/main.yml index 693754a06364419e0f6750fbb83c6affdd4969fb..badf0bd79e51bb82fcf12ce322a298461086956d 100644 --- a/roles/etcd/handlers/main.yml +++ b/roles/etcd/handlers/main.yml @@ -6,21 +6,14 @@ - reload etcd - wait for etcd up -- name: restart etcd-proxy - command: /bin/true - notify: - - etcd | reload systemd - - reload etcd-proxy - - wait for etcd up - - name: etcd | reload systemd command: systemctl daemon-reload when: ansible_service_mgr == "systemd" - name: wait for etcd up - uri: url="http://{% if is_etcd_master %}{{ etcd_address }}{% else %}127.0.0.1{% endif %}:2379/health" + uri: url="https://{% if is_etcd_master %}{{ etcd_address }}{% else %}127.0.0.1{% endif %}:2379/health" validate_certs=no register: result - until: result.status == 200 + until: result.status is defined and result.status == 200 retries: 10 delay: 5 @@ -30,8 +23,7 @@ state: restarted when: is_etcd_master -- name: reload etcd-proxy - service: - name: etcd-proxy - state: restarted - when: is_etcd_proxy +- name: set etcd_secret_changed + set_fact: + etcd_secret_changed: true + diff --git a/roles/etcd/tasks/check_certs.yml b/roles/etcd/tasks/check_certs.yml new file mode 100644 index 0000000000000000000000000000000000000000..03a875517cfdfa3bfded02f580c894bfca774344 --- /dev/null +++ b/roles/etcd/tasks/check_certs.yml @@ -0,0 +1,36 @@ +--- +- name: "Check_certs | check if the certs have already been generated on first master" + stat: + path: "{{ etcd_cert_dir }}/ca.pem" + delegate_to: "{{groups['etcd'][0]}}" + register: etcdcert_master + run_once: true + +- name: "Check_certs | Set default value for 'sync_certs' and 'gen_certs' to false" + set_fact: + sync_certs: false + gen_certs: false + +- name: "Check_certs | Set 'sync_certs' and 'gen_certs' to true" + set_fact: + gen_certs: true + when: not etcdcert_master.stat.exists + run_once: true + +- name: "Check certs | check if a cert already exists" + stat: + path: "{{ etcd_cert_dir }}/ca.pem" + register: etcdcert + +- name: "Check_certs | Set 'sync_certs' to true" + set_fact: + sync_certs: true + when: >- + {%- set certs = {'sync': False} -%} + {%- for server in play_hosts + if (not hostvars[server].etcdcert.stat.exists|default(False)) or + (hostvars[server].etcdcert.stat.checksum|default('') != etcdcert_master.stat.checksum|default('')) -%} + {%- set _ = certs.update({'sync': True}) -%} + {%- endfor -%} + {{ certs.sync }} + run_once: true diff --git a/roles/etcd/tasks/configure.yml b/roles/etcd/tasks/configure.yml index 514a79d731515bb8db0facbf04847969fbb86558..a2ef38f2c7b3281aa8f1e526d0b0a11d1c4806e0 100644 --- a/roles/etcd/tasks/configure.yml +++ b/roles/etcd/tasks/configure.yml @@ -26,19 +26,3 @@ mode: 0755 when: ansible_service_mgr in ["sysvinit","upstart"] and ansible_os_family == "Debian" and is_etcd_master notify: restart etcd - -- name: Configure | Copy etcd-proxy.service systemd file - template: - src: "etcd-proxy-{{ etcd_deployment_type }}.service.j2" - dest: /etc/systemd/system/etcd-proxy.service - backup: yes - when: ansible_service_mgr == "systemd" and is_etcd_proxy - notify: restart etcd-proxy -- name: Configure | Write etcd-proxy initd script - template: - src: "deb-etcd-proxy-{{ etcd_deployment_type }}.initd.j2" - dest: /etc/init.d/etcd-proxy - owner: root - mode: 0755 - when: ansible_service_mgr in ["sysvinit","upstart"] and ansible_os_family == "Debian" and is_etcd_proxy - notify: restart etcd-proxy diff --git a/roles/etcd/tasks/gen_certs.yml b/roles/etcd/tasks/gen_certs.yml new file mode 100644 index 0000000000000000000000000000000000000000..8d1d34b74d6a86790cd4231742da4f9bc31d6419 --- /dev/null +++ b/roles/etcd/tasks/gen_certs.yml @@ -0,0 +1,112 @@ +--- + +- name: Gen_certs | create etcd script dir + file: + path: "{{ etcd_script_dir }}" + state: directory + owner: root + when: inventory_hostname == groups['etcd'][0] + +- name: Gen_certs | create etcd cert dir + file: + path={{ etcd_cert_dir }} + group={{ etcd_cert_group }} + state=directory + owner=root + recurse=yes + +- name: Gen_certs | write openssl config + template: + src: "openssl.conf.j2" + dest: "{{ etcd_config_dir }}/openssl.conf" + run_once: yes + delegate_to: "{{groups['etcd'][0]}}" + when: gen_certs|default(false) + +- name: Gen_certs | copy certs generation script + copy: + src: "make-ssl-etcd.sh" + dest: "{{ etcd_script_dir }}/make-ssl-etcd.sh" + mode: 0700 + run_once: yes + delegate_to: "{{groups['etcd'][0]}}" + when: gen_certs|default(false) + +- name: Gen_certs | run cert generation script + command: "{{ etcd_script_dir }}/make-ssl-etcd.sh -f {{ etcd_config_dir }}/openssl.conf -d {{ etcd_cert_dir }}" + run_once: yes + delegate_to: "{{groups['etcd'][0]}}" + when: gen_certs|default(false) + notify: set etcd_secret_changed + +- set_fact: + master_certs: ['ca-key.pem', 'admin.pem', 'admin-key.pem', 'member.pem', 'member-key.pem'] + node_certs: ['ca.pem', 'node.pem', 'node-key.pem'] + +- name: Gen_certs | Gather etcd master certs + shell: "tar cfz - -C {{ etcd_cert_dir }} {{ master_certs|join(' ') }} {{ node_certs|join(' ') }}| base64 --wrap=0" + register: etcd_master_cert_data + delegate_to: "{{groups['etcd'][0]}}" + run_once: true + when: sync_certs|default(false) + notify: set etcd_secret_changed + +- name: Gen_certs | Gather etcd node certs + shell: "tar cfz - -C {{ etcd_cert_dir }} {{ node_certs|join(' ') }} | base64 --wrap=0" + register: etcd_node_cert_data + delegate_to: "{{groups['etcd'][0]}}" + run_once: true + when: sync_certs|default(false) + notify: set etcd_secret_changed + +- name: Gen_certs | Copy certs on masters + shell: "echo '{{etcd_master_cert_data.stdout|quote}}' | base64 -d | tar xz -C {{ etcd_cert_dir }}" + changed_when: false + when: inventory_hostname in groups['etcd'] and sync_certs|default(false) and + inventory_hostname != groups['etcd'][0] + +- name: Gen_certs | Copy certs on nodes + shell: "echo '{{etcd_node_cert_data.stdout|quote}}' | base64 -d | tar xz -C {{ etcd_cert_dir }}" + changed_when: false + when: inventory_hostname in groups['k8s-cluster'] and sync_certs|default(false) and + inventory_hostname not in groups['etcd'] + +- name: Gen_certs | check certificate permissions + file: + path={{ etcd_cert_dir }} + group={{ etcd_cert_group }} + state=directory + owner=kube + recurse=yes + +- name: Gen_certs | set permissions on keys + shell: chmod 0600 {{ etcd_cert_dir}}/*key.pem + when: inventory_hostname in groups['etcd'] + changed_when: false + +- name: Gen_certs | target ca-certificate store file + set_fact: + ca_cert_path: |- + {% if ansible_os_family == "Debian" -%} + /usr/local/share/ca-certificates/etcd-ca.crt + {%- elif ansible_os_family == "RedHat" -%} + /etc/pki/ca-trust/source/anchors/etcd-ca.crt + {%- elif ansible_os_family == "CoreOS" -%} + /etc/ssl/certs/etcd-ca.pem + {%- endif %} + +- name: Gen_certs | add CA to trusted CA dir + copy: + src: "{{ etcd_cert_dir }}/ca.pem" + dest: "{{ ca_cert_path }}" + remote_src: true + register: etcd_ca_cert + +- name: Gen_certs | update ca-certificates (Debian/Ubuntu/CoreOS) + command: update-ca-certificates + when: etcd_ca_cert.changed and ansible_os_family in ["Debian", "CoreOS"] + +- name: Gen_certs | update ca-certificates (RedHat) + command: update-ca-trust extract + when: etcd_ca_cert.changed and ansible_os_family == "RedHat" + diff --git a/roles/etcd/tasks/main.yml b/roles/etcd/tasks/main.yml index 3ecaa00e6ff80fccda66a68e8f6f588cb7d2ad3e..15be1a769714845aee5d1fe6a6251c0c3c96bf65 100644 --- a/roles/etcd/tasks/main.yml +++ b/roles/etcd/tasks/main.yml @@ -1,8 +1,15 @@ --- +- include: pre_upgrade.yml +- include: check_certs.yml +- include: gen_certs.yml - include: install.yml + when: is_etcd_master - include: set_cluster_health.yml + when: is_etcd_master - include: configure.yml + when: is_etcd_master - include: refresh_config.yml + when: is_etcd_master - name: Ensure etcd is running service: @@ -11,23 +18,11 @@ enabled: yes when: is_etcd_master -- name: Ensure etcd-proxy is running - service: - name: etcd-proxy - state: started - enabled: yes - when: is_etcd_proxy - - name: Restart etcd if binary changed command: /bin/true notify: restart etcd when: etcd_deployment_type == "host" and etcd_copy.stdout_lines and is_etcd_master -- name: Restart etcd-proxy if binary changed - command: /bin/true - notify: restart etcd-proxy - when: etcd_deployment_type == "host" and etcd_copy.stdout_lines and is_etcd_proxy - # Reload systemd before starting service - meta: flush_handlers @@ -35,4 +30,6 @@ # initial state of the cluster is in `existing` # state insted of `new`. - include: set_cluster_health.yml + when: is_etcd_master - include: refresh_config.yml + when: is_etcd_master diff --git a/roles/etcd/tasks/pre_upgrade.yml b/roles/etcd/tasks/pre_upgrade.yml new file mode 100644 index 0000000000000000000000000000000000000000..d1962ea92fc998631ad85eecba3052489bc9b5c4 --- /dev/null +++ b/roles/etcd/tasks/pre_upgrade.yml @@ -0,0 +1,34 @@ +- name: "Pre-upgrade | check for etcd-proxy unit file" + stat: + path: /etc/systemd/system/etcd-proxy.service + register: kube_apiserver_service_file + +- name: "Pre-upgrade | check for etcd-proxy init script" + stat: + path: /etc/init.d/etcd-proxy + register: kube_apiserver_init_script + +- name: "Pre-upgrade | stop etcd-proxy if service defined" + service: + name: etcd-proxy + state: stopped + when: (kube_apiserver_service_file.stat.exists|default(False) or kube_apiserver_init_script.stat.exists|default(False)) + +- name: "Pre-upgrade | remove etcd-proxy service definition" + file: + path: "{{ item }}" + state: absent + when: (kube_apiserver_service_file.stat.exists|default(False) or kube_apiserver_init_script.stat.exists|default(False)) + with_items: + - /etc/systemd/system/etcd-proxy.service + - /etc/init.d/etcd-proxy + +- name: "Pre-upgrade | find etcd-proxy container" + command: docker ps -aq --filter "name=etcd-proxy*" + register: etcd_proxy_container + ignore_errors: true + +- name: "Pre-upgrade | remove etcd-proxy if it exists" + command: "docker rm -f {{item}}" + with_items: "{{etcd_proxy_container.stdout_lines}}" + diff --git a/roles/etcd/tasks/refresh_config.yml b/roles/etcd/tasks/refresh_config.yml index 178466153d2600fae342e430416eaca0e7cbf291..80a03a7d6feb3f13781f87d904ecd71e49e7bf80 100644 --- a/roles/etcd/tasks/refresh_config.yml +++ b/roles/etcd/tasks/refresh_config.yml @@ -5,10 +5,3 @@ dest: /etc/etcd.env notify: restart etcd when: is_etcd_master - -- name: Refresh config | Create etcd-proxy config file - template: - src: etcd-proxy.j2 - dest: /etc/etcd-proxy.env - notify: restart etcd-proxy - when: is_etcd_proxy diff --git a/roles/etcd/templates/deb-etcd-docker.initd.j2 b/roles/etcd/templates/deb-etcd-docker.initd.j2 index a83aae184d0769a61827bf2cfe2e43cf25ee1b4e..4457b37b94718abd2b8a387eeee9ae6a066d555f 100644 --- a/roles/etcd/templates/deb-etcd-docker.initd.j2 +++ b/roles/etcd/templates/deb-etcd-docker.initd.j2 @@ -19,8 +19,9 @@ DAEMON={{ docker_bin_dir | default("/usr/bin") }}/docker DAEMON_EXEC=`basename $DAEMON` DAEMON_ARGS="run --restart=always --env-file=/etc/etcd.env \ --net=host \ --v /usr/share/ca-certificates/:/etc/ssl/certs:ro \ +-v /etc/ssl/certs:/etc/ssl/certs:ro \ -v /var/lib/etcd:/var/lib/etcd:rw \ +-v {{ etcd_cert_dir }}:{{ etcd_cert_dir }}:ro \ --name={{ etcd_member_name | default("etcd") }} \ {{ etcd_image_repo }}:{{ etcd_image_tag }} \ {% if etcd_after_v3 %} diff --git a/roles/etcd/templates/deb-etcd-proxy-docker.initd.j2 b/roles/etcd/templates/deb-etcd-proxy-docker.initd.j2 deleted file mode 100644 index ad0338a09d72b581bce07510b31a161e1a03e666..0000000000000000000000000000000000000000 --- a/roles/etcd/templates/deb-etcd-proxy-docker.initd.j2 +++ /dev/null @@ -1,120 +0,0 @@ -#!/bin/sh -set -a - -### BEGIN INIT INFO -# Provides: etcd-proxy -# Required-Start: $local_fs $network $syslog -# Required-Stop: -# Default-Start: 2 3 4 5 -# Default-Stop: 0 1 6 -# Short-Description: etcd-proxy -# Description: -# etcd-proxy is a proxy for etcd: distributed, consistent key-value store for shared configuration and service discovery -### END INIT INFO - -PATH=/sbin:/usr/sbin:/bin/:/usr/bin -DESC="etcd-proxy" -NAME=etcd-proxy -DAEMON={{ docker_bin_dir | default("/usr/bin") }}/docker -DAEMON_EXEC=`basename $DAEMON` -DAEMON_ARGS="run --restart=always --env-file=/etc/etcd-proxy.env \ ---net=host \ ---stop-signal=SIGKILL \ --v /usr/share/ca-certificates/:/etc/ssl/certs:ro \ ---name={{ etcd_proxy_member_name | default("etcd-proxy") }} \ -{{ etcd_image_repo }}:{{ etcd_image_tag }} \ -{% if etcd_after_v3 %} -{{ etcd_container_bin_dir }}etcd -{% endif %}" - - -SCRIPTNAME=/etc/init.d/$NAME -DAEMON_USER=root -STOP_SCHEDULE="${STOP_SCHEDULE:-QUIT/5/TERM/5/KILL/5}" -PID=/var/run/etcd-proxy.pid - -# Exit if the binary is not present -[ -x "$DAEMON" ] || exit 0 - -# Define LSB log_* functions. -# Depend on lsb-base (>= 3.2-14) to ensure that this file is present -# and status_of_proc is working. -. /lib/lsb/init-functions - -do_status() -{ - status_of_proc -p $PID "$DAEMON" "$NAME" && exit 0 || exit $? -} - -# Function that starts the daemon/service -# -do_start() -{ - {{ docker_bin_dir | default("/usr/bin") }}/docker rm -f {{ etcd_proxy_member_name | default("etcd-proxy") }} &>/dev/null || true - sleep 1 - start-stop-daemon --background --start --quiet --make-pidfile --pidfile $PID --user $DAEMON_USER --exec $DAEMON -- \ - $DAEMON_ARGS \ - || return 2 -} - -# -# Function that stops the daemon/service -# -do_stop() -{ - start-stop-daemon --stop --quiet --retry=$STOP_SCHEDULE --pidfile $PID --name $DAEMON_EXEC - RETVAL="$?" - - sleep 1 - return "$RETVAL" -} - - -case "$1" in - start) - log_daemon_msg "Starting $DESC" "$NAME" - do_start - case "$?" in - 0|1) log_end_msg 0 || exit 0 ;; - 2) log_end_msg 1 || exit 1 ;; - esac - ;; - stop) - log_daemon_msg "Stopping $DESC" "$NAME" - if do_stop; then - log_end_msg 0 - else - log_failure_msg "Can't stop etcd-proxy" - log_end_msg 1 - fi - ;; - status) - if do_status; then - log_end_msg 0 - else - log_failure_msg "etcd-proxy is not running" - log_end_msg 1 - fi - ;; - - restart|force-reload) - log_daemon_msg "Restarting $DESC" "$NAME" - if do_stop; then - if do_start; then - log_end_msg 0 - exit 0 - else - rc="$?" - fi - else - rc="$?" - fi - log_failure_msg "Can't restart etcd-proxy" - log_end_msg ${rc} - ;; - *) - echo "Usage: $SCRIPTNAME {start|stop|status|restart|force-reload}" >&2 - exit 3 - ;; -esac - diff --git a/roles/etcd/templates/deb-etcd-proxy-host.initd.j2 b/roles/etcd/templates/deb-etcd-proxy-host.initd.j2 deleted file mode 100644 index d0858bb2f9513913ce842f7209b30c0383e6f7f0..0000000000000000000000000000000000000000 --- a/roles/etcd/templates/deb-etcd-proxy-host.initd.j2 +++ /dev/null @@ -1,110 +0,0 @@ -#!/bin/sh -set -a - -### BEGIN INIT INFO -# Provides: etcd-proxy -# Required-Start: $local_fs $network $syslog -# Required-Stop: -# Default-Start: 2 3 4 5 -# Default-Stop: 0 1 6 -# Short-Description: etcd-proxy -# Description: -# etcd-proxy is a proxy for etcd: distributed, consistent key-value store for shared configuration and service discovery -### END INIT INFO - -PATH=/sbin:/usr/sbin:/bin:/usr/bin -DESC="etcd-proxy" -NAME=etcd-proxy -DAEMON={{ bin_dir }}/etcd -DAEMON_ARGS="" -SCRIPTNAME=/etc/init.d/$NAME -DAEMON_USER=etcd -STOP_SCHEDULE="${STOP_SCHEDULE:-QUIT/5/TERM/5/KILL/5}" -PID=/var/run/etcd-proxy.pid - -# Exit if the binary is not present -[ -x "$DAEMON" ] || exit 0 - -# Read configuration variable file if it is present -[ -f /etc/etcd-proxy.env ] && . /etc/etcd-proxy.env - -# Define LSB log_* functions. -# Depend on lsb-base (>= 3.2-14) to ensure that this file is present -# and status_of_proc is working. -. /lib/lsb/init-functions - -do_status() -{ - status_of_proc -p $PID "$DAEMON" "$NAME" && exit 0 || exit $? -} - -# Function that starts the daemon/service -# -do_start() -{ - start-stop-daemon --background --start --quiet --make-pidfile --pidfile $PID --user $DAEMON_USER --exec $DAEMON -- \ - $DAEMON_ARGS \ - || return 2 -} - -# -# Function that stops the daemon/service -# -do_stop() -{ - start-stop-daemon --stop --quiet --retry=$STOP_SCHEDULE --pidfile $PID --name $NAME - RETVAL="$?" - - sleep 1 - return "$RETVAL" -} - - -case "$1" in - start) - log_daemon_msg "Starting $DESC" "$NAME" - do_start - case "$?" in - 0|1) log_end_msg 0 || exit 0 ;; - 2) log_end_msg 1 || exit 1 ;; - esac - ;; - stop) - log_daemon_msg "Stopping $DESC" "$NAME" - if do_stop; then - log_end_msg 0 - else - log_failure_msg "Can't stop etcd-proxy" - log_end_msg 1 - fi - ;; - status) - if do_status; then - log_end_msg 0 - else - log_failure_msg "etcd-proxy is not running" - log_end_msg 1 - fi - ;; - - restart|force-reload) - log_daemon_msg "Restarting $DESC" "$NAME" - if do_stop; then - if do_start; then - log_end_msg 0 - exit 0 - else - rc="$?" - fi - else - rc="$?" - fi - log_failure_msg "Can't restart etcd-proxy" - log_end_msg ${rc} - ;; - *) - echo "Usage: $SCRIPTNAME {start|stop|status|restart|force-reload}" >&2 - exit 3 - ;; -esac - diff --git a/roles/etcd/templates/etcd-docker.service.j2 b/roles/etcd/templates/etcd-docker.service.j2 index a37759fecd75ced245caa360bb1d39c98135c92c..ff40b5b59ff0fb115c858e5761c2bf0cefe39011 100644 --- a/roles/etcd/templates/etcd-docker.service.j2 +++ b/roles/etcd/templates/etcd-docker.service.j2 @@ -11,7 +11,8 @@ ExecStart={{ docker_bin_dir | default("/usr/bin") }}/docker run --restart=always {# TODO(mattymo): Allow docker IP binding and disable in envfile -p 2380:2380 -p 2379:2379 #} --net=host \ --v /usr/share/ca-certificates/:/etc/ssl/certs:ro \ +-v /etc/ssl/certs:/etc/ssl/certs:ro \ +-v {{ etcd_cert_dir }}:{{ etcd_cert_dir }}:ro \ -v /var/lib/etcd:/var/lib/etcd:rw \ --name={{ etcd_member_name | default("etcd") }} \ {{ etcd_image_repo }}:{{ etcd_image_tag }} \ diff --git a/roles/etcd/templates/etcd-proxy-docker.service.j2 b/roles/etcd/templates/etcd-proxy-docker.service.j2 deleted file mode 100644 index bf70f0e7fb7d1c12b625047816c472134780b47e..0000000000000000000000000000000000000000 --- a/roles/etcd/templates/etcd-proxy-docker.service.j2 +++ /dev/null @@ -1,28 +0,0 @@ -[Unit] -Description=etcd-proxy docker wrapper -Wants=docker.socket -After=docker.service - -[Service] -User=root -PermissionsStartOnly=true -ExecStart={{ docker_bin_dir | default("/usr/bin") }}/docker run --restart=always \ ---env-file=/etc/etcd-proxy.env \ -{# TODO(mattymo): Allow docker IP binding and disable in envfile - -p 2380:2380 -p 2379:2379 #} ---net=host \ ---stop-signal=SIGKILL \ --v /usr/share/ca-certificates/:/etc/ssl/certs:ro \ ---name={{ etcd_proxy_member_name | default("etcd-proxy") }} \ -{{ etcd_image_repo }}:{{ etcd_image_tag }} \ -{% if etcd_after_v3 %} -{{ etcd_container_bin_dir }}etcd -{% endif %} -ExecStartPre=-{{ docker_bin_dir | default("/usr/bin") }}/docker rm -f {{ etcd_proxy_member_name | default("etcd-proxy") }} -ExecReload={{ docker_bin_dir | default("/usr/bin") }}/docker restart {{ etcd_proxy_member_name | default("etcd-proxy") }} -ExecStop={{ docker_bin_dir | default("/usr/bin") }}/docker stop {{ etcd_proxy_member_name | default("etcd-proxy") }} -Restart=always -RestartSec=15s - -[Install] -WantedBy=multi-user.target diff --git a/roles/etcd/templates/etcd-proxy-host.service.j2 b/roles/etcd/templates/etcd-proxy-host.service.j2 deleted file mode 100644 index 4ea5f7bc9d583bcddb8dfa7775991df3cb343a8b..0000000000000000000000000000000000000000 --- a/roles/etcd/templates/etcd-proxy-host.service.j2 +++ /dev/null @@ -1,19 +0,0 @@ -[Unit] -Description=etcd-proxy -After=network.target - -[Service] -Type=notify -User=etcd -PermissionsStartOnly=true -EnvironmentFile=/etc/etcd-proxy.env -ExecStart={{ bin_dir }}/etcd -ExecStartPre=/bin/mkdir -p /var/lib/etcd-proxy -ExecStartPre=/bin/chown -R etcd: /var/lib/etcd-proxy -NotifyAccess=all -Restart=always -RestartSec=10s -LimitNOFILE=40000 - -[Install] -WantedBy=multi-user.target diff --git a/roles/etcd/templates/etcd-proxy.j2 b/roles/etcd/templates/etcd-proxy.j2 deleted file mode 100644 index 0a1492a379eabb6c7c9ce1fad0a5c5d32b938951..0000000000000000000000000000000000000000 --- a/roles/etcd/templates/etcd-proxy.j2 +++ /dev/null @@ -1,5 +0,0 @@ -ETCD_DATA_DIR=/var/lib/etcd-proxy -ETCD_PROXY=on -ETCD_LISTEN_CLIENT_URLS={{ etcd_access_endpoint }} -ETCD_NAME={{ etcd_proxy_member_name | default("etcd-proxy") }} -ETCD_INITIAL_CLUSTER={{ etcd_peer_addresses }} diff --git a/roles/etcd/templates/etcd.j2 b/roles/etcd/templates/etcd.j2 index b82116612ac8e02a76d1c43d2a3e17ed3d482857..0b7e1eb9fc49baddd7b4689b05860194068f83a8 100644 --- a/roles/etcd/templates/etcd.j2 +++ b/roles/etcd/templates/etcd.j2 @@ -3,14 +3,19 @@ ETCD_ADVERTISE_CLIENT_URLS={{ etcd_client_url }} ETCD_INITIAL_ADVERTISE_PEER_URLS={{ etcd_peer_url }} ETCD_INITIAL_CLUSTER_STATE={% if etcd_cluster_is_healthy.rc != 0 | bool %}new{% else %}existing{% endif %} -{% if not is_etcd_proxy %} -ETCD_LISTEN_CLIENT_URLS=http://{{ etcd_address }}:2379,http://127.0.0.1:2379 -{% else %} -ETCD_LISTEN_CLIENT_URLS=http://{{ etcd_address }}:2379 -{% endif %} +ETCD_LISTEN_CLIENT_URLS=https://{{ etcd_address }}:2379,https://127.0.0.1:2379 ETCD_ELECTION_TIMEOUT=10000 ETCD_INITIAL_CLUSTER_TOKEN=k8s_etcd -ETCD_LISTEN_PEER_URLS=http://{{ etcd_address }}:2380 +ETCD_LISTEN_PEER_URLS=https://{{ etcd_address }}:2380 ETCD_NAME={{ etcd_member_name }} ETCD_PROXY=off ETCD_INITIAL_CLUSTER={{ etcd_peer_addresses }} + +# TLS settings +ETCD_TRUSTED_CA_FILE={{ etcd_cert_dir }}/ca.pem +ETCD_CERT_FILE={{ etcd_cert_dir }}/node.pem +ETCD_KEY_FILE={{ etcd_cert_dir }}/node-key.pem +ETCD_PEER_TRUSTED_CA_FILE={{ etcd_cert_dir }}/ca.pem +ETCD_PEER_CERT_FILE={{ etcd_cert_dir }}/member.pem +ETCD_PEER_KEY_FILE={{ etcd_cert_dir }}/member-key.pem +ETCD_PEER_CLIENT_CERT_AUTH=true diff --git a/roles/etcd/templates/openssl.conf.j2 b/roles/etcd/templates/openssl.conf.j2 new file mode 100644 index 0000000000000000000000000000000000000000..3ea328289a5f6e8945a06e54fada7555de3ef36d --- /dev/null +++ b/roles/etcd/templates/openssl.conf.j2 @@ -0,0 +1,39 @@ +[req] +req_extensions = v3_req +distinguished_name = req_distinguished_name + +[req_distinguished_name] + +[ v3_req ] +basicConstraints = CA:FALSE +keyUsage = nonRepudiation, digitalSignature, keyEncipherment +subjectAltName = @alt_names + +[ ssl_client ] +extendedKeyUsage = clientAuth, serverAuth +basicConstraints = CA:FALSE +subjectKeyIdentifier=hash +authorityKeyIdentifier=keyid,issuer +subjectAltName = @alt_names + +[ v3_ca ] +basicConstraints = CA:TRUE +keyUsage = nonRepudiation, digitalSignature, keyEncipherment +subjectAltName = @alt_names +authorityKeyIdentifier=keyid:always,issuer + +[alt_names] +DNS.1 = localhost +{% for host in groups['etcd'] %} +DNS.{{ 1 + loop.index }} = {{ host }} +{% endfor %} +{% if loadbalancer_apiserver is defined and apiserver_loadbalancer_domain_name is defined %} +{% set idx = groups['etcd'] | length | int + 1 %} +DNS.{{ idx | string }} = {{ apiserver_loadbalancer_domain_name }} +{% endif %} +{% for host in groups['etcd'] %} +IP.{{ 2 * loop.index - 1 }} = {{ hostvars[host]['access_ip'] | default(hostvars[host]['ansible_default_ipv4']['address']) }} +IP.{{ 2 * loop.index }} = {{ hostvars[host]['ip'] | default(hostvars[host]['ansible_default_ipv4']['address']) }} +{% endfor %} +{% set idx = groups['etcd'] | length | int * 2 + 1 %} +IP.{{ idx }} = 127.0.0.1 diff --git a/roles/kubernetes-apps/ansible/defaults/main.yml b/roles/kubernetes-apps/ansible/defaults/main.yml index b1086aa0d04753dbc7db9bdb129df56ee28fe605..d39d146fd8c1b78af56d4d6b5356a4bddfb03427 100644 --- a/roles/kubernetes-apps/ansible/defaults/main.yml +++ b/roles/kubernetes-apps/ansible/defaults/main.yml @@ -9,4 +9,7 @@ kubedns_image_tag: "{{ kubedns_version }}" kubednsmasq_image_repo: "gcr.io/google_containers/kube-dnsmasq-amd64" kubednsmasq_image_tag: "{{ kubednsmasq_version }}" exechealthz_image_repo: "gcr.io/google_containers/exechealthz-amd64" -exechealthz_image_tag: "{{ exechealthz_version }}" \ No newline at end of file +exechealthz_image_tag: "{{ exechealthz_version }}" + +# SSL +etcd_cert_dir: "/etc/ssl/etcd/ssl" diff --git a/roles/kubernetes-apps/ansible/tasks/calico-policy-controller.yml b/roles/kubernetes-apps/ansible/tasks/calico-policy-controller.yml index f4ac65aeb91844f4cc6cdf35a4c24aed1cdc777c..6ad8dd220c4328e7b1922c4a7c7e21f12fdd4faf 100644 --- a/roles/kubernetes-apps/ansible/tasks/calico-policy-controller.yml +++ b/roles/kubernetes-apps/ansible/tasks/calico-policy-controller.yml @@ -5,6 +5,9 @@ - name: Start of Calico policy controller kube: + name: "calico-policy-controller" kubectl: "{{bin_dir}}/kubectl" - filename: /etc/kubernetes/calico-policy-controller.yml + filename: "/etc/kubernetes/calico-policy-controller.yml" + namespace: "kube-system" + resource: "rs" when: inventory_hostname == groups['kube-master'][0] diff --git a/roles/kubernetes-apps/ansible/tasks/main.yaml b/roles/kubernetes-apps/ansible/tasks/main.yaml index f31eb442bc4fd89e100c5cbb610ad4f7dcd99c55..130a17a6fbd51db251567a0d9f4574f93ac0a0a0 100644 --- a/roles/kubernetes-apps/ansible/tasks/main.yaml +++ b/roles/kubernetes-apps/ansible/tasks/main.yaml @@ -18,6 +18,6 @@ with_items: "{{ manifests.results }}" when: inventory_hostname == groups['kube-master'][0] - - include: tasks/calico-policy-controller.yml - when: enable_network_policy is defined and enable_network_policy == True + when: ( enable_network_policy is defined and enable_network_policy == True ) or + ( kube_network_plugin == 'canal' ) diff --git a/roles/kubernetes-apps/ansible/templates/calico-policy-controller.yml.j2 b/roles/kubernetes-apps/ansible/templates/calico-policy-controller.yml.j2 index 7c0a21cfa4cd66b1bd5abb7e15a4536cdfb4e926..698710b95608f5ad4d576244de3bbaa02b35e9c5 100644 --- a/roles/kubernetes-apps/ansible/templates/calico-policy-controller.yml.j2 +++ b/roles/kubernetes-apps/ansible/templates/calico-policy-controller.yml.j2 @@ -26,7 +26,13 @@ spec: image: calico/kube-policy-controller:latest env: - name: ETCD_ENDPOINTS - value: "{{ etcd_endpoint }}" + value: "{{ etcd_access_endpoint }}" + - name: ETCD_CA_CERT_FILE + value: "{{ etcd_cert_dir }}/ca.pem" + - name: ETCD_CERT_FILE + value: "{{ etcd_cert_dir }}/node.pem" + - name: ETCD_KEY_FILE + value: "{{ etcd_cert_dir }}/node-key.pem" # Location of the Kubernetes API - this shouldn't need to be # changed so long as it is used in conjunction with # CONFIGURE_ETC_HOSTS="true". @@ -38,3 +44,11 @@ spec: # This removes the need for KubeDNS to resolve the Service. - name: CONFIGURE_ETC_HOSTS value: "true" + volumeMounts: + - mountPath: {{ etcd_cert_dir }} + name: etcd-certs + readOnly: true + volumes: + - hostPath: + path: {{ etcd_cert_dir }} + name: etcd-certs diff --git a/roles/kubernetes-apps/ansible/library/kube.py b/roles/kubernetes-apps/lib/library/kube.py similarity index 100% rename from roles/kubernetes-apps/ansible/library/kube.py rename to roles/kubernetes-apps/lib/library/kube.py diff --git a/roles/kubernetes-apps/network_plugin/canal/tasks/main.yaml b/roles/kubernetes-apps/network_plugin/canal/tasks/main.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c6bcd6992dbcb28bd170324b9abc8413a0498e2d --- /dev/null +++ b/roles/kubernetes-apps/network_plugin/canal/tasks/main.yaml @@ -0,0 +1,17 @@ +- name: Create canal ConfigMap + run_once: true + kube: + name: "canal-config" + kubectl: "{{bin_dir}}/kubectl" + filename: "/etc/kubernetes/canal-config.yaml" + resource: "configmap" + namespace: "kube-system" + +- name: Start flannel and calico-node + run_once: true + kube: + name: "canal-node" + kubectl: "{{bin_dir}}/kubectl" + filename: "/etc/kubernetes/canal-node.yaml" + resource: "ds" + namespace: "kube-system" diff --git a/roles/kubernetes-apps/network_plugin/meta/main.yml b/roles/kubernetes-apps/network_plugin/meta/main.yml new file mode 100644 index 0000000000000000000000000000000000000000..14a59e5c5c9ddf76fdb0fd15468501d0f241e063 --- /dev/null +++ b/roles/kubernetes-apps/network_plugin/meta/main.yml @@ -0,0 +1,4 @@ +--- +dependencies: + - role: kubernetes-apps/network_plugin/canal + when: kube_network_plugin == 'canal' diff --git a/roles/kubernetes/master/defaults/main.yml b/roles/kubernetes/master/defaults/main.yml index ee32ccf57ecb4b35fd877190e16a9c23a1dc31f0..269ed37141ad8eb1b095f63693dec33103cca8ad 100644 --- a/roles/kubernetes/master/defaults/main.yml +++ b/roles/kubernetes/master/defaults/main.yml @@ -28,3 +28,9 @@ kube_apiserver_insecure_bind_address: 127.0.0.1 # Logging directory (sysvinit systems) kube_log_dir: "/var/log/kubernetes" + +# ETCD cert dir for connecting apiserver to etcd +etcd_config_dir: /etc/ssl/etcd +etcd_cert_dir: "{{ etcd_config_dir }}/ssl" + + diff --git a/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2 b/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2 index ddd6f20853e8a3aa766232e519a80bf71a5f0a16..97e71716a951cf01a6c47c44bba52b1bbb9569fc 100644 --- a/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2 +++ b/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2 @@ -3,6 +3,8 @@ kind: Pod metadata: name: kube-apiserver namespace: kube-system + labels: + k8s-app: kube-apiserver spec: hostNetwork: true containers: @@ -14,9 +16,12 @@ spec: - --advertise-address={{ ip | default(ansible_default_ipv4.address) }} - --etcd-servers={{ etcd_access_endpoint }} - --etcd-quorum-read=true + - --etcd-cafile={{ etcd_cert_dir }}/ca.pem + - --etcd-certfile={{ etcd_cert_dir }}/node.pem + - --etcd-keyfile={{ etcd_cert_dir }}/node-key.pem - --insecure-bind-address={{ kube_apiserver_insecure_bind_address }} - --apiserver-count={{ kube_apiserver_count }} - - --admission-control=NamespaceLifecycle,NamespaceExists,LimitRanger,ServiceAccount,ResourceQuota + - --admission-control=NamespaceLifecycle,LimitRanger,ServiceAccount,DefaultStorageClass,ResourceQuota - --service-cluster-ip-range={{ kube_service_addresses }} - --client-ca-file={{ kube_cert_dir }}/ca.pem - --basic-auth-file={{ kube_users_dir }}/known_users.csv @@ -50,6 +55,9 @@ spec: - mountPath: /etc/ssl/certs name: ssl-certs-host readOnly: true + - mountPath: {{ etcd_cert_dir }} + name: etcd-certs + readOnly: true - mountPath: /var/log/ name: logfile volumes: @@ -59,6 +67,9 @@ spec: - hostPath: path: /etc/ssl/certs/ name: ssl-certs-host + - hostPath: + path: {{ etcd_cert_dir }} + name: etcd-certs - hostPath: path: /var/log/ name: logfile diff --git a/roles/kubernetes/master/templates/manifests/kube-controller-manager.manifest.j2 b/roles/kubernetes/master/templates/manifests/kube-controller-manager.manifest.j2 index 3a9e1ef1b2e0d1475924ea65ba38f49931c1e6a3..a528f361e22975d170a43712c8a833652370d816 100644 --- a/roles/kubernetes/master/templates/manifests/kube-controller-manager.manifest.j2 +++ b/roles/kubernetes/master/templates/manifests/kube-controller-manager.manifest.j2 @@ -3,6 +3,8 @@ kind: Pod metadata: name: kube-controller-manager namespace: kube-system + labels: + k8s-app: kube-controller spec: hostNetwork: true containers: diff --git a/roles/kubernetes/master/templates/manifests/kube-scheduler.manifest.j2 b/roles/kubernetes/master/templates/manifests/kube-scheduler.manifest.j2 index 024ddbfaacc6e7e407b961a209e18b12b73028c7..15a705937fbee2caefd98f305e2c2fc69ff4c199 100644 --- a/roles/kubernetes/master/templates/manifests/kube-scheduler.manifest.j2 +++ b/roles/kubernetes/master/templates/manifests/kube-scheduler.manifest.j2 @@ -3,6 +3,8 @@ kind: Pod metadata: name: kube-scheduler namespace: kube-system + labels: + k8s-app: kube-scheduler spec: hostNetwork: true containers: diff --git a/roles/kubernetes/node/tasks/main.yml b/roles/kubernetes/node/tasks/main.yml index a8cb6ce5aad9959adb38d26b009b992c0585036c..700f7eb7550c28b2fadcf3691792610993d587eb 100644 --- a/roles/kubernetes/node/tasks/main.yml +++ b/roles/kubernetes/node/tasks/main.yml @@ -11,6 +11,13 @@ owner: kube when: kube_network_plugin == "calico" +- name: Write Canal cni config + template: + src: "cni-canal.conf.j2" + dest: "/etc/cni/net.d/10-canal.conf" + owner: kube + when: kube_network_plugin == "canal" + - name: Write kubelet config file template: src=kubelet.j2 dest={{ kube_config_dir }}/kubelet.env backup=yes notify: diff --git a/roles/kubernetes/node/templates/cni-calico.conf.j2 b/roles/kubernetes/node/templates/cni-calico.conf.j2 index 4615cdabddb31ba5aa830a56460336a90fe7c103..4e9752ef44df725d834e340b486d10b0240a6360 100644 --- a/roles/kubernetes/node/templates/cni-calico.conf.j2 +++ b/roles/kubernetes/node/templates/cni-calico.conf.j2 @@ -1,6 +1,10 @@ { "name": "calico-k8s-network", "type": "calico", + "etcd_endpoints": "{{ etcd_access_endpoint }}", + "etcd_cert_file": "{{ etcd_cert_dir }}/node.pem", + "etcd_key_file": "{{ etcd_cert_dir }}/node-key.pem", + "etcd_ca_cert_file": "{{ etcd_cert_dir }}/ca.pem", "log_level": "info", "ipam": { "type": "calico-ipam" diff --git a/roles/kubernetes/node/templates/cni-canal.conf.j2 b/roles/kubernetes/node/templates/cni-canal.conf.j2 new file mode 100644 index 0000000000000000000000000000000000000000..b835443c79869099d70d231bdd71f9d349e7844a --- /dev/null +++ b/roles/kubernetes/node/templates/cni-canal.conf.j2 @@ -0,0 +1,15 @@ +{ + "name": "canal-k8s-network", + "type": "flannel", + "delegate": { + "type": "calico", + "etcd_endpoints": "{{ etcd_access_endpoint }}", + "log_level": "info", + "policy": { + "type": "k8s" + }, + "kubernetes": { + "kubeconfig": "{{ kube_config_dir }}/node-kubeconfig.yaml" + } + } +} diff --git a/roles/kubernetes/node/templates/kubelet.j2 b/roles/kubernetes/node/templates/kubelet.j2 index 53f2915d9a4cc5143685b8bc9cf9b0317c25fbb5..46678691a7af0ca6698ef49e37398be77c90e4e9 100644 --- a/roles/kubernetes/node/templates/kubelet.j2 +++ b/roles/kubernetes/node/templates/kubelet.j2 @@ -26,7 +26,7 @@ KUBELET_ARGS="--cluster_dns={{ dns_server }} --cluster_domain={{ dns_domain }} - {% else %} KUBELET_ARGS="--kubeconfig={{ kube_config_dir}}/kubelet.kubeconfig --config={{ kube_manifest_dir }} --pod-infra-container-image={{ pod_infra_image_repo }}:{{ pod_infra_image_tag }}" {% endif %} -{% if kube_network_plugin is defined and kube_network_plugin in ["calico", "weave"] %} +{% if kube_network_plugin is defined and kube_network_plugin in ["calico", "weave", "canal"] %} KUBELET_NETWORK_PLUGIN="--network-plugin=cni --network-plugin-dir=/etc/cni/net.d" {% elif kube_network_plugin is defined and kube_network_plugin == "weave" %} DOCKER_SOCKET="--docker-endpoint=unix:/var/run/weave/weave.sock" diff --git a/roles/kubernetes/node/templates/manifests/kube-proxy.manifest.j2 b/roles/kubernetes/node/templates/manifests/kube-proxy.manifest.j2 index 7abffe053526d3f11386402a46d626bc9266841d..86d1e6f9e93e619d2fd95ba382eb30252f6b375d 100644 --- a/roles/kubernetes/node/templates/manifests/kube-proxy.manifest.j2 +++ b/roles/kubernetes/node/templates/manifests/kube-proxy.manifest.j2 @@ -3,6 +3,8 @@ kind: Pod metadata: name: kube-proxy namespace: kube-system + labels: + k8s-app: kube-proxy spec: hostNetwork: true containers: diff --git a/roles/kubernetes/node/templates/manifests/nginx-proxy.manifest.j2 b/roles/kubernetes/node/templates/manifests/nginx-proxy.manifest.j2 index 50e054268088fe3b8213b141fde0244a1e0ef331..8e5dfcc116d94722c06b694d124736524b0d1c76 100644 --- a/roles/kubernetes/node/templates/manifests/nginx-proxy.manifest.j2 +++ b/roles/kubernetes/node/templates/manifests/nginx-proxy.manifest.j2 @@ -3,6 +3,8 @@ kind: Pod metadata: name: nginx-proxy namespace: kube-system + labels: + k8s-app: kube-nginx spec: hostNetwork: true containers: diff --git a/roles/kubernetes/preinstall/defaults/main.yml b/roles/kubernetes/preinstall/defaults/main.yml index 3eae9757d9d464fdc47f1062a8cacd023398d45f..c02a32e29f92f22b692a204197cd8bdcb36bbbea 100644 --- a/roles/kubernetes/preinstall/defaults/main.yml +++ b/roles/kubernetes/preinstall/defaults/main.yml @@ -45,3 +45,6 @@ openstack_username: "{{ lookup('env','OS_USERNAME') }}" openstack_password: "{{ lookup('env','OS_PASSWORD') }}" openstack_region: "{{ lookup('env','OS_REGION_NAME') }}" openstack_tenant_id: "{{ lookup('env','OS_TENANT_ID') }}" + +# All clients access each node individually, instead of using a load balancer. +etcd_multiaccess: true diff --git a/roles/kubernetes/preinstall/tasks/main.yml b/roles/kubernetes/preinstall/tasks/main.yml index 49e69a9079dc700feb03be1fdb6b284fdcf780fa..5c6520ed3b40fdc7dbe8313c458636c66c5f58fb 100644 --- a/roles/kubernetes/preinstall/tasks/main.yml +++ b/roles/kubernetes/preinstall/tasks/main.yml @@ -74,7 +74,7 @@ with_items: - "/etc/cni/net.d" - "/opt/cni/bin" - when: kube_network_plugin in ["calico", "weave"] and "{{ inventory_hostname in groups['k8s-cluster'] }}" + when: kube_network_plugin in ["calico", "weave", "canal"] and "{{ inventory_hostname in groups['k8s-cluster'] }}" - name: Update package management cache (YUM) yum: update_cache=yes name='*' diff --git a/roles/kubernetes/preinstall/tasks/set_facts.yml b/roles/kubernetes/preinstall/tasks/set_facts.yml index 2dd947dda89dcb8932d26ace7f551aaa56cebefe..aec296c6e9b092ef9effcf3483be2b5a7bba759c 100644 --- a/roles/kubernetes/preinstall/tasks/set_facts.yml +++ b/roles/kubernetes/preinstall/tasks/set_facts.yml @@ -23,14 +23,14 @@ - set_fact: etcd_address="{{ ip | default(ansible_default_ipv4['address']) }}" - set_fact: etcd_access_address="{{ access_ip | default(etcd_address) }}" -- set_fact: etcd_peer_url="http://{{ etcd_access_address }}:2380" -- set_fact: etcd_client_url="http://{{ etcd_access_address }}:2379" +- set_fact: etcd_peer_url="https://{{ etcd_access_address }}:2380" +- set_fact: etcd_client_url="https://{{ etcd_access_address }}:2379" - set_fact: etcd_authority="127.0.0.1:2379" -- set_fact: etcd_endpoint="http://{{ etcd_authority }}" +- set_fact: etcd_endpoint="https://{{ etcd_authority }}" - set_fact: etcd_access_addresses: |- {% for item in groups['etcd'] -%} - http://{{ hostvars[item].access_ip | default(hostvars[item].ip | default(hostvars[item].ansible_default_ipv4['address'])) }}:2379{% if not loop.last %},{% endif %} + https://{{ item }}:2379{% if not loop.last %},{% endif %} {%- endfor %} - set_fact: etcd_access_endpoint="{% if etcd_multiaccess %}{{ etcd_access_addresses }}{% else %}{{ etcd_endpoint }}{% endif %}" - set_fact: @@ -41,15 +41,8 @@ - set_fact: etcd_peer_addresses: |- {% for item in groups['etcd'] -%} - {{ "etcd"+loop.index|string }}=http://{{ hostvars[item].access_ip | default(hostvars[item].ip | default(hostvars[item].ansible_default_ipv4['address'])) }}:2380{% if not loop.last %},{% endif %} + {{ "etcd"+loop.index|string }}=https://{{ hostvars[item].access_ip | default(hostvars[item].ip | default(hostvars[item].ansible_default_ipv4['address'])) }}:2380{% if not loop.last %},{% endif %} {%- endfor %} -- set_fact: - etcd_proxy_member_name: |- - {% for host in groups['k8s-cluster'] %} - {% if inventory_hostname == host %}{{"etcd-proxy"+loop.index|string }}{% endif %} - {% endfor %} -- set_fact: - is_etcd_proxy: "{{ inventory_hostname in groups['k8s-cluster'] }}" - set_fact: is_etcd_master: "{{ inventory_hostname in groups['etcd'] }}" - set_fact: diff --git a/roles/kubernetes/secrets/tasks/gen_certs.yml b/roles/kubernetes/secrets/tasks/gen_certs.yml index bec1d9f16165f08203694da752c89fe53cc06fe9..28ae04892e622a6f144b9e05f045d5b669b91e47 100644 --- a/roles/kubernetes/secrets/tasks/gen_certs.yml +++ b/roles/kubernetes/secrets/tasks/gen_certs.yml @@ -65,21 +65,21 @@ when: inventory_hostname in groups['kube-master'] changed_when: false -- name: Gen_certs | target ca-certificates directory +- name: Gen_certs | target ca-certificates path set_fact: - ca_cert_dir: |- + ca_cert_path: |- {% if ansible_os_family == "Debian" -%} - /usr/local/share/ca-certificates + /usr/local/share/ca-certificates/kube-ca.crt {%- elif ansible_os_family == "RedHat" -%} - /etc/pki/ca-trust/source/anchors + /etc/pki/ca-trust/source/anchors/kube-ca.crt {%- elif ansible_os_family == "CoreOS" -%} - /etc/ssl/certs + /etc/ssl/certs/kube-ca.pem {%- endif %} - name: Gen_certs | add CA to trusted CA dir copy: src: "{{ kube_cert_dir }}/ca.pem" - dest: "{{ ca_cert_dir }}/kube-ca.crt" + dest: "{{ ca_cert_path }}" remote_src: true register: kube_ca_cert @@ -87,7 +87,7 @@ command: update-ca-certificates when: kube_ca_cert.changed and ansible_os_family in ["Debian", "CoreOS"] -- name: Gen_certs | update ca-certificatesa (RedHat) +- name: Gen_certs | update ca-certificates (RedHat) command: update-ca-trust extract when: kube_ca_cert.changed and ansible_os_family == "RedHat" diff --git a/roles/network_plugin/calico/defaults/main.yml b/roles/network_plugin/calico/defaults/main.yml index aec7a5e15c78ec3ab7f6aa84f039350299723aa9..7b608ab7e589ca208e52116b5dc03532096d8574 100644 --- a/roles/network_plugin/calico/defaults/main.yml +++ b/roles/network_plugin/calico/defaults/main.yml @@ -8,3 +8,6 @@ ipip: false # Set to true if you want your calico cni binaries to overwrite the # ones from hyperkube while leaving other cni plugins intact. overwrite_hyperkube_cni: true + +calico_cert_dir: /etc/calico/certs +etcd_cert_dir: /etc/ssl/etcd/ssl diff --git a/roles/network_plugin/calico/tasks/main.yml b/roles/network_plugin/calico/tasks/main.yml index 46f7298833b27df0c5b12b8a70cc8c4c86a7d3da..6563a1f65e2286b4e8050bfb7df19a70021a114b 100644 --- a/roles/network_plugin/calico/tasks/main.yml +++ b/roles/network_plugin/calico/tasks/main.yml @@ -12,6 +12,24 @@ - meta: flush_handlers +- name: Calico | Create calico certs directory + file: + dest: "{{ calico_cert_dir }}" + state: directory + mode: 0750 + owner: root + group: root + +- name: Calico | Link etcd certificates for calico-node + file: + src: "{{ etcd_cert_dir }}/{{ item.s }}" + dest: "{{ calico_cert_dir }}/{{ item.d }}" + state: hard + with_items: + - {s: "ca.pem", d: "ca_cert.crt"} + - {s: "node.pem", d: "cert.crt"} + - {s: "node-key.pem", d: "key.pem"} + - name: Calico | Install calicoctl container script template: src: calicoctl-container.j2 @@ -41,59 +59,95 @@ when: "{{ overwrite_hyperkube_cni|bool }}" - name: Calico | wait for etcd - uri: url=http://localhost:2379/health + uri: url=https://localhost:2379/health validate_certs=no register: result - until: result.status == 200 + until: result.status == 200 or result.status == 401 retries: 10 delay: 5 - when: inventory_hostname in groups['kube-master'] + delegate_to: "{{groups['etcd'][0]}}" + run_once: true - name: Calico | Check if calico network pool has already been configured - uri: - url: "{{ etcd_endpoint }}/v2/keys/calico/v1/ipam/v4/pool" - return_content: yes - status_code: 200,404 + command: |- + curl \ + --cacert {{ etcd_cert_dir }}/ca.pem \ + --cert {{ etcd_cert_dir}}/admin.pem \ + --key {{ etcd_cert_dir }}/admin-key.pem \ + https://localhost:2379/v2/keys/calico/v1/ipam/v4/pool register: calico_conf + delegate_to: "{{groups['etcd'][0]}}" run_once: true -- name: Calico | Define ipip pool argument +- name: Calico | Check calicoctl version + run_once: true + set_fact: + legacy_calicoctl: "{{ calicoctl_image_tag | version_compare('v1.0.0', '<') }}" + +- name: Calico | Configure calico network pool + shell: > + echo '{ + "kind": "ipPool", + "spec": {"disabled": false, "ipip": {"enabled": {{ cloud_provider is defined or ipip }}}, + "nat-outgoing": {{ nat_outgoing|default(false) and not peer_with_router|default(false) }}}, + "apiVersion": "v1", + "metadata": {"cidr": "{{ kube_pods_subnet }}"} + }' + | {{ bin_dir }}/calicoctl create -f - + environment: + NO_DEFAULT_POOLS: true + run_once: true + when: (not legacy_calicoctl and + "Key not found" in calico_conf.stdout or "nodes" not in calico_conf.stdout) + +- name: Calico (old) | Define ipip pool argument run_once: true set_fact: ipip_arg: "--ipip" - when: cloud_provider is defined or ipip|default(false) + when: (legacy_calicoctl and + cloud_provider is defined or ipip) -- name: Calico | Define nat-outgoing pool argument +- name: Calico (old) | Define nat-outgoing pool argument run_once: true set_fact: nat_arg: "--nat-outgoing" - when: nat_outgoing|default(false) and not peer_with_router|default(false) + when: (legacy_calicoctl and + nat_outgoing|default(false) and not peer_with_router|default(false)) -- name: Calico | Define calico pool task name +- name: Calico (old) | Define calico pool task name run_once: true set_fact: pool_task_name: "with options {{ ipip_arg|default('') }} {{ nat_arg|default('') }}" - when: ipip_arg|default(false) or nat_arg|default(false) + when: (legacy_calicoctl and ipip_arg|default(false) or nat_arg|default(false)) -- name: Calico | Configure calico network pool {{ pool_task_name|default('') }} +- name: Calico (old) | Configure calico network pool {{ pool_task_name|default('') }} command: "{{ bin_dir}}/calicoctl pool add {{ kube_pods_subnet }} {{ ipip_arg|default('') }} {{ nat_arg|default('') }}" environment: NO_DEFAULT_POOLS: true run_once: true - when: calico_conf.status == 404 or "nodes" not in calico_conf.content + when: (legacy_calicoctl and + "Key not found" in calico_conf.stdout or "nodes" not in calico_conf.stdout) - name: Calico | Get calico configuration from etcd - uri: - url: "{{ etcd_endpoint }}/v2/keys/calico/v1/ipam/v4/pool" - return_content: yes - register: calico_pools + command: |- + curl \ + --cacert {{ etcd_cert_dir }}/ca.pem \ + --cert {{ etcd_cert_dir}}/admin.pem \ + --key {{ etcd_cert_dir }}/admin-key.pem \ + https://localhost:2379/v2/keys/calico/v1/ipam/v4/pool + register: calico_pools_raw + delegate_to: "{{groups['etcd'][0]}}" + run_once: true + +- set_fact: + calico_pools: "{{ calico_pools_raw.stdout | from_json }}" run_once: true - name: Calico | Check if calico pool is properly configured fail: msg: 'Only one network pool must be configured and it must be the subnet {{ kube_pods_subnet }}. Please erase calico configuration and run the playbook again ("etcdctl rm --recursive /calico/v1/ipam/v4/pool")' - when: ( calico_pools.json['node']['nodes'] | length > 1 ) or - ( not calico_pools.json['node']['nodes'][0]['key'] | search(".*{{ kube_pods_subnet | ipaddr('network') }}.*") ) + when: ( calico_pools['node']['nodes'] | length > 1 ) or + ( not calico_pools['node']['nodes'][0]['key'] | search(".*{{ kube_pods_subnet | ipaddr('network') }}.*") ) run_once: true - name: Calico | Write /etc/network-environment @@ -124,11 +178,30 @@ enabled: yes - name: Calico | Disable node mesh - shell: "{{ bin_dir }}/calicoctl bgp node-mesh off" - when: peer_with_router|default(false) and inventory_hostname in groups['kube-node'] + shell: "{{ bin_dir }}/calicoctl config set nodeToNodeMesh off" + when: (not legacy_calicoctl and + peer_with_router|default(false) and inventory_hostname in groups['kube-node']) - name: Calico | Configure peering with router(s) - shell: "{{ bin_dir }}/calicoctl node bgp peer add {{ item.router_id }} as {{ item.as }}" + shell: > + echo '{ + "kind": "bgppeer", + "spec": {"asNumber": {{ item.as }}}, + "apiVersion": "v1", + "metadata": {"node": "rack1-host1", "scope": "node", "peerIP": "{{ item.router_id }}"} + }' + | {{ bin_dir }}/calicoctl create -f - with_items: peers - when: peer_with_router|default(false) and inventory_hostname in groups['kube-node'] + when: (not legacy_calicoctl and + peer_with_router|default(false) and inventory_hostname in groups['kube-node']) +- name: Calico (old) | Disable node mesh + shell: "{{ bin_dir }}/calicoctl bgp node-mesh off" + when: (legacy_calicoctl and + peer_with_router|default(false) and inventory_hostname in groups['kube-node']) + +- name: Calico (old) | Configure peering with router(s) + shell: "{{ bin_dir }}/calicoctl node bgp peer add {{ item.router_id }} as {{ item.as }}" + with_items: peers + when: (legacy_calicoctl and + peer_with_router|default(false) and inventory_hostname in groups['kube-node']) diff --git a/roles/network_plugin/calico/templates/calico-node.service.j2 b/roles/network_plugin/calico/templates/calico-node.service.j2 index a7f7e4bab302e6ba25218f6c1f705ddfefdb6eb1..87a51fac87b8f40ab15cb9845acddd2a406de083 100644 --- a/roles/network_plugin/calico/templates/calico-node.service.j2 +++ b/roles/network_plugin/calico/templates/calico-node.service.j2 @@ -1,17 +1,25 @@ [Unit] Description=Calico per-node agent Documentation=https://github.com/projectcalico/calico-docker -After=docker.service docker.socket etcd-proxy.service -Wants=docker.socket etcd-proxy.service +After=docker.service docker.socket +Wants=docker.socket [Service] User=root PermissionsStartOnly=true +{% if legacy_calicoctl %} {% if inventory_hostname in groups['kube-node'] and peer_with_router|default(false)%} ExecStart={{ bin_dir }}/calicoctl node --ip={{ip | default(ansible_default_ipv4.address) }} --as={{ local_as }} --detach=false --node-image={{ calico_node_image_repo }}:{{ calico_node_image_tag }} -{% else %} +{% else %} ExecStart={{ bin_dir }}/calicoctl node --ip={{ip | default(ansible_default_ipv4.address) }} --detach=false --node-image={{ calico_node_image_repo }}:{{ calico_node_image_tag }} -{% endif %} +{% endif %} +{% else %} +{% if inventory_hostname in groups['kube-node'] and peer_with_router|default(false)%} +ExecStart={{ bin_dir }}/calicoctl node run --ip={{ip | default(ansible_default_ipv4.address) }} --as={{ local_as }} --node-image={{ calico_node_image_repo }}:{{ calico_node_image_tag }} +{% else %} +ExecStart={{ bin_dir }}/calicoctl node run --ip={{ip | default(ansible_default_ipv4.address) }} --node-image={{ calico_node_image_repo }}:{{ calico_node_image_tag }} +{% endif %} +{% endif %} Restart=always RestartSec=10s diff --git a/roles/network_plugin/calico/templates/calicoctl-container.j2 b/roles/network_plugin/calico/templates/calicoctl-container.j2 index 466f1df9395e52d4301f2783a2995b785776a800..7be30928ac15be33dc1ae44d936a2fe84ce71235 100644 --- a/roles/network_plugin/calico/templates/calicoctl-container.j2 +++ b/roles/network_plugin/calico/templates/calicoctl-container.j2 @@ -1,8 +1,13 @@ #!/bin/bash -/usr/bin/docker run --privileged --rm \ ---net=host --pid=host -e ETCD_AUTHORITY={{ etcd_authority }} \ +/usr/bin/docker run -i --privileged --rm \ +--net=host --pid=host \ +-e ETCD_ENDPOINTS={{ etcd_access_endpoint }} \ +-e ETCD_CA_CERT_FILE=/etc/calico/certs/ca_cert.crt \ +-e ETCD_CERT_FILE=/etc/calico/certs/cert.crt \ +-e ETCD_KEY_FILE=/etc/calico/certs/key.pem \ -v /usr/bin/docker:/usr/bin/docker \ -v /var/run/docker.sock:/var/run/docker.sock \ -v /var/run/calico:/var/run/calico \ +-v /etc/calico/certs:/etc/calico/certs:ro \ {{ calicoctl_image_repo }}:{{ calicoctl_image_tag}} \ $@ diff --git a/roles/network_plugin/calico/templates/deb-calico.initd.j2 b/roles/network_plugin/calico/templates/deb-calico.initd.j2 index ddbc22959a2c67fed147d84d462d154e6a660e5a..e155cae9c9ebe207f5dc4ee16c4cf19bfb4ac022 100644 --- a/roles/network_plugin/calico/templates/deb-calico.initd.j2 +++ b/roles/network_plugin/calico/templates/deb-calico.initd.j2 @@ -37,7 +37,7 @@ DAEMON_USER=root do_status() { - if [ $($DOCKER ps | awk '{ print $2 }' | grep calico/node | wc -l) -eq 1 ]; then + if [ $($DOCKER ps --format "{{.Image}}" | grep -cw 'calico/node') -eq 1 ]; then return 0 else return 1 @@ -51,7 +51,11 @@ do_start() do_status retval=$? if [ $retval -ne 0 ]; then +{% if legacy_calicoctl %} ${DAEMON} node --ip=${DEFAULT_IPV4} >>/dev/null && return 0 || return 2 +{% else %} + ${DAEMON} node run --ip=${DEFAULT_IPV4} >>/dev/null && return 0 || return 2 +{% endif %} else return 1 fi @@ -62,7 +66,12 @@ do_start() # do_stop() { +{% if legacy_calicoctl %} ${DAEMON} node stop >> /dev/null || ${DAEMON} node stop --force >> /dev/null +{% else %} + echo "Current version of ${DAEMON} doesn't support 'node stop' command!" + return 1 +{% endif %} } diff --git a/roles/network_plugin/calico/templates/network-environment.j2 b/roles/network_plugin/calico/templates/network-environment.j2 index 086803d1b8abc442839380edbab12f9e43300801..8fd13d36cea7605ea7744615e58fc81ec39a3c85 100644 --- a/roles/network_plugin/calico/templates/network-environment.j2 +++ b/roles/network_plugin/calico/templates/network-environment.j2 @@ -3,7 +3,10 @@ DEFAULT_IPV4={{ip | default(ansible_default_ipv4.address) }} # The Kubernetes master IP -KUBERNETES_MASTER={{ first_kube_master }} +KUBERNETES_MASTER={{ kube_apiserver_endpoint }} # IP and port of etcd instance used by Calico -ETCD_AUTHORITY={{ etcd_authority }} +ETCD_ENDPOINTS={{ etcd_access_endpoint }} +ETCD_CA_CERT_FILE=/etc/calico/certs/ca_cert.crt +ETCD_CERT_FILE=/etc/calico/certs/cert.crt +ETCD_KEY_FILE=/etc/calico/certs/key.pem diff --git a/roles/network_plugin/calico/templates/rh-calico.initd.j2 b/roles/network_plugin/calico/templates/rh-calico.initd.j2 index 6fb870652948d76d2caa8eda09955a675faea472..7fea725217e8987b46915ef572bb23b38409d60a 100644 --- a/roles/network_plugin/calico/templates/rh-calico.initd.j2 +++ b/roles/network_plugin/calico/templates/rh-calico.initd.j2 @@ -31,7 +31,7 @@ logfile="/var/log/$prog" do_status() { - if [ $($dockerexec ps | awk '{ print $2 }' | grep calico/node | wc -l) -ne 1 ]; then + if [ $($dockerexec ps --format "{{.Image}}" | grep -cw 'calico/node') -ne 1 ]; then return 1 fi } @@ -53,7 +53,11 @@ do_start() { if [ $retval -ne 0 ]; then printf "Starting $prog:\t" echo "\n$(date)\n" >> $logfile - $exec node --ip=${DEFAULT_IPV4} &>>$logfile +{% if legacy_calicoctl %} + $exec node --ip=${DEFAULT_IPV4} &>>$logfile +{% else %} + $exec node run --ip=${DEFAULT_IPV4} &>>$logfile +{% endif %} success echo else @@ -65,7 +69,12 @@ do_start() { do_stop() { echo -n $"Stopping $prog: " +{% if legacy_calicoctl %} $exec node stop >> /dev/null || $exec node stop --force >> /dev/null +{% else %} + echo "Current version of ${exec} doesn't support 'node stop' command!" + return 1 +{% endif %} retval=$? echo return $retval diff --git a/roles/network_plugin/canal/defaults/main.yml b/roles/network_plugin/canal/defaults/main.yml new file mode 100644 index 0000000000000000000000000000000000000000..24f7c789baf703d3bf11f28a78b2ac294967e84e --- /dev/null +++ b/roles/network_plugin/canal/defaults/main.yml @@ -0,0 +1,11 @@ +# The interface used by canal for host <-> host communication. +# If left blank, then the interface is chosing using the node's +# default route. +canal_iface: "" + +# Whether or not to masquerade traffic to destinations not within +# the pod network. +canal_masquerade: "true" + +# Log-level +canal_log_level: "info" diff --git a/roles/network_plugin/canal/meta/main.yml b/roles/network_plugin/canal/meta/main.yml new file mode 100644 index 0000000000000000000000000000000000000000..5b8d38d3796d2f16ecd1f6afbf3129e5aefb1781 --- /dev/null +++ b/roles/network_plugin/canal/meta/main.yml @@ -0,0 +1,12 @@ +--- +dependencies: + - role: download + file: "{{ downloads.flannel_server_helper }}" + - role: download + file: "{{ downloads.flannel }}" + - role: download + file: "{{ downloads.calico_node }}" + - role: download + file: "{{ downloads.calicoctl }}" + - role: download + file: "{{ downloads.calico_cni }}" diff --git a/roles/network_plugin/canal/tasks/main.yml b/roles/network_plugin/canal/tasks/main.yml new file mode 100644 index 0000000000000000000000000000000000000000..e88cfad7e024b017a83bca696e1ac0b3a706cc4d --- /dev/null +++ b/roles/network_plugin/canal/tasks/main.yml @@ -0,0 +1,34 @@ +--- +- name: Canal | Set Flannel etcd configuration + command: |- + {{ bin_dir }}/etcdctl --peers={{ etcd_access_addresses }} \ + set /{{ cluster_name }}/network/config \ + '{ "Network": "{{ kube_pods_subnet }}", "SubnetLen": {{ kube_network_node_prefix }}, "Backend": { "Type": "{{ flannel_backend_type }}" } }' + delegate_to: "{{groups['etcd'][0]}}" + run_once: true + +- name: Canal | Write canal configmap + template: + src: canal-config.yml.j2 + dest: /etc/kubernetes/canal-config.yaml + +- name: Canal | Write canal node configuration + template: + src: canal-node.yml.j2 + dest: /etc/kubernetes/canal-node.yaml + +- name: Canal | Copy cni plugins from hyperkube + command: "/usr/bin/docker run --rm -v /opt/cni/bin:/cnibindir {{ hyperkube_image_repo }}:{{ hyperkube_image_tag }} /usr/bin/rsync -a /opt/cni/bin/ /cnibindir/" + register: cni_task_result + until: cni_task_result.rc == 0 + retries: 4 + delay: "{{ retry_stagger | random + 3 }}" + changed_when: false + +- name: Canal | Copy cni plugins from calico/cni + command: "/usr/bin/docker run --rm -v /opt/cni/bin:/cnibindir {{ calico_cni_image_repo }}:{{ calico_cni_image_tag }} sh -c 'cp -a /opt/cni/bin/* /cnibindir/'" + register: cni_task_result + until: cni_task_result.rc == 0 + retries: 4 + delay: "{{ retry_stagger | random + 3 }}" + changed_when: false diff --git a/roles/network_plugin/canal/templates/canal-config.yml.j2 b/roles/network_plugin/canal/templates/canal-config.yml.j2 new file mode 100644 index 0000000000000000000000000000000000000000..34f3faedb1710cf0893906b86faea8ea694fdf93 --- /dev/null +++ b/roles/network_plugin/canal/templates/canal-config.yml.j2 @@ -0,0 +1,22 @@ +# This ConfigMap can be used to configure a self-hosted Canal installation. +# See `canal.yaml` for an example of a Canal deployment which uses +# the config in this ConfigMap. +kind: ConfigMap +apiVersion: v1 +metadata: + name: canal-config +data: + # Configure this with the location of your etcd cluster. + etcd_endpoints: "{{ etcd_access_endpoint }}" + + # The interface used by canal for host <-> host communication. + # If left blank, then the interface is chosing using the node's + # default route. + flanneld_iface: "{{ canal_iface }}" + + # Whether or not to masquerade traffic to destinations not within + # the pod network. + masquerade: "{{ canal_masquerade }}" + + # Cluster name for Flannel etcd path + cluster_name: "{{ cluster_name }}" diff --git a/roles/network_plugin/canal/templates/canal-node.yml.j2 b/roles/network_plugin/canal/templates/canal-node.yml.j2 new file mode 100644 index 0000000000000000000000000000000000000000..ef6793f30df19e4758e24ac3c481e47c6c709251 --- /dev/null +++ b/roles/network_plugin/canal/templates/canal-node.yml.j2 @@ -0,0 +1,119 @@ +--- +kind: DaemonSet +apiVersion: extensions/v1beta1 +metadata: + name: canal-node + labels: + k8s-app: canal-node +spec: + selector: + matchLabels: + k8s-app: canal-node + template: + metadata: + annotations: + scheduler.alpha.kubernetes.io/critical-pod: '' + scheduler.alpha.kubernetes.io/tolerations: '[{"key":"CriticalAddonsOnly", "operator":"Exists"}]' + labels: + k8s-app: canal-node + spec: + hostNetwork: true + volumes: + # Used by calico/node. + - name: lib-modules + hostPath: + path: /lib/modules + - name: var-run-calico + hostPath: + path: /var/run/calico + # Used to install CNI. + - name: cni-bin-dir + hostPath: + path: /opt/cni/bin + - name: cni-net-dir + hostPath: + path: /etc/cni/net.d + # Used by flannel daemon. + - name: run-flannel + hostPath: + path: /run/flannel + - name: resolv + hostPath: + path: /etc/resolv.conf + containers: + # Runs the flannel daemon to enable vxlan networking between + # container hosts. + - name: flannel + image: "{{ flannel_image_repo }}:{{ flannel_image_tag }}" + env: + # Cluster name + - name: CLUSTER_NAME + valueFrom: + configMapKeyRef: + name: canal-config + key: cluster_name + # The location of the etcd cluster. + - name: FLANNELD_ETCD_ENDPOINTS + valueFrom: + configMapKeyRef: + name: canal-config + key: etcd_endpoints + # The interface flannel should run on. + - name: FLANNELD_IFACE + valueFrom: + configMapKeyRef: + name: canal-config + key: flanneld_iface + # Perform masquerade on traffic leaving the pod cidr. + - name: FLANNELD_IP_MASQ + valueFrom: + configMapKeyRef: + name: canal-config + key: masquerade + # Set etcd-prefix + - name: DOCKER_OPT_ETCD_PREFIX + value: "-etcd-prefix=/$(CLUSTER_NAME)/network" + # Write the subnet.env file to the mounted directory. + - name: FLANNELD_SUBNET_FILE + value: "/run/flannel/subnet.env" + command: + - "/bin/sh" + - "-c" + - "/opt/bin/flanneld -etcd-prefix /$(CLUSTER_NAME)/network" + ports: + - hostPort: 10253 + containerPort: 10253 + securityContext: + privileged: true + volumeMounts: + - name: "resolv" + mountPath: "/etc/resolv.conf" + - name: "run-flannel" + mountPath: "/run/flannel" + # Runs calico/node container on each Kubernetes node. This + # container programs network policy and local routes on each + # host. + - name: calico-node + image: "{{ calico_node_image_repo }}:{{ calico_node_image_tag }}" + env: + # The location of the etcd cluster. + - name: ETCD_ENDPOINTS + valueFrom: + configMapKeyRef: + name: canal-config + key: etcd_endpoints + # Disable Calico BGP. Calico is simply enforcing policy. + - name: CALICO_NETWORKING + value: "false" + # Disable file logging so `kubectl logs` works. + - name: CALICO_DISABLE_FILE_LOGGING + value: "true" + securityContext: + privileged: true + volumeMounts: + - mountPath: /lib/modules + name: lib-modules + readOnly: true + - mountPath: /var/run/calico + name: var-run-calico + readOnly: false diff --git a/roles/network_plugin/flannel/tasks/main.yml b/roles/network_plugin/flannel/tasks/main.yml index a6fa183efb1cf36ebfab098b64bc8da4e4ce60fe..8581d2ce7228cb9a60830ec8fd20bd11844e1f11 100644 --- a/roles/network_plugin/flannel/tasks/main.yml +++ b/roles/network_plugin/flannel/tasks/main.yml @@ -1,9 +1,11 @@ --- -- name: Flannel | Write flannel configuration - template: - src: network.json - dest: /etc/flannel-network.json - backup: yes +- name: Flannel | Set Flannel etcd configuration + command: |- + {{ bin_dir }}/etcdctl --peers={{ etcd_access_addresses }} \ + set /{{ cluster_name }}/network/config \ + '{ "Network": "{{ kube_pods_subnet }}", "SubnetLen": {{ kube_network_node_prefix }}, "Backend": { "Type": "{{ flannel_backend_type }}" } }' + delegate_to: "{{groups['etcd'][0]}}" + run_once: true - name: Flannel | Create flannel pod manifest template: diff --git a/roles/network_plugin/flannel/templates/flannel-pod.yml b/roles/network_plugin/flannel/templates/flannel-pod.yml index 15523bdde83c86725e1ff8806805b41981f9b026..02c41e18ba83f3dc0ab4bc3842c3fff53806bd36 100644 --- a/roles/network_plugin/flannel/templates/flannel-pod.yml +++ b/roles/network_plugin/flannel/templates/flannel-pod.yml @@ -12,26 +12,16 @@ - name: "subnetenv" hostPath: path: "/run/flannel" - - name: "networkconfig" + - name: "etcd-certs" hostPath: - path: "/etc/flannel-network.json" + path: "{{ etcd_cert_dir }}" containers: - - name: "flannel-server-helper" - image: "{{ flannel_server_helper_image_repo }}:{{ flannel_server_helper_image_tag }}" - args: - - "--network-config=/etc/flannel-network.json" - - "--etcd-prefix=/{{ cluster_name }}/network" - - "--etcd-server={{ etcd_endpoint }}" - volumeMounts: - - name: "networkconfig" - mountPath: "/etc/flannel-network.json" - imagePullPolicy: "Always" - name: "flannel-container" image: "{{ flannel_image_repo }}:{{ flannel_image_tag }}" command: - "/bin/sh" - "-c" - - "/opt/bin/flanneld -etcd-endpoints {{ etcd_access_endpoint }} -etcd-prefix /{{ cluster_name }}/network {% if flannel_interface is defined %}-iface {{ flannel_interface }}{% endif %} {% if flannel_public_ip is defined %}-public-ip {{ flannel_public_ip }}{% endif %}" + - "/opt/bin/flanneld -etcd-endpoints {{ etcd_access_endpoint }} -etcd-prefix /{{ cluster_name }}/network -etcd-cafile {{ etcd_cert_dir }}/ca.pem -etcd-certfile {{ etcd_cert_dir }}/node.pem -etcd-keyfile {{ etcd_cert_dir }}/node-key.pem {% if flannel_interface is defined %}-iface {{ flannel_interface }}{% endif %} {% if flannel_public_ip is defined %}-public-ip {{ flannel_public_ip }}{% endif %}" ports: - hostPort: 10253 containerPort: 10253 @@ -41,6 +31,8 @@ volumeMounts: - name: "subnetenv" mountPath: "/run/flannel" + - name: "etcd-certs" + mountPath: "{{ etcd_cert_dir }}" securityContext: privileged: true hostNetwork: true diff --git a/roles/network_plugin/flannel/templates/network.json b/roles/network_plugin/flannel/templates/network.json deleted file mode 100644 index cbbec384140c4bca6cc2f19978e952bc66f779be..0000000000000000000000000000000000000000 --- a/roles/network_plugin/flannel/templates/network.json +++ /dev/null @@ -1 +0,0 @@ -{ "Network": "{{ kube_pods_subnet }}", "SubnetLen": {{ kube_network_node_prefix }}, "Backend": { "Type": "{{ flannel_backend_type }}" } } diff --git a/roles/network_plugin/meta/main.yml b/roles/network_plugin/meta/main.yml index 736262ab01530d6efeee777c788cb3770f0081da..8596c9d70bd4d8b083e8368ffa97ae804a82821f 100644 --- a/roles/network_plugin/meta/main.yml +++ b/roles/network_plugin/meta/main.yml @@ -6,3 +6,5 @@ dependencies: when: kube_network_plugin == 'flannel' - role: network_plugin/weave when: kube_network_plugin == 'weave' + - role: network_plugin/canal + when: kube_network_plugin == 'canal' diff --git a/roles/uploads/defaults/main.yml b/roles/uploads/defaults/main.yml index 0774d324c1d327fbe8b190feffec5b47b6532e0c..7b57978817f72dc4fe7c8c4a3d1f82e62296ba14 100644 --- a/roles/uploads/defaults/main.yml +++ b/roles/uploads/defaults/main.yml @@ -5,7 +5,7 @@ local_release_dir: /tmp kube_version: v1.4.3 etcd_version: v3.0.6 -calico_version: v0.22.0 +calico_version: v0.23.0 calico_cni_version: v1.4.2 weave_version: v1.6.1 diff --git a/scripts/collect-info.yaml b/scripts/collect-info.yaml index 67d4c8b3540455be4b873a68eb6f35f23217d20a..0ba47866e07b12dcb880d3cc5579020ae686e357 100644 --- a/scripts/collect-info.yaml +++ b/scripts/collect-info.yaml @@ -1,21 +1,14 @@ --- - hosts: all - become: true + become: false gather_facts: no vars: - debug: false commands: - - name: git_info - cmd: find . -type d -name .git -execdir sh -c 'gen-gitinfos.sh global|head -12' \; - name: timedate_info cmd: timedatectl status - - name: space_info - cmd: df -h - name: kernel_info cmd: uname -r - - name: distro_info - cmd: cat /etc/issue.net - name: docker_info cmd: docker info - name: ip_info @@ -24,23 +17,66 @@ cmd: ip ro - name: proc_info cmd: ps auxf | grep -v ]$ - - name: systemctl_info - cmd: systemctl status - name: systemctl_failed_info cmd: systemctl --state=failed --no-pager - name: k8s_info cmd: kubectl get all --all-namespaces -o wide - name: errors_info cmd: journalctl -p err --utc --no-pager + - name: etcd_info + cmd: etcdctl --debug cluster-health + - name: weave_info + cmd: weave report | jq "." + - name: weave_logs + cmd: docker logs weave > weave.log + - name: kubedns_logs + cmd: sh -c "for i in `kubectl get pods --all-namespaces -l k8s-app=kubedns -o name`; + do kubectl logs $i --namespace kube-system kubedns > kubedns.log; done" + - name: apiserver_logs + cmd: sh -c "for i in `kubectl get pods --all-namespaces -l k8s-app=kube-apiserver -o name`; + do kubectl logs $i --namespace kube-system > kube-apiserver.log; done" + - name: controller_logs + cmd: sh -c "for i in `kubectl get pods --all-namespaces -l k8s-app=kube-controller -o name`; + do kubectl logs $i --namespace kube-system > kube-controller.log; done" + - name: scheduler_logs + cmd: sh -c "for i in `kubectl get pods --all-namespaces -l k8s-app=kube-scheduler -o name`; + do kubectl logs $i --namespace kube-system > kube-scheduler.log; done" + - name: proxy_logs + cmd: sh -c "for i in `kubectl get pods --all-namespaces -l k8s-app=kube-proxy -o name`; + do kubectl logs $i --namespace kube-system > kube-proxy.log; done" + - name: nginx_logs + cmd: sh -c "for i in `kubectl get pods --all-namespaces -l k8s-app=kube-nginx -o name`; + do kubectl logs $i --namespace kube-system > kube-nginx.log; done" + - name: flannel_logs + cmd: sh -c "for i in `kubectl get pods --all-namespaces -l app=flannel -o name`; + do kubectl logs $i --namespace kube-system flannel-container > flannel.log; done" + - name: canal_logs + cmd: sh -c "for i in `kubectl get pods --all-namespaces -l k8s-app=canal-node -o name`; + do kubectl logs $i --namespace kube-system flannel > flannel.log; done" + - name: calico_policy_logs + cmd: sh -c "for i in `kubectl get pods --all-namespaces -l k8s-app=calico-policy -o name`; + do kubectl logs $i --namespace kube-system calico-policy-controller > calico-policy-controller.log; done" logs: - - /var/log/ansible.log - - /var/log/ansible/ansible.log - /var/log/syslog - /var/log/daemon.log - /var/log/kern.log - - inventory/inventory.ini - - cluster.yml + - /var/log/dpkg.log + - /var/log/apt/history.log + - /var/log/yum.log + - /var/log/calico/bird/current + - /var/log/calico/bird6/current + - /var/log/calico/felix/current + - /var/log/calico/confd/current + - weave.log + - kubedns.log + - kube-apiserver.log + - kube-controller.log + - kube-scheduler.log + - kube-proxy.log + - kube-nginx.log + - flannel.log + - calico-policy-controller.log tasks: - name: Storing commands output @@ -48,10 +84,7 @@ register: output ignore_errors: true with_items: "{{commands}}" - - - debug: var=item - with_items: output.results - when: debug + no_log: True - name: Fetch results fetch: src={{ item.name }} dest=/tmp/collect-info/commands @@ -62,7 +95,7 @@ with_items: "{{logs}}" - name: Pack results and logs - local_action: shell GZIP=-9 tar --remove-files -cvzf logs.tar.gz -C /tmp collect-info + local_action: shell GZIP=-9 tar --remove-files -cvzf {{dir|default(".")}}/logs.tar.gz -C /tmp collect-info run_once: true - name: Clean up collected command outputs diff --git a/scripts/configure-logs.yaml b/scripts/configure-logs.yaml deleted file mode 100644 index d093e9279983693ea850040ffcd4d39402cc9a74..0000000000000000000000000000000000000000 --- a/scripts/configure-logs.yaml +++ /dev/null @@ -1,39 +0,0 @@ ---- -- hosts: localhost - become: true - gather_facts: no - - vars: - log_path: /var/log/ansible/ - conf_file: /etc/ansible/ansible.cfg - human_readable_plugin: false - callback_plugin_path: /usr/share/ansible/plugins/callback - - tasks: - - name: LOGS | ensure log path - file: path="{{log_path}}" state=directory owner={{ansible_ssh_user}} - - - name: LOGS | ensure plugin path - file: path="{{callback_plugin_path}}" state=directory owner={{ansible_ssh_user}} - when: human_readable_plugin - - - name: LOGS | get plugin - git: repo=https://gist.github.com/cd706de198c85a8255f6.git dest=/tmp/cd706de198c85a8255f6 - when: human_readable_plugin - - - name: LOGS | install plugin - copy: src=/tmp/cd706de198c85a8255f6/human_log.py dest="{{callback_plugin_path}}" - when: human_readable_plugin - - - name: LOGS | config - lineinfile: - line: "log_path={{log_path}}/ansible.log" - regexp: "^#log_path|^log_path" - dest: "{{conf_file}}" - - - name: LOGS | callback plugin - lineinfile: - line: "callback_plugins={{callback_plugin_path}}" - regexp: "^#callback_plugins|^callback_plugins" - dest: "{{conf_file}}" - when: human_readable_plugin diff --git a/tests/cloud_playbooks/templates/boto.j2 b/tests/cloud_playbooks/templates/boto.j2 new file mode 100644 index 0000000000000000000000000000000000000000..660f1a0a30f544a65fa7e7579867d0b3d282b02c --- /dev/null +++ b/tests/cloud_playbooks/templates/boto.j2 @@ -0,0 +1,11 @@ +[Credentials] +gs_access_key_id = {{ gs_key }} +gs_secret_access_key = {{ gs_skey }} +[Boto] +https_validate_certificates = True +[GoogleCompute] +[GSUtil] +default_project_id = {{ gce_project_id }} +content_language = en +default_api_version = 2 +[OAuth2] diff --git a/tests/cloud_playbooks/templates/gcs_life.json.j2 b/tests/cloud_playbooks/templates/gcs_life.json.j2 new file mode 100644 index 0000000000000000000000000000000000000000..a666c8feffd1668ece36ba95a24ed6c4043dd3d8 --- /dev/null +++ b/tests/cloud_playbooks/templates/gcs_life.json.j2 @@ -0,0 +1,9 @@ +{ + "rule": + [ + { + "action": {"type": "Delete"}, + "condition": {"age": {{expire_days}}} + } + ] +} diff --git a/tests/cloud_playbooks/upload-logs-gcs.yml b/tests/cloud_playbooks/upload-logs-gcs.yml new file mode 100644 index 0000000000000000000000000000000000000000..80d651ba440670b74323dc2a98efeeec5a81c85b --- /dev/null +++ b/tests/cloud_playbooks/upload-logs-gcs.yml @@ -0,0 +1,75 @@ +--- +- hosts: localhost + become: false + gather_facts: no + + vars: + expire_days: 2 + + tasks: + - name: Generate uniq bucket name prefix + shell: date +%Y%m%d + register: out + + - name: replace_test_id + set_fact: + test_name: "kargo-ci-{{ out.stdout }}" + + - set_fact: + file_name: "{{ostype}}-{{kube_network_plugin}}-{{commit}}-logs.tar.gz" + + - name: Create a bucket + gc_storage: + bucket: "{{ test_name }}" + mode: create + permission: public-read + gs_access_key: "{{ gs_key }}" + gs_secret_key: "{{ gs_skey }}" + no_log: True + + - name: Create a lifecycle template for the bucket + template: + src: gcs_life.json.j2 + dest: "{{dir}}/gcs_life.json" + + - name: Create a boto config to access GCS + template: + src: boto.j2 + dest: "{{dir}}/.boto" + no_log: True + + - name: Download gsutil cp installer + get_url: + url: https://dl.google.com/dl/cloudsdk/channels/rapid/install_google_cloud_sdk.bash + dest: "{{dir}}/gcp-installer.sh" + + - name: Get gsutil tool + script: "{{dir}}/gcp-installer.sh" + environment: + CLOUDSDK_CORE_DISABLE_PROMPTS: 1 + CLOUDSDK_INSTALL_DIR: "{{dir}}" + no_log: True + ignore_errors: true + + - name: Apply the lifecycle rules + command: "{{dir}}/google-cloud-sdk/bin/gsutil lifecycle set {{dir}}/gcs_life.json gs://{{test_name}}" + environment: + BOTO_CONFIG: "{{dir}}/.boto" + no_log: True + + - name: Upload collected diagnostic info + gc_storage: + bucket: "{{ test_name }}" + mode: put + permission: public-read + object: "{{ file_name }}" + src: "{{dir}}/logs.tar.gz" + headers: '{"Content-Encoding": "x-gzip"}' + gs_access_key: "{{ gs_key }}" + gs_secret_key: "{{ gs_skey }}" + expiration: "{{expire_days * 36000|int}}" + ignore_errors: true + no_log: True + + - debug: + msg: "A public url https://storage.googleapis.com/{{test_name}}/{{file_name}}"