kubespray

[!NOTE|label:references:]

kubespray

[!NOTE|label:references:]

  • variables:

    $ VERSION='v2.26.0'
    $ INVENTORY_DIR='sms-k8s'
    $ SSH_KEY_NAME='sms-k8s'

requirements

setup ssh key to root user

$ ssh-keygen -t ed25519 -C "${SSH_KEY_NAME}" -f ~/.ssh/"${SSH_KEY_NAME}" -P ''

# copy to remote servers
$ echo {01..05} | fmt -1 | while read -r _id; do
    ssh root@k8s-${_id} "[[ -d /root/.ssh ]] || mkdir -p /root/.ssh; chmod 700 /root/.ssh; [[ -f /root/.ssh/authorized_keys ]] || touch /root/.ssh/authorized_keys; chmod 600 /root/.ssh/authorized_keys"
    ssh root@k8s-${_id} "cat >> /root/.ssh/authorized_keys" < ~/.ssh/"${SSH_KEY_NAME}".pub
  done

# verify
$ ssh -vT -i ~/.ssh/"${SSH_KEY_NAME}" root@k8s-01

modify inventory

$ git clone --branch "${VERSION}" https://github.com/kubernetes-sigs/kubespray.git kubespray-${VERSION}

# setup hosts.yaml
$ declare -a IPS=(10.68.78.221 10.68.78.222 10.68.78.223)
$ CONFIG_FILE=inventory/"${INVENTORY_DIR}"/hosts.yaml python3 contrib/inventory_builder/inventory.py ${IPS[@]}

using container

# same version with kubespray ansbile playbook
$ docker pull quay.io/kubespray/kubespray:${VERSION}

$ cp -r ${INVENTORY_DIR} ~/kubespary-${VERSION}/inventory/
$ docker run --rm -it \
         --mount type=bind,source="$(pwd)"/kubespray-${VERSION}/inventory/${INVENTORY_DIR},dst=/kubespray/inventory/${INVENTORY_DIR} \
         --mount type=bind,source="${HOME}"/.ssh/${SSH_KEY_NAME},dst=/kubespray/inventory/${INVENTORY_DIR}/.ssh/${SSH_KEY_NAME} \
         quay.io/kubespray/kubespray:${VERSION} \
         bash

# inside container
$ ansible-playbook -i inventory/${INVENTORY_DIR}/hosts.yaml \
                   --become --become-user=root \
                   --private-key /kubespray/inventory/${INVENTORY_DIR}/.ssh/${SSH_KEY_NAME} \
                   <ACTION>.yml --flush-cache -v

using local environment

  • python3

    $ sudo dnf install -y python3.12
    $ sudo update-alternatives --config python3
    $ python3 < <(curl -s https://bootstrap.pypa.io/get-pip.py)
    $ python3 -m pip install --upgrade pip
  • ansible install

    $ python3 -m pip install pipx
    $ pipx install ansible --include-deps
    # venv
    $ python3 -m venv ~/.venv/kubespray
    $ source ~/.venv/kubespray/bin/activate
    
    # install requirements
    $ python3 -m pip install -U -r requirements.txt
    $ python3 -m pip isntall -U -r contrib/inventory_builder/requirements.txt
    $ python3 -m pip install selinux

execute ansible playbook

reset

$ ansible-playbook -i inventory/"${INVENTORY_DIR}"/hosts.yaml \
                   --become --become-user=root \
                   --private-key inventory/${INVENTORY_DIR}/.ssh/${SSH_KEY_NAME} \
                   reset.yml --flush-cache -v

deploy

$ ansible-playbook -i inventory/"${INVENTORY_DIR}"/hosts.yaml \
                   --become --become-user=root \
                   --private-key inventory/${INVENTORY_DIR}/.ssh/${SSH_KEY_NAME} \
                   cluster.yml --flush-cache -v

# running in k8s control plane
$ [[ -d $HOME/.kube ]] && rm -rf $HOME/.kube
$ mkdir -p $HOME/.kube
$ sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
$ sudo chown $(id -u):$(id -g) $HOME/.kube/config
PLAY RECAP *********************************************************************************************************************************
k8s-01              : ok=664  changed=104  unreachable=0    failed=0    skipped=1075 rescued=0    ignored=5
k8s-02              : ok=565  changed=90   unreachable=0    failed=0    skipped=952  rescued=0    ignored=2
k8s-03              : ok=567  changed=93   unreachable=0    failed=0    skipped=950  rescued=0    ignored=2
k8s-04              : ok=403  changed=50   unreachable=0    failed=0    skipped=622  rescued=0    ignored=0
k8s-05              : ok=403  changed=50   unreachable=0    failed=0    skipped=618  rescued=0    ignored=0

Friday 13 September 2024  02:36:27 +0000 (0:00:00.283)       3:06:50.832 ******
===============================================================================
etcd : Gen_certs | Write etcd member/admin and kube_control_plane client certs to other etcd nodes ------------------------------- 1412.84s
/kubespray/roles/etcd/tasks/gen_certs_script.yml:87 ---------------------------------------------------------------------------------------
kubernetes-apps/ingress_controller/ingress_nginx : NGINX Ingress Controller | Create manifests ------------------------------------ 779.09s
/kubespray/roles/kubernetes-apps/ingress_controller/ingress_nginx/tasks/main.yml:49 -------------------------------------------------------
kubernetes-apps/ansible : Kubernetes Apps | Lay Down CoreDNS templates ------------------------------------------------------------ 778.89s
/kubespray/roles/kubernetes-apps/ansible/tasks/coredns.yml:2 ------------------------------------------------------------------------------
network_plugin/calico : Calico | Create calico manifests -------------------------------------------------------------------------- 424.96s
/kubespray/roles/network_plugin/calico/tasks/install.yml:371 ------------------------------------------------------------------------------
policy_controller/calico : Create calico-kube-controllers manifests --------------------------------------------------------------- 280.41s
/kubespray/roles/kubernetes-apps/policy_controller/calico/tasks/main.yml:2 ----------------------------------------------------------------
etcd : Gen_certs | Gather etcd member/admin and kube_control_plane client certs from first etcd node ------------------------------ 210.21s
/kubespray/roles/etcd/tasks/gen_certs_script.yml:63 ---------------------------------------------------------------------------------------
kubernetes/control-plane : Joining control plane node to the cluster. ------------------------------------------------------------- 128.20s
/kubespray/roles/kubernetes/control-plane/tasks/kubeadm-secondary.yml:86 ------------------------------------------------------------------
kubernetes-apps/ingress_controller/ingress_nginx : NGINX Ingress Controller | Apply manifests ------------------------------------- 113.23s
/kubespray/roles/kubernetes-apps/ingress_controller/ingress_nginx/tasks/main.yml:59 -------------------------------------------------------
etcdctl_etcdutl : Copy etcdctl and etcdutl binary from download dir ---------------------------------------------------------------- 81.16s
/kubespray/roles/etcdctl_etcdutl/tasks/main.yml:30 ----------------------------------------------------------------------------------------
kubernetes/control-plane : Kubeadm | Initialize first master ----------------------------------------------------------------------- 73.56s
/kubespray/roles/kubernetes/control-plane/tasks/kubeadm-setup.yml:192 ---------------------------------------------------------------------
kubernetes/control-plane : Install script to renew K8S control plane certificates -------------------------------------------------- 70.95s
/kubespray/roles/kubernetes/control-plane/tasks/main.yml:100 ------------------------------------------------------------------------------
container-engine/nerdctl : Nerdctl | Install nerdctl configuration ----------------------------------------------------------------- 70.88s
/kubespray/roles/container-engine/nerdctl/tasks/main.yml:29 -------------------------------------------------------------------------------
container-engine/crictl : Install crictl config ------------------------------------------------------------------------------------ 70.86s
/kubespray/roles/container-engine/crictl/tasks/crictl.yml:7 -------------------------------------------------------------------------------
etcdctl_etcdutl : Create etcdctl wrapper script ------------------------------------------------------------------------------------ 70.85s
/kubespray/roles/etcdctl_etcdutl/tasks/main.yml:41 ----------------------------------------------------------------------------------------
kubernetes/node : Write kubelet systemd init file ---------------------------------------------------------------------------------- 70.83s
/kubespray/roles/kubernetes/node/tasks/kubelet.yml:31 -------------------------------------------------------------------------------------
kubernetes/preinstall : NetworkManager | Prevent NetworkManager from managing Calico interfaces (cali*/tunl*/vxlan.calico) --------- 70.82s
/kubespray/roles/kubernetes/preinstall/tasks/0062-networkmanager-unmanaged-devices.yml:8 --------------------------------------------------
kubernetes/preinstall : NetworkManager | Prevent NetworkManager from managing K8S interfaces (kube-ipvs0/nodelocaldns) ------------- 70.82s
/kubespray/roles/kubernetes/preinstall/tasks/0062-networkmanager-unmanaged-devices.yml:21 -------------------------------------------------
kubernetes-apps/cluster_roles : Kubernetes Apps | Add ClusterRoleBinding to admit nodes -------------------------------------------- 70.81s
/kubespray/roles/kubernetes-apps/cluster_roles/tasks/main.yml:14 --------------------------------------------------------------------------
etcd : Configure | Copy etcd.service systemd file ---------------------------------------------------------------------------------- 70.79s
/kubespray/roles/etcd/tasks/configure.yml:48 ----------------------------------------------------------------------------------------------
etcd : Gen_certs | write openssl config -------------------------------------------------------------------------------------------- 70.78s
/kubespray/roles/etcd/tasks/gen_certs_script.yml:20 ---------------------------------------------------------------------------------------

$ ansible-playbook -i inventory/"${INVENTORY_DIR}"/hosts.yaml \
                   --become --become-user=root \
                   --private-key inventory/${INVENTORY_DIR}/.ssh/${SSH_KEY_NAME} \
                   scale.yml --flush-cache -v

$ ansible-playbook -i inventory/"${INVENTORY_DIR}"/hosts.yaml \
                   --become --become-user=root \
                   --private-key inventory/${INVENTORY_DIR}/.ssh/${SSH_KEY_NAME} \
                   --extra-vars "node=nodename,nodename2" \
                   remove-node.yml --flush-cache -v

  • upgrade kubernetes

    # i.e.: v1.18.10 to v1.19.7
    $ ansible-playbook -i inventory/"${INVENTORY_DIR}"/hosts.yaml \
                       -e kube_version=v1.18.10 -e upgrade_cluster_setup=true \
                       cluster.yml --flush-cache -v
    $ ansible-playbook -i inventory/"${INVENTORY_DIR}"/hosts.yaml \
                       -e kube_version=v1.19.7 -e upgrade_cluster_setup=true \
                       cluster.yml --flush-cache -v
  • gracefully upgrade

    $ ansible-playbook -i inventory/"${INVENTORY_DIR}"/hosts.yaml \
                       -e kube_version=v1.19.7 \
                       upgrade-cluster.yml --flush-cache --become
    
    # upgrade one node at a time
    $ ansible-playbook -i inventory/sample/hosts.yaml \
                       -e kube_version=v1.20.7 \
                       -e "serial=1" \                         # upgrade one node at a time
                       upgrade-cluster.yml --flush-cache --become

addons

grafana

$ helm repo add grafana https://grafana.github.io/helm-charts
$ helm repo list
NAME                    URL
kubernetes-dashboard    https://kubernetes.github.io/dashboard/
ingress-nginx           https://kubernetes.github.io/ingress-nginx
grafana                 https://grafana.github.io/helm-charts

$ helm repo update
$ helm search repo grafana/grafana

$ helm install grafana grafana/grafana --namespace monitoring --create-namespace

tips

CRIO

  • roles/kubernetes/preinstall/tasks/0080-system-configurations.yml:

    [!NOTE|label:references:]

    • remove task Set selinux policy in roles/kubernetes/preinstall/tasks/0080-system-configurations.yml

      14   - name: Set selinux policy
      15     ansible.posix.selinux:
      16       policy: targeted
      17       state: "{{ preinstall_selinux_state }}"
      18     when:
      19       - ansible_os_family == "RedHat"
      20       - "'Amazon' not in ansible_distribution"
      21       - slc.stat.exists
      22     tags:
      23       - bootstrap-os
  • /usr/local/bin/crictl cannot be found

    • all/all.yaml

      bin_dir: /usr/bin
    • or mapping crictl

      # in all control nodes
      $ sudo ln -s /usr/bin/crictl /usr/local/bin/crictl
  • roles/kubernetes/node/tasks/facts.yml

    • original:

      17   - name: Gather cgroups facts for crio
      18     when: container_manager == 'crio'
      19     block:
      20     - name: Look up crio cgroup driver
      21       shell: "set -o pipefail && {{ bin_dir }}/{{ crio_status_command }} info | grep 'cgroup driver' | awk -F': ' '{ print $2; }'"
    • modify to:

      17   - name: Gather cgroups facts for crio
      18     when: container_manager == 'crio'
      19     block:
      20     - name: Look up crio cgroup driver
      21       shell: "set -o pipefail && sudo systemctl start crio; /usr/bin/crio status info | grep 'cgroup driver' | awk -F': ' '{ print $2; }'"
  • roles/download/tasks/download_container.yml:

    • delete task Download_container | Download image if required

      57       - name: Download_container | Download image if required
      58         command: "{{ image_pull_command_on_localhost if download_localhost else image_pull_command }} {{ image_reponame }}"
      59         delegate_to: "{{ download_delegate if download_run_once else inventory_hostname }}"
      60         delegate_facts: true
      61         run_once: "{{ download_run_once }}"
      62         register: pull_task_result
      63         until: pull_task_result is succeeded
      64         delay: "{{ retry_stagger | random + 3 }}"
      65         retries: "{{ download_retries }}"
      66         become: "{{ user_can_become_root | default(false) or not download_localhost }}"
      67         environment: "{{ proxy_env if container_manager == 'containerd' else omit }}"
      68         when:
      69           - pull_required or download_run_once
      70           - not image_is_cached

manual remove kubernetes-dashboard

[!NOTE|label:references:]

$ kubectl -n kubernetes-dashboard delete deploy       kubernetes-metrics-scraper
$ kubectl -n kubernetes-dashboard delete deploy       kubernetes-dashboard
$ kubectl -n kubernetes-dashboard delete service      dashboard-metrics-scraper
$ kubectl -n kubernetes-dashboard delete service      kubernetes-dashboard
$ kubectl -n kubernetes-dashboard delete sa           kubernetes-dashboard
$ kubectl -n kubernetes-dashboard delete rolebindings kubernetes-dashboard
$ kubectl -n kubernetes-dashboard delete role         kubernetes-dashboard
$ kubectl -n kubernetes-dashboard delete cm           kubernetes-dashboard-settings
$ kubectl -n kubernetes-dashboard delete secrets      kubernetes-dashboard-key-holder
$ kubectl -n kubernetes-dashboard delete secrets      kubernetes-dashboard-csrf
$ kubectl -n kubernetes-dashboard delete secrets      kubernetes-dashboard-certs

$ kubectl delete ClusterRoleBinding kubernetes-dashboard
$ kubectl delete ClusterRole        kubernetes-dashboard

Last updated