#!/bin/bash set -e set -E # cause 'trap funcname ERR' to be inherited by child commands, see https://stackoverflow.com/questions/35800082/how-to-trap-err-when-using-set-e-in-bash MASTER=1 DIR=. KURL_URL="https://kurl.sh" DIST_URL="https://s3.kurl.sh/dist" FALLBACK_URL="https://kurl-sh.s3.amazonaws.com/dist" INSTALLER_ID="cloning-capability-app" KURL_VERSION="v2025.04.03-0" CRICTL_VERSION=1.20.0 REPLICATED_APP_URL="https://replicated.app" KURL_UTIL_IMAGE="replicated/kurl-util:v2025.04.03-0" KURL_BIN_UTILS_FILE="kurl-bin-utils-v2025.04.03-0.tar.gz" # STEP_VERSIONS array is generated by the server and injected at runtime based on supported k8s versions STEP_VERSIONS=(0.0.0 0.0.0 0.0.0 0.0.0 0.0.0 0.0.0 0.0.0 0.0.0 0.0.0 0.0.0 0.0.0 0.0.0 0.0.0 0.0.0 0.0.0 0.0.0 1.16.4 1.17.13 1.18.20 1.19.16 1.20.15 1.21.14 1.22.17 1.23.17 1.24.17 1.25.14 1.26.15 1.27.16 1.28.15 1.29.15 1.30.11 1.31.7 1.32.3) # ROOK_STEP_VERSIONS array is generated by the server and injected at runtime based on supported rook versions ROOK_STEP_VERSIONS=(1.0.4-14.2.21 0.0.0 0.0.0 0.0.0 1.4.9 1.5.12 1.6.11 1.7.11 1.8.10 1.9.12 1.10.11 1.11.8 1.12.8 1.13.10 1.14.12 1.15.8 1.16.6) # CONTAINERD_STEP_VERSIONS array is generated by the server and injected at runtime based on supported containerd versions CONTAINERD_STEP_VERSIONS=(0.0.0 0.0.0 1.2.13 1.3.9 1.4.13 1.5.11 1.6.33 1.7.26) INSTALLER_YAML="apiVersion: cluster.kurl.sh/v1beta1 kind: Installer metadata: name: cloning-capability-app spec: kubernetes: version: 1.30.11 flannel: version: 0.26.0 openebs: isLocalPVEnabled: true localPVStorageClassName: default version: 3.10.0 kotsadm: applicationSlug: cloning-capability-app disableS3: true version: 1.124.16 s3Override: https://s3.kurl.sh/external/kotsadm-1.124.16.tar.gz ekco: version: 0.28.7 kurl: installerVersion: v2025.04.03-0 hostPreflights: apiVersion: troubleshoot.sh/v1beta2 kind: HostPreflight spec: analyzers: - udpPortStatus: checkName: Flannel UDP port 8472 status collectorName: Flannel UDP port 8472 exclude: '{{kurl .IsUpgrade }}' outcomes: - warn: message: Another process is already listening on port when: address-in-use - fail: message: Unexpected port status when: error - pass: message: Port is open when: connected - warn: message: Unexpected port status collectors: - udpPortStatus: collectorName: Flannel UDP port 8472 exclude: '{{kurl .IsUpgrade }}' port: 8472 licenseURL: https://www.red-gate.com/assets/purchase/assets/subscription-license.txt additionalNoProxyAddresses: [] containerd: version: 1.6.32 " # shellcheck disable=SC2148 # no shebang as this is a composite script function kurl_init_config() { if kubernetes_resource_exists kurl configmap kurl-current-config; then kubectl delete configmap -n kurl kurl-last-config || true kubectl get configmap -n kurl -o json kurl-current-config | sed 's/kurl-current-config/kurl-last-config/g' | kubectl apply -f - kubectl delete configmap -n kurl kurl-current-config || true else kubectl create configmap -n kurl kurl-last-config fi kubectl create configmap -n kurl kurl-current-config kurl_set_current_version } function kurl_set_current_version() { if [ -z "${KURL_VERSION}" ]; then return fi kubectl patch configmaps -n kurl kurl-current-config --type merge -p "{\"data\":{\"kurl-version\":\"${KURL_VERSION}\"}}" } function kurl_install_support_bundle_configmap() { cat </dev/null | head -n 1)" if [ -n "$package_path" ]; then # the package already exists, no need to download it printf "The package %s is already available locally.\n" "$(basename "$package_path")" else # prompt the user 
to give us the package printf "The package %s %s is not available locally, and is required.\n" "$name" "$version" printf "\nYou can download it with the following command:\n" printf "\n${GREEN} curl -LO %s${NC}\n\n" "$(get_dist_url)/$package_name" addon_fetch_airgap_prompt_for_package "$package_name" fi fi printf "Unpacking %s %s...\n" "$name" "$version" tar xf "$package_path" --no-same-owner # do not source the addon here as the kubernetes "addon" uses this function but is not an addon } # addon_fetch_multiple_airgap checks if the files are already present - if they are, use that # if they are not, prompt the user to provide them as a single package # if the user does not provide the files, bail # exports the package filepath for later cleanup function addon_fetch_multiple_airgap() { local addon_versions=( "$@" ) local missing_addon_versions=() export AIRGAP_MULTI_ADDON_PACKAGE_PATH= for addon_version in "${addon_versions[@]}"; do local name=, version= name=$(echo "$addon_version" | cut -d- -f1) version=$(echo "$addon_version" | cut -d- -f2) local package_name="$name-$version.tar.gz" local package_path= package_path="$(package_filepath "$package_name")" if [ -f "$package_path" ]; then # the package already exists, no need to download it printf "The package %s %s is already available locally.\n" "$name" "$version" printf "Unpacking %s...\n" "$package_name" if ! tar xf "$package_path" --no-same-owner ; then bail "Failed to unpack $package_name" fi else # the package does not exist, add it to the list of missing packages missing_addon_versions+=("$addon_version") fi done if [ "${#missing_addon_versions[@]}" -gt 0 ]; then local package_list= package_list=$(printf ",%s" "${missing_addon_versions[@]}") # join with commas package_list="${package_list:1}" local package_name="$package_list.tar.gz" local package_path= package_path="$(package_filepath "$package_name")" AIRGAP_MULTI_ADDON_PACKAGE_PATH="$package_path" if [ -f "$package_path" ]; then # the package already exists, no need to download it printf "The package %s is already available locally.\n" "$package_name" else local bundle_url="$KURL_URL/bundle" if [ -n "$KURL_VERSION" ]; then bundle_url="$bundle_url/version/$KURL_VERSION" fi bundle_url="$bundle_url/$INSTALLER_ID/packages/$package_name" printf "The following packages are not available locally, and are required:\n" # prompt the user to give us the packages for addon_version in "${missing_addon_versions[@]}"; do printf " %s\n" "$addon_version.tar.gz" done printf "\nYou can download them with the following command:\n" printf "\n${GREEN} curl -LO %s${NC}\n\n" "$bundle_url" addon_fetch_airgap_prompt_for_package "$package_name" fi printf "Unpacking %s...\n" "$package_name" if ! tar xf "$package_path" --no-same-owner ; then bail "Failed to unpack $package_name" fi # do not source the addon here as we are loading multiple addons that may conflict # also the kubernetes "addon" uses this function but is not an addon fi } # addon_fetch_airgap_prompt_for_package prompts the user do download a package function addon_fetch_airgap_prompt_for_package() { local package_name="$1" local package_path= package_path=$(package_filepath "$package_name") if ! prompts_can_prompt; then # we can't ask the user to give us the file because there are no prompts, but we can say where to put it for a future run bail "Please move this file to $KURL_INSTALL_DIRECTORY/$package_path before rerunning the installer." 
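        # Illustrative example (the package name is a placeholder built from the kotsadm version
        # in this spec): with the default install directory, package_filepath resolves that
        # destination to /var/lib/kurl/assets/kotsadm-1.124.16.tar.gz.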
fi printf "Please provide the path to the file on the server.\n" printf "Absolute path to file: " prompt if [ -n "$PROMPT_RESULT" ]; then local loaded_package_path="$PROMPT_RESULT" if [ ! -f "$loaded_package_path" ]; then bail "The file $loaded_package_path does not exist." fi mkdir -p "$(dirname "$package_path")" log "Copying $loaded_package_path to $package_path" cp "$loaded_package_path" "$package_path" else logFail "Package $package_name not provided." logFail "You can provide the path to this file the next time the installer is run," bail "or move it to $KURL_INSTALL_DIRECTORY/$package_path to be detected automatically.\n" fi } function addon_outro() { if [ -n "$PROXY_ADDRESS" ]; then ADDONS_HAVE_HOST_COMPONENTS=1 fi if [ "$ADDONS_HAVE_HOST_COMPONENTS" = "1" ] && kubernetes_has_remotes; then local common_flags common_flags="${common_flags}$(get_docker_registry_ip_flag "${DOCKER_REGISTRY_IP}")" local no_proxy_addresses="" [ -n "$ADDITIONAL_NO_PROXY_ADDRESSES" ] && no_proxy_addresses="$ADDITIONAL_NO_PROXY_ADDRESSES" [ -n "${SERVICE_CIDR}" ] && no_proxy_addresses="${no_proxy_addresses:+$no_proxy_addresses,}${SERVICE_CIDR}" [ -n "${POD_CIDR}" ] && no_proxy_addresses="${no_proxy_addresses:+$no_proxy_addresses,}${POD_CIDR}" [ -n "$no_proxy_addresses" ] && common_flags="${common_flags}$(get_additional_no_proxy_addresses_flag 1 "$no_proxy_addresses")" common_flags="${common_flags}$(get_kurl_install_directory_flag "${KURL_INSTALL_DIRECTORY_FLAG}")" common_flags="${common_flags}$(get_skip_system_package_install_flag)" common_flags="${common_flags}$(get_exclude_builtin_host_preflights_flag)" common_flags="${common_flags}$(get_remotes_flags)" printf "\n${YELLOW}Run this script on all remote nodes to apply changes${NC}\n" if [ "$AIRGAP" = "1" ]; then local command= command=$(printf "cat ./upgrade.sh | sudo bash -s airgap${common_flags}") echo "$command yes" > "$DIR/remotes/allnodes" printf "\n\t${GREEN}%s${NC}\n\n" "$command" else local prefix= prefix="$(build_installer_prefix "${INSTALLER_ID}" "${KURL_VERSION}" "${KURL_URL}" "${PROXY_ADDRESS}" "${PROXY_HTTPS_ADDRESS}")" local command= command=$(printf "${prefix}upgrade.sh | sudo bash -s${common_flags}") echo "$command yes" > "$DIR/remotes/allnodes" printf "\n\t${GREEN}%s${NC}\n\n" "$command" fi if [ "${KURL_IGNORE_REMOTE_UPGRADE_PROMPT}" != "1" ]; then if prompts_can_prompt ; then echo "Press enter to proceed" prompt fi else logWarn "Remote upgrade script prompt explicitly ignored" fi fi while read -r name; do if commandExists ${name}_outro; then ${name}_outro fi done < <(find addons/ -mindepth 1 -maxdepth 1 -type d -printf '%f\n') } function addon_cleanup() { rm -rf "${DIR}/addons" } function addon_has_been_applied() { local name=$1 if [ "$name" = "containerd" ]; then if [ -f $DIR/containerd-last-applied ]; then last_applied=$(cat $DIR/containerd-last-applied) fi else last_applied=$(kubectl get configmap -n kurl kurl-last-config -o jsonpath="{.data.addons-$name}") fi current=$(get_addon_config "$name" | base64 -w 0) if [[ "$current" == "" ]] ; then # current should never be the empty string - it should at least contain the version - so this indicates an error # it would be better to reinstall unnecessarily rather than skip installing, so we report that the addon has not been applied return 1 fi if [[ "$last_applied" == "$current" ]] ; then return 0 fi return 1 } function addon_set_has_been_applied() { local name=$1 current=$(get_addon_config "$name" | base64 -w 0) if [ "$name" = "containerd" ]; then echo "$current" > $DIR/containerd-last-applied 
else kubectl patch configmaps -n kurl kurl-current-config --type merge -p "{\"data\":{\"addons-$name\":\"$current\"}}" fi } function addon_source() { local name=$1 local version=$2 # shellcheck disable=SC1090 . "$DIR/addons/$name/$version/install.sh" } GREEN='\033[0;32m' BLUE='\033[0;94m' LIGHT_BLUE='\033[0;34m' YELLOW='\033[0;33m' RED='\033[0;31m' NC='\033[0m' # No Color KUBEADM_CONF_DIR=/opt/replicated KUBEADM_CONF_FILE="$KUBEADM_CONF_DIR/kubeadm.conf" function commandExists() { command -v "$@" > /dev/null 2>&1 } function get_dist_url() { local url="$DIST_URL" if [ -n "${KURL_VERSION}" ]; then url="${DIST_URL}/${KURL_VERSION}" fi echo "$url" } # default s3 endpoint does not have AAAA records so IPv6 installs have to choose # an arbitrary regional dualstack endpoint. If S3 transfer acceleration is ever # enabled on the kurl-sh bucket the s3.accelerate.amazonaws.com endpoint can be # used for both IPv4 and IPv6. # this is not required for get_dist_url as *.kurl.sh endpoints have IPv6 addresses. function get_dist_url_fallback() { local url="$FALLBACK_URL" if [ -n "${KURL_VERSION}" ]; then url="${FALLBACK_URL}/${KURL_VERSION}" fi if [ "$IPV6_ONLY" = "1" ]; then echo "$url" | sed 's/s3\.amazonaws\.com/s3.dualstack.us-east-1.amazonaws.com/' else echo "$url" fi } function package_download() { local package="$1" local url_override="$2" if [ -z "$package" ]; then bail "package_download called with no package name" fi if [ -z "$url_override" ] && [ -z "${DIST_URL}" ]; then logWarn "DIST_URL not set, will not download $1" return fi mkdir -p assets touch assets/Manifest local etag= local checksum= etag="$(grep -F "${package}" assets/Manifest | awk 'NR == 1 {print $2}')" checksum="$(grep -F "${package}" assets/Manifest | awk 'NR == 1 {print $3}')" if [ -n "${etag}" ] && ! package_matches_checksum "${package}" "${checksum}" ; then etag= fi local package_url= if [ -z "$url_override" ]; then package_url="$(get_dist_url)/${package}" else package_url="${url_override}" fi local newetag= newetag="$(curl -IfsSL "$package_url" | grep -i 'etag:' | sed -r 's/.*"(.*)".*/\1/')" if [ -n "${etag}" ] && [ "${etag}" = "${newetag}" ]; then echo "Package ${package} already exists, not downloading" return fi local filepath= filepath="$(package_filepath "${package}")" sed -i "/^$(printf '%s' "${package}").*/d" assets/Manifest # remove from manifest rm -f "${filepath}" # remove the file echo "Downloading package ${package}" if [ -z "$url_override" ]; then if [ -z "$FALLBACK_URL" ]; then package_download_url_with_retry "$package_url" "${filepath}" else package_download_url_with_retry "$package_url" "${filepath}" || package_download_url_with_retry "$(get_dist_url_fallback)/${package}" "${filepath}" fi else package_download_url_with_retry "${url_override}" "${filepath}" fi checksum="$(md5sum "${filepath}" | awk '{print $1}')" echo "${package} ${newetag} ${checksum}" >> assets/Manifest } function package_download_url_with_retry() { local url="$1" local filepath="$2" local max_retries="${3:-10}" local errcode= local i=0 while [ $i -ne "$max_retries" ]; do errcode=0 curl -fL -o "${filepath}" "${url}" || errcode="$?" 
# 18 transfer closed with outstanding read data remaining # 56 recv failure (connection reset by peer) if [ "$errcode" -eq "18" ] || [ "$errcode" -eq "56" ]; then i=$(($i+1)) continue fi return "$errcode" done return "$errcode" } function package_filepath() { local package="$1" echo "assets/${package}" } function package_matches_checksum() { local package="$1" local checksum="$2" local filepath="$(package_filepath "${package}")" if [ -z "${checksum}" ]; then return 1 elif [ ! -f "${filepath}" ] || [ ! -s "${filepath}" ]; then # if not exists or empty return 1 elif ! md5sum "${filepath}" | grep -Fq "${checksum}" ; then echo "Package ${package} checksum does not match" return 1 fi return 0 } function package_cleanup() { if [ -z "${DIST_URL}" ] || [ "${AIRGAP}" = "1" ]; then return fi addon_cleanup rm -rf "${DIR}/packages" } function insertOrReplaceJsonParam() { if ! [ -f "$1" ]; then # If settings file does not exist mkdir -p "$(dirname "$1")" echo "{\"$2\": \"$3\"}" > "$1" else # Settings file exists if grep -q -E "\"$2\" *: *\"[^\"]*\"" "$1"; then # If settings file contains named setting, replace it sed -i -e "s/\"$2\" *: *\"[^\"]*\"/\"$2\": \"$3\"/g" "$1" else # Insert into settings file (with proper commas) if [ $(wc -c <"$1") -ge 5 ]; then # File long enough to actually have an entry, insert "name": "value",\n after first { _commonJsonReplaceTmp="$(awk "NR==1,/^{/{sub(/^{/, \"{\\\"$2\\\": \\\"$3\\\", \")} 1" "$1")" echo "$_commonJsonReplaceTmp" > "$1" else # file not long enough to actually have contents, replace wholesale echo "{\"$2\": \"$3\"}" > "$1" fi fi fi } function semverParse() { major="${1%%.*}" minor="${1#$major.}" minor="${minor%%.*}" patch="${1#$major.$minor.}" patch="${patch%%[-.]*}" } SEMVER_COMPARE_RESULT= function semverCompare() { semverParse "$1" _a_major="${major:-0}" _a_minor="${minor:-0}" _a_patch="${patch:-0}" semverParse "$2" _b_major="${major:-0}" _b_minor="${minor:-0}" _b_patch="${patch:-0}" if [ "$_a_major" -lt "$_b_major" ]; then SEMVER_COMPARE_RESULT=-1 return fi if [ "$_a_major" -gt "$_b_major" ]; then SEMVER_COMPARE_RESULT=1 return fi if [ "$_a_minor" -lt "$_b_minor" ]; then SEMVER_COMPARE_RESULT=-1 return fi if [ "$_a_minor" -gt "$_b_minor" ]; then SEMVER_COMPARE_RESULT=1 return fi if [ "$_a_patch" -lt "$_b_patch" ]; then SEMVER_COMPARE_RESULT=-1 return fi if [ "$_a_patch" -gt "$_b_patch" ]; then SEMVER_COMPARE_RESULT=1 return fi SEMVER_COMPARE_RESULT=0 } function log() { printf "%s\n" "$1" 1>&2 } function logSuccess() { printf "${GREEN}✔ $1${NC}\n" 1>&2 } function logStep() { printf "${BLUE}⚙ $1${NC}\n" 1>&2 } function logSubstep() { printf "\t${LIGHT_BLUE}- $1${NC}\n" 1>&2 } function logFail() { printf "${RED}$1${NC}\n" 1>&2 } function logWarn() { printf "${YELLOW}$1${NC}\n" 1>&2 } function bail() { logFail "$@" exit 1 } function wait_for_nodes() { if ! spinner_until 120 get_nodes_succeeds ; then # this should exit script on non-zero exit code and print error message kubectl get nodes 1>/dev/null fi } function get_nodes_succeeds() { kubectl get nodes >/dev/null 2>&1 } function wait_for_default_namespace() { if ! spinner_until 120 has_default_namespace ; then kubectl get ns bail "No default namespace detected" fi } function has_default_namespace() { kubectl get ns | grep -q '^default' 2>/dev/null } # Label nodes as provisioned by kurl installation # (these labels should have been added by kurl installation. # See kubeadm-init and kubeadm-join yaml files. # This bit will ensure the labels are added for pre-existing cluster # during a kurl upgrade.) 
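# A quick way to verify the result afterwards (illustrative, not run by the installer):
#   kubectl get nodes -l kurl.sh/cluster=true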
function labelNodes() { for NODE in $(kubectl get nodes --no-headers | awk '{print $1}');do kurl_label=$(kubectl describe nodes $NODE | grep "kurl.sh\/cluster=true") || true if [[ -z $kurl_label ]];then kubectl label node --overwrite $NODE kurl.sh/cluster=true; fi done } # warning - this only waits for the pod to be running, not for it to be 1/1 or otherwise accepting connections function spinnerPodRunning() { namespace=$1 podPrefix=$2 local delay=0.75 local spinstr='|/-\' while ! kubectl -n "$namespace" get pods 2>/dev/null | grep "^$podPrefix" | awk '{ print $3}' | grep '^Running$' > /dev/null ; do local temp=${spinstr#?} printf " [%c] " "$spinstr" local spinstr=$temp${spinstr%"$temp"} sleep $delay printf "\b\b\b\b\b\b" done printf " \b\b\b\b" } COMPARE_DOCKER_VERSIONS_RESULT= function compareDockerVersions() { # reset COMPARE_DOCKER_VERSIONS_RESULT= compareDockerVersionsIgnorePatch "$1" "$2" if [ "$COMPARE_DOCKER_VERSIONS_RESULT" -ne "0" ]; then return fi parseDockerVersion "$1" _a_patch="$DOCKER_VERSION_PATCH" parseDockerVersion "$2" _b_patch="$DOCKER_VERSION_PATCH" if [ "$_a_patch" -lt "$_b_patch" ]; then COMPARE_DOCKER_VERSIONS_RESULT=-1 return fi if [ "$_a_patch" -gt "$_b_patch" ]; then COMPARE_DOCKER_VERSIONS_RESULT=1 return fi COMPARE_DOCKER_VERSIONS_RESULT=0 } COMPARE_DOCKER_VERSIONS_RESULT= function compareDockerVersionsIgnorePatch() { # reset COMPARE_DOCKER_VERSIONS_RESULT= parseDockerVersion "$1" _a_major="$DOCKER_VERSION_MAJOR" _a_minor="$DOCKER_VERSION_MINOR" parseDockerVersion "$2" _b_major="$DOCKER_VERSION_MAJOR" _b_minor="$DOCKER_VERSION_MINOR" if [ "$_a_major" -lt "$_b_major" ]; then COMPARE_DOCKER_VERSIONS_RESULT=-1 return fi if [ "$_a_major" -gt "$_b_major" ]; then COMPARE_DOCKER_VERSIONS_RESULT=1 return fi if [ "$_a_minor" -lt "$_b_minor" ]; then COMPARE_DOCKER_VERSIONS_RESULT=-1 return fi if [ "$_a_minor" -gt "$_b_minor" ]; then COMPARE_DOCKER_VERSIONS_RESULT=1 return fi COMPARE_DOCKER_VERSIONS_RESULT=0 } DOCKER_VERSION_MAJOR= DOCKER_VERSION_MINOR= DOCKER_VERSION_PATCH= DOCKER_VERSION_RELEASE= function parseDockerVersion() { # reset DOCKER_VERSION_MAJOR= DOCKER_VERSION_MINOR= DOCKER_VERSION_PATCH= DOCKER_VERSION_RELEASE= if [ -z "$1" ]; then return fi OLD_IFS="$IFS" && IFS=. && set -- $1 && IFS="$OLD_IFS" DOCKER_VERSION_MAJOR=$1 DOCKER_VERSION_MINOR=$2 OLD_IFS="$IFS" && IFS=- && set -- $3 && IFS="$OLD_IFS" DOCKER_VERSION_PATCH=$1 DOCKER_VERSION_RELEASE=$2 } function exportKubeconfig() { local kubeconfig kubeconfig="$(${K8S_DISTRO}_get_kubeconfig)" # To meet KUBERNETES_CIS_COMPLIANCE, the ${kubeconfig} needs to be owned by root:root # and permissions set to 600 so users other than root cannot have access to kubectl if [ "$KUBERNETES_CIS_COMPLIANCE" == "1" ]; then chown root:root ${kubeconfig} chmod 400 ${kubeconfig} else current_user_sudo_group if [ -n "$FOUND_SUDO_GROUP" ]; then chown root:$FOUND_SUDO_GROUP ${kubeconfig} fi chmod 440 ${kubeconfig} fi if ! 
grep -q "kubectl completion bash" /etc/profile; then if [ "$KUBERNETES_CIS_COMPLIANCE" != "1" ]; then echo "export KUBECONFIG=${kubeconfig}" >> /etc/profile fi echo "if type _init_completion >/dev/null 2>&1; then source <(kubectl completion bash); fi" >> /etc/profile fi } function kubernetes_resource_exists() { local namespace=$1 local kind=$2 local name=$3 kubectl -n "$namespace" get "$kind" "$name" &>/dev/null } function install_cri() { # In the event someone changes the installer spec from docker to containerd, maintain backward capability with old installs if [ -n "$DOCKER_VERSION" ] ; then export REPORTING_CONTEXT_INFO="docker $DOCKER_VERSION" report_install_docker export REPORTING_CONTEXT_INFO="" elif [ -n "$CONTAINERD_VERSION" ]; then export REPORTING_CONTEXT_INFO="containerd $CONTAINERD_VERSION" report_install_containerd export REPORTING_CONTEXT_INFO="" fi } function report_install_docker() { report_addon_start "docker" "$DOCKER_VERSION" install_docker apply_docker_config report_addon_success "docker" "$DOCKER_VERSION" } function report_install_containerd() { # if we haven't installed kubernetes yet we don't need to wory about containerd upgrades. if [ -z "$CURRENT_KUBERNETES_VERSION" ] ; then addon_install "containerd" "$CONTAINERD_VERSION" return 0 fi # if the node we are running this script is leveraging docker we also don't need to worry # about the version of containerd we are installing, it won't be an upgrade anyways. if node_is_using_docker ; then addon_install "containerd" "$CONTAINERD_VERSION" return 0 fi # on amazon 2023 we are using the default containerd version that comes with the OS. if is_amazon_2023 ; then addon_install "containerd" "$CONTAINERD_VERSION" return 0 fi # if we can't find containerd in the local filesystem then we can also install regardless # of version. if [ ! -f "/usr/bin/containerd" ]; then addon_install "containerd" "$CONTAINERD_VERSION" return 0 fi # from now on we are migrating from one containerd version to another, restrictions apply. local current_containerd_version current_containerd_version=$(/usr/bin/containerd --version | cut -d " " -f3 | tr -d 'v') containerd_evaluate_upgrade "$current_containerd_version" "$CONTAINERD_VERSION" for version in "${CONTAINERD_INSTALL_VERSIONS[@]}"; do logStep "Moving containerd to version v$version." if [ "$version" != "$CONTAINERD_VERSION" ] && [ "$AIRGAP" != "1" ] ; then log "Downloading containerd v$version." addon_fetch "containerd" "$version" fi addon_install "containerd" "$version" done } function load_images() { if [ -n "$DOCKER_VERSION" ]; then find "$1" -type f | xargs -I {} bash -c "docker load < {}" else find "$1" -type f | xargs -I {} bash -c "cat {} | gunzip | ctr -a $(${K8S_DISTRO}_get_containerd_sock) -n=k8s.io images import -" fi retag_gcr_images } # try a command every 2 seconds until it succeeds, up to 30 tries max; useful for kubectl commands # where the Kubernetes API could be restarting function try_1m() { local fn="$1" local args=${@:2} n=0 while ! $fn $args 2>/dev/null ; do n="$(( $n + 1 ))" if [ "$n" -ge "30" ]; then # for the final try print the error and let it exit echo "" try_output="$($fn $args 2>&1)" || true echo "$try_output" bail "spent 1m attempting to run \"$fn $args\" without success" fi sleep 2 done } # try a command every 2 seconds until it succeeds, up to 150 tries max; useful for kubectl commands # where the Kubernetes API could be restarting function try_5m() { local fn="$1" local args=${@:2} n=0 while ! 
$fn $args 2>/dev/null ; do n="$(( $n + 1 ))" if [ "$n" -ge "150" ]; then # for the final try print the error and let it exit echo "" try_output="$($fn $args 2>&1)" || true echo "$try_output" bail "spent 5m attempting to run \"$fn $args\" without success" fi sleep 2 done } # try a command every 2 seconds until it succeeds, up to 30 tries max; useful for kubectl commands # where the Kubernetes API could be restarting # does not redirect stderr to /dev/null function try_1m_stderr() { local fn="$1" local args=${@:2} n=0 while ! $fn $args ; do n="$(( $n + 1 ))" if [ "$n" -ge "30" ]; then # for the final try print the error and let it exit echo "" try_output="$($fn $args 2>&1)" || true echo "$try_output" bail "spent 1m attempting to run \"$fn $args\" without success" fi sleep 2 done } # Run a test every second with a spinner until it succeeds function spinner_until() { local timeoutSeconds="$1" local cmd="$2" local args=${@:3} if [ -z "$timeoutSeconds" ]; then timeoutSeconds=-1 fi local delay=1 local elapsed=0 local spinstr='|/-\' while ! $cmd $args; do elapsed=$((elapsed + delay)) if [ "$timeoutSeconds" -ge 0 ] && [ "$elapsed" -gt "$timeoutSeconds" ]; then return 1 fi local temp=${spinstr#?} printf " [%c] " "$spinstr" local spinstr=$temp${spinstr%"$temp"} sleep $delay printf "\b\b\b\b\b\b" done } function sleep_spinner() { local sleepSeconds="${1:-0}" local delay=1 local elapsed=0 local spinstr='|/-\' while true ; do elapsed=$((elapsed + delay)) if [ "$elapsed" -gt "$sleepSeconds" ]; then return 0 fi local temp=${spinstr#?} printf " [%c] " "$spinstr" local spinstr=$temp${spinstr%"$temp"} sleep $delay printf "\b\b\b\b\b\b" done } function get_common() { if [ "$AIRGAP" != "1" ] && [ -n "$DIST_URL" ]; then if [ -z "$FALLBACK_URL" ]; then curl -sSOL "$(get_dist_url)/common.tar.gz" else curl -sSOL "$(get_dist_url)/common.tar.gz" || curl -sSOL "$(get_dist_url_fallback)/common.tar.gz" fi tar xf common.tar.gz --no-same-owner rm common.tar.gz fi } function get_shared() { if [ -f shared/kurl-util.tar ]; then if [ -n "$DOCKER_VERSION" ]; then docker load < shared/kurl-util.tar else ctr -a "$(${K8S_DISTRO}_get_containerd_sock)" -n=k8s.io images import shared/kurl-util.tar fi fi } function all_sudo_groups() { # examples of lines we're looking for in any sudo config files to find group with root privileges # %wheel ALL = (ALL) ALL # %google-sudoers ALL=(ALL:ALL) NOPASSWD:ALL # %admin ALL=(ALL) ALL cat /etc/sudoers | grep -Eo '^%\S+\s+ALL\s?=.*ALL\b' | awk '{print $1 }' | sed 's/%//' find /etc/sudoers.d/ -type f | xargs cat | grep -Eo '^%\S+\s+ALL\s?=.*ALL\b' | awk '{print $1 }' | sed 's/%//' } # if the sudo group cannot be detected default to root FOUND_SUDO_GROUP= function current_user_sudo_group() { if [ -z "$SUDO_UID" ]; then return 0 fi # return the first sudo group the current user belongs to while read -r groupName; do if id "$SUDO_UID" -Gn | grep -q "\b${groupName}\b"; then FOUND_SUDO_GROUP="$groupName" return 0 fi done < <(all_sudo_groups) } function kubeconfig_setup_outro() { current_user_sudo_group local owner="$SUDO_UID" if [ -z "$owner" ]; then # not currently running via sudo owner="$USER" else # running via sudo - automatically create ~/.kube/config if it does not exist ownerdir=$(eval echo "~$(id -un "$owner")") if [ ! 
-f "$ownerdir/.kube/config" ]; then mkdir -p $ownerdir/.kube cp "$(${K8S_DISTRO}_get_kubeconfig)" $ownerdir/.kube/config chown -R $owner $ownerdir/.kube printf "To access the cluster with kubectl:\n" printf "\n" printf "${GREEN} bash -l${NC}\n" printf "Kurl uses "$(${K8S_DISTRO}_get_kubeconfig)", you might want to unset KUBECONFIG to use .kube/config:\n" printf "\n" printf "${GREEN} echo unset KUBECONFIG >> ~/.bash_profile${NC}\n" return fi fi printf "To access the cluster with kubectl:\n" printf "\n" printf "${GREEN} bash -l${NC}\n" printf "\n" printf "Kurl uses "$(${K8S_DISTRO}_get_kubeconfig)", you might want to copy kubeconfig to your home directory:\n" printf "\n" printf "${GREEN} cp "$(${K8S_DISTRO}_get_kubeconfig)" ~/.kube/config${NC}\n" printf "${GREEN} chown -R ${owner} ~/.kube${NC}\n" printf "${GREEN} echo unset KUBECONFIG >> ~/.bash_profile${NC}\n" printf "${GREEN} bash -l${NC}\n" printf "\n" printf "You will likely need to use sudo to copy and chown "$(${K8S_DISTRO}_get_kubeconfig)".\n" } function splitHostPort() { oIFS="$IFS"; IFS=":" read -r HOST PORT <<< "$1"; IFS="$oIFS" } function isValidIpv4() { if echo "$1" | grep -qs '^[0-9][0-9]*\.[0-9][0-9]*\.[0-9][0-9]*\.[0-9][0-9]*$'; then return 0 else return 1 fi } function isValidIpv6() { if echo "$1" | grep -qs "^\([0-9a-fA-F]\{0,4\}:\)\{1,7\}[0-9a-fA-F]\{0,4\}$"; then return 0 else return 1 fi } function cert_has_san() { local address=$1 local san=$2 echo "Q" | openssl s_client -connect "$address" 2>/dev/null | openssl x509 -noout -text 2>/dev/null | grep --after-context=1 'X509v3 Subject Alternative Name' | grep -q "$2" } # By default journald persists logs if the directory /var/log/journal exists so create it if it's # not found. Sysadmins may still disable persistent logging with /etc/systemd/journald.conf. function journald_persistent() { if [ -d /var/log/journal ]; then return 0 fi mkdir -p /var/log/journal systemd-tmpfiles --create --prefix /var/log/journal systemctl restart systemd-journald journalctl --flush } function rm_file() { if [ -f "$1" ]; then rm $1 fi } # Checks if the provided param is in the current path, and if it is not adds it # this is useful for systems where /usr/local/bin is not in the path for root function path_add() { if [ -d "$1" ] && [[ ":$PATH:" != *":$1:"* ]]; then PATH="${PATH:+"$PATH:"}$1" fi } function install_host_dependencies() { install_host_dependencies_openssl install_host_dependencies_fio || true # fio is not a hard requirement, just a nice-to-have } function install_host_dependencies_openssl() { if commandExists "openssl"; then return fi if ! host_packages_shipped ; then ensure_host_package openssl openssl return fi if [ "$AIRGAP" != "1" ] && [ -n "$DIST_URL" ]; then local package="host-openssl.tar.gz" package_download "${package}" tar xf "$(package_filepath "${package}")" --no-same-owner fi install_host_archives "${DIR}/packages/host/openssl" openssl } function install_host_dependencies_fio() { if commandExists "fio"; then return fi if ! host_packages_shipped ; then if ! ensure_host_package fio fio; then logWarn "Failed to install fio, continuing anyways" fi return fi # if this is Ubuntu 18.04, do not install fio - there are python issues if [ "$LSB_DIST$DIST_VERSION" = "ubuntu18.04" ]; then logWarn "Skipping fio install on Ubuntu 18.04" return fi # we can't install fio on amazon linux because it brings, as a dependency, # a package called `centos-release` that ends up overwriting the amazon # linux release file, making it look like a centos machine. 
if [ "$LSB_DIST" = "amzn" ]; then logWarn "Skipping fio installation on Amazon Linux. If you prefer, you can cancel this" logWarn "installation now and manually install fio using 'yum install fio'. Ignoring this" logWarn "message may cause some of the preflight checks to generate warnings later on." logWarn "Do you want to continue with the installation without fio?" if ! confirmY ; then bail "Installation cancelled by user" fi return fi if [ "$AIRGAP" != "1" ] && [ -n "$DIST_URL" ]; then local package="host-fio.tar.gz" package_download "${package}" tar xf "$(package_filepath "${package}")" --no-same-owner fi if ! install_host_archives "${DIR}/packages/host/fio" fio; then logWarn "Failed to install fio, continuing anyways" fi } function maybe_read_kurl_config_from_cluster() { # if KURL_INSTALL_DIRECTORY_FLAG is set, use the value from the flag if [ -z "$KURL_INSTALL_DIRECTORY_FLAG" ]; then local kurl_install_directory_flag # we don't yet have KUBECONFIG when this is called from the top of install.sh kurl_install_directory_flag="$(KUBECONFIG="$(kubeadm_get_kubeconfig)" kubectl -n kube-system get cm kurl-config -ojsonpath='{ .data.kurl_install_directory }' 2>/dev/null || echo "")" if [ -n "$kurl_install_directory_flag" ]; then KURL_INSTALL_DIRECTORY_FLAG="$kurl_install_directory_flag" KURL_INSTALL_DIRECTORY="$(realpath "$kurl_install_directory_flag")/kurl" fi fi # this function currently only sets KURL_INSTALL_DIRECTORY # there are many other settings in kurl-config } KURL_INSTALL_DIRECTORY=/var/lib/kurl function pushd_install_directory() { local dir= dir="$(dirname "$KURL_INSTALL_DIRECTORY")" if [ ! -e "$dir" ] ; then bail "kURL installation directory $dir does not exist." fi if [ ! -d "$dir" ] ; then bail "kURL installation directory $dir is not a directory." fi KURL_INSTALL_DIRECTORY="$(realpath "$KURL_INSTALL_DIRECTORY")" local tmpfile= tmpfile="$KURL_INSTALL_DIRECTORY/tmpfile" if ! mkdir -p "$KURL_INSTALL_DIRECTORY" || ! touch "$tmpfile" ; then bail "Directory $KURL_INSTALL_DIRECTORY is not writeable by this script. Please either change the directory permissions or override the installation directory with the flag \"kurl-install-directory\"." fi rm "$tmpfile" pushd "$KURL_INSTALL_DIRECTORY" 1>/dev/null } function popd_install_directory() { popd 1>/dev/null } function move_airgap_assets() { local cwd cwd="$(pwd)" if [ "$(readlink -f $KURL_INSTALL_DIRECTORY)" = "${cwd}/kurl" ]; then return fi pushd_install_directory # make sure we have access popd_install_directory # The airgap bundle will extract everything into ./kurl directory. # Move all assets except the scripts into the $KURL_INSTALL_DIRECTORY to emulate the online install experience. 
if [ "$(ls -A "${cwd}"/kurl)" ]; then for file in "${cwd}"/kurl/*; do rm -rf "${KURL_INSTALL_DIRECTORY}/$(basename ${file})" mv "${file}" "${KURL_INSTALL_DIRECTORY}/" done fi } function get_docker_registry_ip_flag() { local docker_registry_ip="$1" if [ -z "${docker_registry_ip}" ]; then return fi echo " docker-registry-ip=${docker_registry_ip}" } function get_skip_system_package_install_flag() { if [ "${SKIP_SYSTEM_PACKAGE_INSTALL}" != "1" ]; then return fi echo " skip-system-package-install" } function get_exclude_builtin_host_preflights_flag() { if [ "${EXCLUDE_BUILTIN_HOST_PREFLIGHTS}" != "1" ]; then return fi echo " exclude-builtin-host-preflights" } function get_additional_no_proxy_addresses_flag() { local has_proxy="$1" local no_proxy_addresses="$2" if [ -z "${has_proxy}" ]; then return fi echo " additional-no-proxy-addresses=${no_proxy_addresses}" } function get_kurl_install_directory_flag() { local kurl_install_directory="$1" if [ -z "${kurl_install_directory}" ] || [ "${kurl_install_directory}" = "/var/lib/kurl" ]; then return fi echo " kurl-install-directory=$(echo "${kurl_install_directory}")" } function get_remotes_flags() { local control_plane_label= control_plane_label="$(kubernetes_get_control_plane_label)" while read -r primary; do printf " primary-host=$primary" done < <(kubectl get nodes --no-headers --selector="$control_plane_label" -owide | awk '{ print $6 }') while read -r secondary; do printf " secondary-host=$secondary" done < <(kubectl get node --no-headers --selector='!'"$control_plane_label" -owide | awk '{ print $6 }') } function get_ipv6_flag() { if [ "$IPV6_ONLY" = "1" ]; then echo " ipv6" fi } function systemd_restart_succeeded() { local oldPid=$1 local serviceName=$2 if ! systemctl is-active --quiet $serviceName; then return 1 fi local newPid="$(systemctl show --property MainPID $serviceName | cut -d = -f2)" if [ "$newPid" = "$oldPid" ]; then return 1 fi if [ "$newPid" = "0" ]; then return 1 fi if ps -p $oldPid >/dev/null 2>&1; then return 1 fi return 0 } function restart_systemd_and_wait() { local serviceName=$1 local pid="$(systemctl show --property MainPID $serviceName | cut -d = -f2)" logSubstep "Restarting $serviceName..." systemctl restart $serviceName log "Checking if $serviceName was restarted successfully" if ! spinner_until 120 systemd_restart_succeeded $pid $serviceName; then journalctl -xe bail "Could not successfully restart systemd service $serviceName" fi logSuccess "Service $serviceName restarted." 
} # returns true when a job has completed function job_is_completed() { local namespace="$1" local jobName="$2" kubectl get jobs -n "$namespace" "$jobName" | grep -q '1/1' } function maybe() { local cmd="$1" local args=( "${@:2}" ) $cmd "${args[@]}" 2>/dev/null || true } MACHINE_ID= KURL_INSTANCE_UUID= function get_machine_id() { MACHINE_ID="$(${DIR}/bin/kurl host protectedid || true)" if [ -f /etc/kurl/uuid ]; then KURL_INSTANCE_UUID="$(cat /etc/kurl/uuid)" else if [ -f "${KURL_INSTALL_DIRECTORY}/uuid" ]; then KURL_INSTANCE_UUID="$(cat ${KURL_INSTALL_DIRECTORY}/uuid)" rm -f "${KURL_INSTALL_DIRECTORY}/uuid" else KURL_INSTANCE_UUID=$(< /dev/urandom tr -dc a-z0-9 | head -c32) fi # use /etc/kurl to persist the instance id "machine id" across cluster reset mkdir -p /etc/kurl echo "$KURL_INSTANCE_UUID" > /etc/kurl/uuid fi } function kebab_to_camel() { echo "$1" | sed -E 's/-(.)/\U\1/g' } function build_installer_prefix() { local installer_id="$1" local kurl_version="$2" local kurl_url="$3" local proxy_address="$4" local proxy_https_address="$5" if [ -z "${kurl_url}" ]; then echo "cat " return fi local is_https= local curl_flags= if [ -n "${proxy_address}" ] || [ -n "${proxy_https_address}" ]; then curl_flags=" -x ${proxy_address}" is_https=$(echo "${kurl_url}" | grep -q "^https" && echo "true" || echo "false") if [ -n "${proxy_https_address}" ] && [ "$is_https" = "true" ]; then curl_flags=" -x ${proxy_https_address}" fi fi if [ -n "${kurl_version}" ]; then echo "curl -fsSL${curl_flags} ${kurl_url}/version/${kurl_version}/${installer_id}/" else echo "curl -fsSL${curl_flags} ${kurl_url}/${installer_id}/" fi } # get_local_node_name returns the name of the current node. function get_local_node_name() { echo "$HOSTNAME" } # this waits for a deployment to have all replicas up-to-date and available function deployment_fully_updated() { x_fully_updated "$1" deployment "$2" } # this waits for a statefulset to have all replicas up-to-date and available function statefulset_fully_updated() { x_fully_updated "$1" statefulset "$2" } # this waits for a resource type (deployment or statefulset) to have all replicas up-to-date and available function x_fully_updated() { local namespace=$1 local resourcetype=$2 local name=$3 local desiredReplicas desiredReplicas=$(kubectl get $resourcetype -n "$namespace" "$name" -o jsonpath='{.status.replicas}') local availableReplicas availableReplicas=$(kubectl get $resourcetype -n "$namespace" "$name" -o jsonpath='{.status.availableReplicas}') local readyReplicas readyReplicas=$(kubectl get $resourcetype -n "$namespace" "$name" -o jsonpath='{.status.readyReplicas}') local updatedReplicas updatedReplicas=$(kubectl get $resourcetype -n "$namespace" "$name" -o jsonpath='{.status.updatedReplicas}') if [ "$desiredReplicas" != "$availableReplicas" ] ; then return 1 fi if [ "$desiredReplicas" != "$readyReplicas" ] ; then return 1 fi if [ "$desiredReplicas" != "$updatedReplicas" ] ; then return 1 fi return 0 } # this waits for a daemonset to have all replicas up-to-date and available function daemonset_fully_updated() { local namespace=$1 local daemonset=$2 local desiredNumberScheduled desiredNumberScheduled=$(kubectl get daemonset -n "$namespace" "$daemonset" -o jsonpath='{.status.desiredNumberScheduled}') local currentNumberScheduled currentNumberScheduled=$(kubectl get daemonset -n "$namespace" "$daemonset" -o jsonpath='{.status.currentNumberScheduled}') local numberAvailable numberAvailable=$(kubectl get daemonset -n "$namespace" "$daemonset" -o 
jsonpath='{.status.numberAvailable}') local numberReady numberReady=$(kubectl get daemonset -n "$namespace" "$daemonset" -o jsonpath='{.status.numberReady}') local updatedNumberScheduled updatedNumberScheduled=$(kubectl get daemonset -n "$namespace" "$daemonset" -o jsonpath='{.status.updatedNumberScheduled}') if [ "$desiredNumberScheduled" != "$numberAvailable" ] ; then return 1 fi if [ "$desiredNumberScheduled" != "$currentNumberScheduled" ] ; then return 1 fi if [ "$desiredNumberScheduled" != "$numberAvailable" ] ; then return 1 fi if [ "$desiredNumberScheduled" != "$numberReady" ] ; then return 1 fi if [ "$desiredNumberScheduled" != "$updatedNumberScheduled" ] ; then return 1 fi return 0 } # pods_gone_by_selector returns true if there are no pods matching the given selector function pods_gone_by_selector() { local namespace=$1 local selector=$2 [ "$(pod_count_by_selector "$namespace" "$selector")" = "0" ] } # pod_count_by_selector returns the number of pods matching the given selector or -1 if the command fails function pod_count_by_selector() { local namespace=$1 local selector=$2 local pods= if ! pods="$(kubectl -n "$namespace" get pods --no-headers -l "$selector" 2>/dev/null)" ; then echo -1 fi echo -n "$pods" | wc -l } # retag_gcr_images takes every k8s.gcr.io image and adds a registry.k8s.io alias if it does not already exist # and vice versa function retag_gcr_images() { local images= local image= local new_image= if [ -n "$DOCKER_VERSION" ]; then images=$(docker images --format '{{.Repository}}:{{.Tag}}' | { grep -F k8s.gcr.io || true; }) for image in $images ; do new_image="${image//k8s.gcr.io/registry.k8s.io}" docker tag "$image" "$new_image" 2>/dev/null || true # if the image name matches `coredns`, extract the tag and also retag `$image` to registry.k8s.io/coredns:$tag # this handles issues where kubernetes expects coredns to be at registry.k8s.io/coredns:1.6.2 but it is at registry.k8s.io/coredns/coredns:v1.6.2 if [[ "$image" =~ "coredns" ]]; then tag=$(echo "$image" | awk -F':' '{print $2}') docker tag "$image" "registry.k8s.io/coredns:$tag" 2>/dev/null || true fi done images=$(docker images --format '{{.Repository}}:{{.Tag}}' | { grep -F registry.k8s.io || true; }) for image in $images ; do new_image="${image//registry.k8s.io/k8s.gcr.io}" docker tag "$image" "$new_image" 2>/dev/null || true done else images=$(ctr -n=k8s.io images list --quiet | { grep -F k8s.gcr.io || true; }) for image in $images ; do new_image="${image//k8s.gcr.io/registry.k8s.io}" ctr -n k8s.io images tag "$image" "$new_image" 2>/dev/null || true # if the image name matches `coredns`, extract the tag and also retag `$image` to registry.k8s.io/coredns:$tag # this handles issues where kubernetes expects coredns to be at registry.k8s.io/coredns:1.6.2 but it is at registry.k8s.io/coredns/coredns:v1.6.2 if [[ "$image" =~ "coredns" ]]; then tag=$(echo "$image" | awk -F':' '{print $2}') ctr -n k8s.io images tag "$image" "registry.k8s.io/coredns:$tag" 2>/dev/null || true fi done images=$(ctr -n=k8s.io images list --quiet | { grep -F registry.k8s.io || true; }) for image in $images ; do new_image="${image//registry.k8s.io/k8s.gcr.io}" ctr -n k8s.io images tag "$image" "$new_image" 2>/dev/null || true done fi } function canonical_image_name() { local image="$1" if echo "$image" | grep -vq '/' ; then image="library/$image" fi if echo "$image" | awk -F'/' '{print $1}' | grep -vq '\.' 
; then image="docker.io/$image" fi if echo "$image" | grep -vq ':' ; then image="$image:latest" fi echo "$image" } # check_for_running_pods scans for pod(s) in a namespace and checks whether their status is running/completed # note: Evicted pods are exempt from this check # exports a variable UNHEALTHY_PODS containing the names of pods that are not running/completed UNHEALTHY_PODS= function check_for_running_pods() { local namespace=$1 local is_job_controller=0 local ns_pods= local status= local containers= local unhealthy_podnames= ns_pods=$(kubectl get pods -n "$namespace" -o jsonpath='{.items[*].metadata.name}') if [ -z "$ns_pods" ]; then return 0 fi for pod in $ns_pods; do status=$(kubectl get pod "$pod" -n "$namespace" -o jsonpath='{.status.phase}') # ignore pods that have been Evicted if [ "$status" == "Failed" ] && [[ $(kubectl get pod "$pod" -n "$namespace" -o jsonpath='{.status.reason}') == "Evicted" ]]; then continue fi if [ "$status" == "Succeeded" ]; then continue fi if [ "$status" != "Running" ]; then unhealthy_podnames="$unhealthy_podnames $pod" continue fi containers=$(kubectl get pod "$pod" -n "$namespace" -o jsonpath="{.spec.containers[*].name}") for container in $containers; do container_status=$(kubectl get pod "$pod" -n "$namespace" -o jsonpath="{.status.containerStatuses[?(@.name==\"$container\")].ready}") if [ "$container_status" != "true" ]; then unhealthy_podnames="$unhealthy_podnames $pod" continue fi done done # if there are unhealthy pods, return 1 if [ -n "$unhealthy_podnames" ]; then export UNHEALTHY_PODS="$unhealthy_podnames" return 1 fi return 0 } # retry a command if it fails up to $1 number of times # Usage: cmd_retry 3 curl --globoff --noproxy "*" --fail --silent --insecure https://10.128.0.25:6443/healthz function cmd_retry() { local retries=$1 shift local count=0 until "$@"; do exit=$? wait=$((2 ** $count)) count=$(($count + 1)) if [ $count -lt $retries ]; then echo "Retry $count/$retries exited $exit, retrying in $wait seconds..." sleep $wait else echo "Retry $count/$retries exited $exit, no more retries left." return $exit fi done return 0 } # common_upgrade_step_versions returns a list of upgrade steps that need to be performed, based on # the supplied space-delimited set of step versions, for use by other functions. # e.g. "1.5.12\n1.6.11\n1.7.11" function common_upgrade_step_versions() { local step_versions= read -ra step_versions <<< "$1" local from_version=$2 local desired_version=$3 local to_version= to_version=$(common_upgrade_version_to_major_minor "$desired_version") # check that major versions are the same local first_major= first_major=$(common_upgrade_major_minor_to_major "$from_version") local last_major= last_major=$(common_upgrade_major_minor_to_major "$to_version") if [ "$first_major" != "$last_major" ]; then bail "Upgrade accross major version from $from_version to $to_version is not supported." fi local first_minor= local last_minor= first_minor=$(common_upgrade_major_minor_to_minor "$from_version") first_minor=$((first_minor + 1)) # exclusive of from_version last_minor=$(common_upgrade_major_minor_to_minor "$to_version") if [ "${#step_versions[@]}" -le "$last_minor" ]; then bail "Upgrade from $from_version to $to_version is not supported." 
fi # if there are no steps to perform, return if [ "$first_minor" -gt "$last_minor" ]; then return fi if [ "$desired_version" != "$to_version" ]; then last_minor=$((last_minor - 1)) # last version is the desired version fi local step= for (( step=first_minor ; step<=last_minor ; step++ )); do echo "${step_versions[$step]}" done if [ "$desired_version" != "$to_version" ]; then echo "$desired_version" fi } # common_upgrade_compare_versions prints 0 if the versions are equal, 1 if the first is greater, # and -1 if the second is greater. function common_upgrade_compare_versions() { local a="$1" local b="$2" local a_major= local b_major= a_major=$(common_upgrade_major_minor_to_major "$a") b_major=$(common_upgrade_major_minor_to_major "$b") if [ "$a_major" -lt "$b_major" ]; then echo "-1" return elif [ "$a_major" -gt "$b_major" ]; then echo "1" return fi local a_minor= local b_minor= a_minor=$(common_upgrade_major_minor_to_minor "$a") b_minor=$(common_upgrade_major_minor_to_minor "$b") if [ "$a_minor" -lt "$b_minor" ]; then echo "-1" return elif [ "$a_minor" -gt "$b_minor" ]; then echo "1" return fi echo "0" } # common_upgrade_is_version_included returns 0 if the version is included in the range. function common_upgrade_is_version_included() { local from_version="$1" local to_version="$2" local current_version="$3" # if current_version is greater than from_version and current_version is less than or equal to to_version [ "$(common_upgrade_compare_versions "$current_version" "$from_version")" = "1" ] && \ [ "$(common_upgrade_compare_versions "$current_version" "$to_version")" != "1" ] } # common_upgrade_max_version will return the greater of the two versions. function common_upgrade_max_version() { local a="$1" local b="$2" if [ "$(common_upgrade_compare_versions "$a" "$b")" = "1" ]; then echo "$a" else echo "$b" fi } # common_upgrade_print_list_of_minor_upgrades prints message of minor versions that will be # upgraded. e.g. "1.0.x to 1.1, 1.1 to 1.2, 1.2 to 1.3, and 1.3 to 1.4" function common_upgrade_print_list_of_minor_upgrades() { local from_version="$1" local to_version="$2" printf "This involves upgrading from " local first_minor= local last_minor= first_minor=$(common_upgrade_major_minor_to_minor "$from_version") last_minor=$(common_upgrade_major_minor_to_minor "$to_version") local minor= for (( minor=first_minor ; minor "$DIR/remotes/$node" done printf "\n\t%b%s %b\n\n" \ "$GREEN" "$command" "$NC" printf "Are you ready to continue? " confirmY } # node_is_using_docker returns 0 if the current node is using docker as the container runtime. 
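# (The annotation checked below is written by kubeadm; on a containerd node it typically reads
#  unix:///run/containerd/containerd.sock rather than dockershim.sock.)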
function node_is_using_docker() { local node node="$(get_local_node_name)" kubectl get node "$node" -ojsonpath='{.metadata.annotations.kubeadm\.alpha\.kubernetes\.io/cri-socket}' | grep -q "dockershim.sock" } # get_ekco_addr prints the service address (including port) for reaching the EKCO service to stdout function get_ekco_addr() { if [ -n "$EKCO_ADDRESS" ]; then echo "$EKCO_ADDRESS" return fi local ekco_addr= local ekco_port= local ekco_service_ip= ekco_service_ip=$(kubectl get svc ekc-operator -n kurl -o jsonpath='{.spec.clusterIP}') ekco_port=$(kubectl get svc ekc-operator -n kurl -o jsonpath='{.spec.ports[?(@.nodePort)].port}') ekco_addr="${ekco_service_ip}:${ekco_port}" echo "$ekco_addr" } # get_ekco_storage_migration_auth_token prints the ekco storage migration authentication token to stdout function get_ekco_storage_migration_auth_token() { if [ -n "$EKCO_AUTH_TOKEN" ]; then echo "$EKCO_AUTH_TOKEN" return fi local auth_token= auth_token=$(kubectl get cm -n kurl ekco-config -ojsonpath='{.data.config\.yaml}' | grep "storage_migration_auth_token:" | awk '{print $2}') echo "$auth_token" } # determine storage migration ready timeout function storage_migration_ready_timeout() { if [ -z "$STORAGE_MIGRATION_READY_TIMEOUT" ]; then STORAGE_MIGRATION_READY_TIMEOUT="10m0s" fi echo "$STORAGE_MIGRATION_READY_TIMEOUT" } # return the version of kubernetes that is currently installed on the server function kubectl_server_version() { local kubectl_server_version= if kubectl version --short > /dev/null 2>&1 ; then kubectl_server_version="$(kubectl version --short | grep -i server | awk '{ print $3 }')" else # kubectl version --short is not supported in kubectl > 1.27, but is now the default behavior kubectl_server_version="$(kubectl version | grep -i server | awk '{ print $3 }')" fi echo "$kubectl_server_version" } # return the version of kubernetes that is currently installed on the client function kubectl_client_version() { local kubectl_client_version= if kubectl version --short > /dev/null 2>&1 ; then kubectl_client_version="$(kubectl version --short | grep -i client | awk '{ print $3 }')" else # kubectl version --short is not supported in kubectl > 1.27, but is now the default behavior kubectl_client_version="$(kubectl version | grep -i client | awk '{ print $3 }')" fi echo "$kubectl_client_version" } # create directories for remote commands to be placed within, and ensure they are empty function setup_remote_commands_dirs() { mkdir -p "$DIR/remotes" rm -f "$DIR/remotes/*" } #!/bin/bash function discover() { local fullCluster="$1" detectLsbDist discoverCurrentKubernetesVersion "$fullCluster" # never upgrade docker underneath kubernetes if docker version >/dev/null 2>&1 ; then SKIP_DOCKER_INSTALL=1 if [ -n "$DOCKER_VERSION" ]; then echo "Docker already exists on this machine so no docker install will be performed" fi fi discover_public_ip discover_private_ip KERNEL_MAJOR=$(uname -r | cut -d'.' -f1) KERNEL_MINOR=$(uname -r | cut -d'.' -f2) } LSB_DIST= DIST_VERSION= DIST_VERSION_MAJOR= DIST_VERSION_MINOR= detectLsbDist() { _dist= _error_msg="We have checked /etc/os-release and /etc/centos-release files." if [ -f /etc/centos-release ] && [ -r /etc/centos-release ]; then # CentOS 6 example: CentOS release 6.9 (Final) # CentOS 7 example: CentOS Linux release 7.5.1804 (Core) _dist="$(cat /etc/centos-release | cut -d" " -f1)" _version="$(cat /etc/centos-release | sed 's/Linux //' | sed 's/Stream //' | cut -d" " -f3 | cut -d "." 
-f1-2)" elif [ -f /etc/os-release ] && [ -r /etc/os-release ]; then _dist="$(. /etc/os-release && echo "$ID")" _version="$(. /etc/os-release && echo "$VERSION_ID")" elif [ -f /etc/redhat-release ] && [ -r /etc/redhat-release ]; then # this is for RHEL6 _dist="rhel" _major_version=$(cat /etc/redhat-release | cut -d" " -f7 | cut -d "." -f1) _minor_version=$(cat /etc/redhat-release | cut -d" " -f7 | cut -d "." -f2) _version=$_major_version elif [ -f /etc/system-release ] && [ -r /etc/system-release ]; then if grep --quiet "Amazon Linux" /etc/system-release; then # Special case for Amazon 2014.03 _dist="amzn" _version=$(awk '/Amazon Linux/{print $NF}' /etc/system-release) fi else _error_msg="$_error_msg\nDistribution cannot be determined because neither of these files exist." fi if [ -n "$_dist" ]; then _error_msg="$_error_msg\nDetected distribution is ${_dist}." _dist="$(echo "$_dist" | tr '[:upper:]' '[:lower:]')" case "$_dist" in ubuntu) _error_msg="$_error_msg\nHowever detected version $_version is less than 12." oIFS="$IFS"; IFS=.; set -- $_version; IFS="$oIFS"; [ $1 -ge 12 ] && LSB_DIST=$_dist && DIST_VERSION=$_version && DIST_VERSION_MAJOR=$1 ;; debian) _error_msg="$_error_msg\nHowever detected version $_version is less than 7." oIFS="$IFS"; IFS=.; set -- $_version; IFS="$oIFS"; [ $1 -ge 7 ] && LSB_DIST=$_dist && DIST_VERSION=$_version && DIST_VERSION_MAJOR=$1 ;; fedora) _error_msg="$_error_msg\nHowever detected version $_version is less than 21." oIFS="$IFS"; IFS=.; set -- $_version; IFS="$oIFS"; [ $1 -ge 21 ] && LSB_DIST=$_dist && DIST_VERSION=$_version && DIST_VERSION_MAJOR=$1 ;; rhel) _error_msg="$_error_msg\nHowever detected version $_version is less than 7." oIFS="$IFS"; IFS=.; set -- $_version; IFS="$oIFS"; [ $1 -ge 6 ] && LSB_DIST=$_dist && DIST_VERSION=$_version && DIST_VERSION_MAJOR=$1 && DIST_VERSION_MINOR="${DIST_VERSION#$DIST_VERSION_MAJOR.}" && DIST_VERSION_MINOR="${DIST_VERSION_MINOR%%.*}" ;; rocky) _error_msg="$_error_msg\nHowever detected version $_version is less than 7." oIFS="$IFS"; IFS=.; set -- $_version; IFS="$oIFS"; [ $1 -ge 6 ] && LSB_DIST=$_dist && DIST_VERSION=$_version && DIST_VERSION_MAJOR=$1 && DIST_VERSION_MINOR="${DIST_VERSION#$DIST_VERSION_MAJOR.}" && DIST_VERSION_MINOR="${DIST_VERSION_MINOR%%.*}" ;; centos) _error_msg="$_error_msg\nHowever detected version $_version is less than 6." oIFS="$IFS"; IFS=.; set -- $_version; IFS="$oIFS"; [ $1 -ge 6 ] && LSB_DIST=$_dist && DIST_VERSION=$_version && DIST_VERSION_MAJOR=$1 && DIST_VERSION_MINOR="${DIST_VERSION#$DIST_VERSION_MAJOR.}" && DIST_VERSION_MINOR="${DIST_VERSION_MINOR%%.*}" ;; amzn) _error_msg="$_error_msg\nHowever detected version $_version is not one of\n 2023, 2, 2.0, 2018.03, 2017.09, 2017.03, 2016.09, 2016.03, 2015.09, 2015.03, 2014.09, 2014.03." [ "$_version" = "2023" ] || \ [ "$_version" = "2" ] || [ "$_version" = "2.0" ] || \ [ "$_version" = "2018.03" ] || \ [ "$_version" = "2017.03" ] || [ "$_version" = "2017.09" ] || \ [ "$_version" = "2016.03" ] || [ "$_version" = "2016.09" ] || \ [ "$_version" = "2015.03" ] || [ "$_version" = "2015.09" ] || \ [ "$_version" = "2014.03" ] || [ "$_version" = "2014.09" ] && \ LSB_DIST=$_dist && DIST_VERSION=$_version && DIST_VERSION_MAJOR=$_version ;; sles) _error_msg="$_error_msg\nHowever detected version $_version is less than 12." oIFS="$IFS"; IFS=.; set -- $_version; IFS="$oIFS"; [ $1 -ge 12 ] && LSB_DIST=$_dist && DIST_VERSION=$_version && DIST_VERSION_MAJOR=$1 ;; ol) _error_msg="$_error_msg\nHowever detected version $_version is less than 6." 
oIFS="$IFS"; IFS=.; set -- $_version; IFS="$oIFS"; [ $1 -ge 6 ] && LSB_DIST=$_dist && DIST_VERSION=$_version && DIST_VERSION_MAJOR=$1 && DIST_VERSION_MINOR="${DIST_VERSION#$DIST_VERSION_MAJOR.}" && DIST_VERSION_MINOR="${DIST_VERSION_MINOR%%.*}" ;; *) _error_msg="$_error_msg\nThat is an unsupported distribution." ;; esac fi if [ -z "$LSB_DIST" ]; then echo >&2 "$(echo | sed "i$_error_msg")" echo >&2 "" echo >&2 "Please visit the following URL for more detailed installation instructions:" echo >&2 "" echo >&2 " https://kurl.sh/docs/install-with-kurl/system-requirements" exit 1 fi } export CURRENT_KUBERNETES_VERSION= export KUBERNETES_UPGRADE=0 function discoverCurrentKubernetesVersion() { local fullCluster="$1" CURRENT_KUBERNETES_VERSION=$(maybe discover_local_kubernetes_version) if [ -z "$CURRENT_KUBERNETES_VERSION" ]; then # This is a new install and no upgrades are required return 0 fi if [ -z "$fullCluster" ]; then return 0 fi # Populate arrays with versions of remote nodes kubernetes_get_remote_primaries kubernetes_get_secondaries semverCompare "$CURRENT_KUBERNETES_VERSION" "$KUBERNETES_VERSION" if [ "$SEMVER_COMPARE_RESULT" = "-1" ]; then KUBERNETES_UPGRADE=1 elif [ "$SEMVER_COMPARE_RESULT" = "1" ]; then bail "The current Kubernetes version $CURRENT_KUBERNETES_VERSION is greater than target version $KUBERNETES_VERSION" fi # Check for upgrades required on remote primaries for node in "${!KUBERNETES_REMOTE_PRIMARIES[@]}"; do semverCompare "${KUBERNETES_REMOTE_PRIMARY_VERSIONS[$node]}" "$KUBERNETES_VERSION" if [ "$SEMVER_COMPARE_RESULT" = "-1" ]; then KUBERNETES_UPGRADE=1 elif [ "$SEMVER_COMPARE_RESULT" = "1" ]; then bail "The current Kubernetes version $CURRENT_KUBERNETES_VERSION is greater than target version $KUBERNETES_VERSION on remote primary $node" fi done # Check for upgrades required on remote secondaries for node in "${!KUBERNETES_SECONDARIES[@]}"; do semverCompare "${KUBERNETES_SECONDARY_VERSIONS[$node]}" "$KUBERNETES_VERSION" if [ "$SEMVER_COMPARE_RESULT" = "-1" ]; then KUBERNETES_UPGRADE=1 elif [ "$SEMVER_COMPARE_RESULT" = "1" ]; then bail "The current Kubernetes version $CURRENT_KUBERNETES_VERSION is greater than target version $KUBERNETES_VERSION on remote worker $node" fi done } function discover_local_kubernetes_version() { grep -s ' image: ' /etc/kubernetes/manifests/kube-apiserver.yaml | grep -oE '[0-9]+\.[0-9]+\.[0-9]+' } function get_docker_version() { if ! commandExists "docker" ; then return fi docker -v | awk '{gsub(/,/, "", $3); print $3}' } function discover_public_ip() { if [ "$AIRGAP" == "1" ]; then return fi if [ -n "$PUBLIC_ADDRESS" ]; then return fi # gce set +e _out=$(curl --noproxy "*" --max-time 5 --connect-timeout 2 -qSfs -H 'Metadata-Flavor: Google' http://169.254.169.254/computeMetadata/v1/instance/network-interfaces/0/access-configs/0/external-ip 2>/dev/null) _status=$? set -e if [ "$_status" -eq "0" ] && [ -n "$_out" ]; then if isValidIpv4 "$_out" || isValidIpv6 "$_out"; then PUBLIC_ADDRESS=$_out fi return fi # ec2 _out=$(discover_public_ip_ec2_imdsv2) if [ -n "$_out" ]; then PUBLIC_ADDRESS=$_out return fi _out=$(discover_public_ip_ec2_imdsv1) if [ -n "$_out" ]; then PUBLIC_ADDRESS=$_out return fi # azure set +e _out=$(curl --noproxy "*" --max-time 5 --connect-timeout 2 -qSfs -H Metadata:true "http://169.254.169.254/metadata/instance/network/interface/0/ipv4/ipAddress/0/publicIpAddress?api-version=2017-08-01&format=text" 2>/dev/null) _status=$? 
set -e if [ "$_status" -eq "0" ] && [ -n "$_out" ]; then if isValidIpv4 "$_out" || isValidIpv6 "$_out"; then PUBLIC_ADDRESS=$_out fi return fi } function discover_public_ip_ec2_imdsv1() { local public_address status set +e public_address=$(curl --noproxy "*" --max-time 5 --connect-timeout 2 -qSfs http://169.254.169.254/latest/meta-data/public-ipv4 2>/dev/null) status=$? set -e if [ "$status" -eq 0 ] && [ -n "$public_address" ]; then if isValidIpv4 "$public_address" || isValidIpv6 "$public_address"; then echo "$public_address" fi fi } function discover_public_ip_ec2_imdsv2() { local token public_address status token=$(curl --noproxy "*" --max-time 5 --connect-timeout 2 -qSfs -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 21600" 2>/dev/null) if [ -z "$token" ]; then return fi set +e public_address=$(curl --noproxy "*" --max-time 5 --connect-timeout 2 -qSfs -H "X-aws-ec2-metadata-token: $token" http://169.254.169.254/latest/meta-data/public-ipv4 2>/dev/null) status=$? set -e if [ "$status" -eq 0 ] && [ -n "$public_address" ]; then if isValidIpv4 "$public_address" || isValidIpv6 "$public_address"; then echo "$public_address" fi fi } function discover_private_ip() { if [ -n "$PRIVATE_ADDRESS" ]; then return 0 fi PRIVATE_ADDRESS="$(${K8S_DISTRO}_discover_private_ip)" } function discover_non_loopback_nameservers() { local resolvConf=/etc/resolv.conf # https://github.com/kubernetes/kubernetes/blob/v1.19.3/cmd/kubeadm/app/componentconfigs/kubelet.go#L211 if systemctl is-active -q systemd-resolved; then resolvConf=/run/systemd/resolve/resolv.conf fi cat $resolvConf | grep -E '^nameserver\s+' | grep -Eqv '^nameserver\s+127' } function init_daemon_json() { if [ -f /etc/docker/daemon.json ]; then return fi mkdir -p /etc/docker # Change cgroup driver to systemd # Docker uses cgroupfs by default to manage cgroup. On distributions using systemd, # i.e. RHEL and Ubuntu, this causes issues because there are now 2 seperate ways # to manage resources. For more info see the link below. # https://github.com/kubernetes/kubeadm/issues/1394#issuecomment-462878219 # if [ ! -f /var/lib/kubelet/kubeadm-flags.env ]; then cat > /etc/docker/daemon.json < /etc/docker/daemon.json </dev/null | grep 'Storage Driver' | awk '{print $3}' | awk -F- '{print $1}') if [ "$_driver" = "devicemapper" ] && docker info 2>/dev/null | grep -Fqs 'Data loop file:' ; then printf "${RED}The running Docker daemon is configured to use the 'devicemapper' storage driver \ in loopback mode.\nThis is not recommended for production use. Please see to the following URL for more \ information.\n\nhttps://help.replicated.com/docs/kb/developer-resources/devicemapper-warning/.${NC}\n\n\ " # HARD_FAIL_ON_LOOPBACK if [ -n "$1" ]; then printf "${RED}Please configure a recommended storage driver and try again.${NC}\n\n" exit 1 fi printf "Do you want to proceed anyway? " if ! 
confirmN; then
            exit 0
        fi
    fi
}

docker_configure_proxy() {
    log "Configuring docker proxy"
    local previous_http_proxy=$(docker info 2>/dev/null | grep -i 'Http Proxy:' | awk '{ print $NF }')
    local previous_https_proxy=$(docker info 2>/dev/null | grep -i 'Https Proxy:' | awk '{ print $NF }')
    local previous_no_proxy=$(docker info 2>/dev/null | grep -i 'No Proxy:' | awk '{ print $NF }')
    log "Previous http proxy: ($previous_http_proxy)"
    log "Previous https proxy: ($previous_https_proxy)"
    log "Previous no proxy: ($previous_no_proxy)"
    if [ "$PROXY_ADDRESS" = "$previous_http_proxy" ] && [ "$PROXY_HTTPS_ADDRESS" = "$previous_https_proxy" ] && [ "$NO_PROXY_ADDRESSES" = "$previous_no_proxy" ]; then
        log "No changes found; proxy configuration is unchanged"
        return
    fi
    log "Updating proxy configuration: HTTP_PROXY=${PROXY_ADDRESS} HTTPS_PROXY=${PROXY_HTTPS_ADDRESS} NO_PROXY=${NO_PROXY_ADDRESSES}"
    mkdir -p /etc/systemd/system/docker.service.d
    local file=/etc/systemd/system/docker.service.d/http-proxy.conf
    echo "# Generated by kURL" > $file
    echo "[Service]" >> $file
    echo "Environment=\"HTTP_PROXY=${PROXY_ADDRESS}\" \"HTTPS_PROXY=${PROXY_HTTPS_ADDRESS}\" \"NO_PROXY=${NO_PROXY_ADDRESSES}\"" >> $file
    restart_docker
}

function docker_get_host_packages_online() {
    local version="$1"
    if [ "$AIRGAP" != "1" ] && [ -n "$DIST_URL" ]; then
        rm -rf $DIR/packages/docker/${version} # Cleanup broken/incompatible packages from failed runs
        local package="docker-${version}.tar.gz"
        package_download "${package}"
        tar xf "$(package_filepath "${package}")"
        # rm docker-${version}.tar.gz
    fi
}

# Uninstalls Docker only when this is a new installation and the installer spec includes
# containerd, to work around the bug:
# `dpkg: no, cannot proceed with removal of containerd ... docker.io
# depends on containerd (>= 1.2.6-0ubuntu1~) containerd is to be removed.`
# More info: https://bugs.launchpad.net/ubuntu/+source/docker.io/+bug/1940920
# https://bugs.launchpad.net/ubuntu/+source/docker.io/+bug/1939140
function uninstall_docker_new_installs_with_containerd() {
    # If docker is not installed OR containerd is not in the spec,
    # then docker should not be uninstalled
    if ! commandExists docker || [ -z "$CONTAINERD_VERSION" ]; then
        return
    fi
    # If k8s is already installed, docker should not be uninstalled
    # so that it can be properly migrated to containerd
    if kubernetes_resource_exists kube-system configmap kurl-config; then
        return
    fi
    logStep "Uninstalling Docker to avoid conflicts with containerd package.\n"
    if [ "$(docker ps -aq | wc -l)" != "0" ] ; then
        docker ps -aq | xargs docker rm -f || true
    fi
    # The rm -rf /var/lib/docker command below may fail with a device busy error, so remove as much
    # data as possible now
    docker system prune --all --volumes --force || true
    systemctl disable docker.service --now || true
    # Note that docker.io can only be removed because containerd is about to be installed and
    # this is a new install. Otherwise, this dependency is required.
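    # Illustrative only (not executed): commands one could run to see which Docker-related
    # packages are currently installed before they are purged below, on a Debian-based or
    # RPM-based host respectively:
    #   dpkg -l 'docker*' 'containerd*'
    #   rpm -qa | grep -E 'docker|containerd'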
# Important: The conflict is only removed when we uninstall docker.io case "$LSB_DIST" in ubuntu) export DEBIAN_FRONTEND=noninteractive dpkg --purge docker.io docker-ce docker-ce-cli ;; centos|rhel|amzn|ol) local dockerPackages=("docker" "docker.io" "docker-ce" "docker-ce-cli") if rpm -qa | grep -q 'docker-ce-rootless-extras'; then dockerPackages+=("docker-ce-rootless-extras") fi if rpm -qa | grep -q 'docker-scan-plugin'; then dockerPackages+=("docker-scan-plugin") fi local installedDockerPackages=() # Check if each Docker-related package is installed and add to the list if so for package in "${dockerPackages[@]}"; do if sudo rpm -q "$package" &>/dev/null; then installedPackages+=("$package") fi done rpm --erase ${installedPackages[@]} ;; esac rm -rf /var/lib/docker /var/lib/dockershim || true rm -f /var/run/dockershim.sock || true rm -f /var/run/docker.sock || true echo "Docker successfully uninstalled to allow to install containerd." } function install_helm() { if [ -n "$HELM_HELMFILE_SPEC" ] && kubernetes_is_master; then BIN_HELM=$DIR/bin/helm BIN_HELMFILE=$DIR/bin/helmfile cp -f $DIR/helm/helm $DIR/bin cp -f $DIR/helm/helmfile $DIR/bin fi } function helmfile_sync() { if [ -z "$HELM_HELMFILE_SPEC" ]; then return 0 fi logStep "Installing Helm Charts using the Helmfile Spec" # TODO (dan): add reporting for helm # report_helm_start printf "${HELM_HELMFILE_SPEC}" > helmfile-tmp.yaml if [ "$AIRGAP" != "1" ]; then $BIN_HELMFILE -b $BIN_HELM --file helmfile-tmp.yaml deps # || report_helm_failure #TODO (dan): add reporting fi # TODO (dan): To support air gap case, we might need to modify the helmfile to always run the local chart $BIN_HELMFILE -b $BIN_HELM --file helmfile-tmp.yaml sync # || report_helm_failure #TODO (dan): add reporting rm helmfile-tmp.yaml # TODO (dan): add reporting for helm # report_helm_success } function helm_load() { if [ "$AIRGAP" = "1" ] && [ -n "$HELM_HELMFILE_SPEC" ] ; then # TODO (dan): Implement airgapped loading after bundler is updated bail "Airgap Installation with Helm is currently not supported" #load_images $DIR/helm-bundle/images fi } #!/bin/bash function install_host_archives() { local dir="$1" local dir_prefix="/archives" local packages=("${@:2}") _install_host_packages "$dir" "$dir_prefix" "${packages[@]}" } function install_host_packages() { local dir="$1" local dir_prefix="" local packages=("${@:2}") _install_host_packages "$dir" "$dir_prefix" "${packages[@]}" } function rpm_force_install_host_archives() { local dir="$1" local dir_prefix="/archives" local packages=("${@:2}") _rpm_force_install_host_packages "$dir" "$dir_prefix" "${packages[@]}" } function rpm_force_install_host_packages() { local dir="$1" local dir_prefix="" local packages=("${@:2}") _rpm_force_install_host_packages "$dir" "$dir_prefix" "${packages[@]}" } function _install_host_packages() { local dir="$1" local dir_prefix="$2" local packages=("${@:3}") case "$LSB_DIST" in ubuntu) _dpkg_install_host_packages "$dir" "$dir_prefix" "${packages[@]}" ;; centos|rhel|ol|rocky) if [ "$DIST_VERSION_MAJOR" = "9" ]; then _yum_install_host_packages_el9 "$dir" "$dir_prefix" "${packages[@]}" else _yum_install_host_packages "$dir" "$dir_prefix" "${packages[@]}" fi ;; amzn) if is_amazon_2023; then _yum_install_host_packages_el9 "$dir" "$dir_prefix" "${packages[@]}" else local fullpath= fullpath="$(realpath "${dir}")/rhel-7-force${dir_prefix}" if test -n "$(shopt -s nullglob; echo "${fullpath}"/*.rpm)" ; then _rpm_force_install_host_packages "$dir" "$dir_prefix" "${packages[@]}" else 
_yum_install_host_packages "$dir" "$dir_prefix" "${packages[@]}" fi fi ;; *) bail "Host package install is not supported on ${LSB_DIST} ${DIST_MAJOR}" ;; esac } function _rpm_force_install_host_packages() { if [ "${SKIP_SYSTEM_PACKAGE_INSTALL}" == "1" ]; then logStep "Skipping installation of host packages: ${packages[*]}" return fi local dir="$1" local dir_prefix="$2" local packages=("${@:3}") logStep "Installing host packages ${packages[*]}" local fullpath= fullpath="$(realpath "${dir}")/rhel-7-force${dir_prefix}" if ! test -n "$(shopt -s nullglob; echo "${fullpath}"/*.rpm)" ; then echo "Will not install host packages ${packages[*]}, no packages found." return 0 fi rpm --upgrade --force --nodeps --nosignature "${fullpath}"/*.rpm logSuccess "Host packages ${packages[*]} installed" } function dpkg_install_host_archives() { local dir="$1" local dir_prefix="/archives" local packages=("${@:2}") _dpkg_install_host_packages "$dir" "$dir_prefix" "${packages[@]}" } function dpkg_install_host_packages() { local dir="$1" local dir_prefix="" local packages=("${@:2}") _dpkg_install_host_packages "$dir" "$dir_prefix" "${packages[@]}" } function _dpkg_apt_get_status_and_maybe_fix_broken_pkgs() { logStep "Checking package manager status" if apt-get check status ; then logSuccess "Status checked successfully. No broken packages were found." return fi logWarn "Attempting to correct broken packages by running 'apt-get install --fix-broken --no-remove --yes'" # Let's use || true here for when be required to remove the packages we properly should the error message # with the steps to get it fix manually apt-get install --fix-broken --no-remove --yes || true if apt-get check status ; then logSuccess "Broken packages fixed successfully" return fi logFail "Unable to fix broken packages. Manual intervention is required." logFail "Run the command 'apt-get check status' to get further information." } function _dpkg_install_host_packages() { if [ "${SKIP_SYSTEM_PACKAGE_INSTALL}" == "1" ]; then logStep "Skipping installation of host packages: ${packages[*]}" return fi local dir="$1" local dir_prefix="$2" local packages=("${@:3}") logStep "Installing host packages ${packages[*]}" local fullpath= fullpath="${dir}/ubuntu-${DIST_VERSION}${dir_prefix}" if ! test -n "$(shopt -s nullglob; echo "${fullpath}"/*.deb)" ; then echo "Will not install host packages ${packages[*]}, no packages found." return 0 fi # first install attempt can fail with pre-dependency problems # retrying seems to fix the issue for i in {1..3} ; do if DEBIAN_FRONTEND=noninteractive dpkg --install --force-depends-version --force-confold --auto-deconfigure "${fullpath}"/*.deb ; then break fi if [ "$i" -eq 3 ]; then logFail "Failed to install host packages ${packages[*]}" return 1 fi logWarn "Failed to install host packages ${packages[*]}, retrying..." 
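        # Illustrative only (not executed): once the retry loop finishes, an individual package's
        # install state can be verified with the same query used elsewhere in this script, e.g.:
        #   dpkg-query -W -f='${Status}' <package> 2>/dev/null | grep -q "ok installed"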
done logSuccess "Host packages ${packages[*]} installed" _dpkg_apt_get_status_and_maybe_fix_broken_pkgs } function yum_install_host_archives() { local dir="$1" local dir_prefix="/archives" local packages=("${@:2}") if [ "$DIST_VERSION_MAJOR" = "9" ] || [ "$DIST_VERSION_MAJOR" = "2023" ]; then _yum_install_host_packages_el9 "$dir" "$dir_prefix" "${packages[@]}" else _yum_install_host_packages "$dir" "$dir_prefix" "${packages[@]}" fi } function yum_install_host_packages() { local dir="$1" local dir_prefix="" local packages=("${@:2}") if [ "$DIST_VERSION_MAJOR" = "9" ] || [ "$DIST_VERSION_MAJOR" = "2023" ]; then _yum_install_host_packages_el9 "$dir" "$dir_prefix" "${packages[@]}" else _yum_install_host_packages "$dir" "$dir_prefix" "${packages[@]}" fi } function _yum_install_host_packages() { if [ "${SKIP_SYSTEM_PACKAGE_INSTALL}" == "1" ]; then logStep "Skipping installation of host packages: ${packages[*]}" return fi local dir="$1" local dir_prefix="$2" local packages=("${@:3}") logStep "Installing host packages ${packages[*]}" local fullpath= fullpath="$(_yum_get_host_packages_path "${dir}" "${dir_prefix}")" if ! test -n "$(shopt -s nullglob; echo "${fullpath}"/*.rpm)" ; then echo "Will not install host packages ${packages[*]}, no packages found." return 0 fi cat > /etc/yum.repos.d/kurl.local.repo < $next_version" if semverCompare "$next_version" "$previous_version" && [ "$SEMVER_COMPARE_RESULT" -lt "0" ]; then if uname -r | grep -q "el8" ; then yum --disablerepo=* --enablerepo=kurl.local downgrade --allowerasing -y "${packages[@]}" else yum --disablerepo=* --enablerepo=kurl.local downgrade -y "${packages[@]}" fi fi logSuccess "Downgraded containerd" fi # shellcheck disable=SC2086 if [[ "${packages[*]}" == *"containerd.io"* && -n $(uname -r | grep "el8") ]]; then yum --disablerepo=* --enablerepo=kurl.local install --allowerasing -y "${packages[@]}" else yum --disablerepo=* --enablerepo=kurl.local install -y "${packages[@]}" fi yum clean metadata --disablerepo=* --enablerepo=kurl.local rm /etc/yum.repos.d/kurl.local.repo reset_dnf_module_kurl_local logSuccess "Host packages ${packages[*]} installed" } function _yum_install_host_packages_el9() { if [ "${SKIP_SYSTEM_PACKAGE_INSTALL}" == "1" ]; then logStep "Skipping installation of host packages: ${packages[*]}" return fi local dir="$1" local dir_prefix="$2" local packages=("${@:3}") logStep "Installing host packages ${packages[*]}" local fullpath= fullpath="$(_yum_get_host_packages_path "$dir" "$dir_prefix")" if ! test -n "$(shopt -s nullglob; echo "$fullpath"/*.rpm)" ; then echo "Will not install host packages ${packages[*]}, no packages found." return 0 fi local repoprefix= repoprefix="$(echo "${dir%"/"}" | awk -F'/' '{ print $(NF-1) "-" $NF }')" if [ -n "$dir_prefix" ]; then repoprefix="$repoprefix.${dir_prefix/#"/"}" fi local reponame="$repoprefix.kurl.local" local repopath="$KURL_INSTALL_DIRECTORY.repos/$repoprefix" mkdir -p "$KURL_INSTALL_DIRECTORY.repos" rm -rf "$repopath" cp -r "$fullpath" "$repopath" cat > "/etc/yum.repos.d/$reponame.repo" </dev/null || true } # host_packages_shipped returns true if we do ship host packages for the distro # we are running the installation on. function host_packages_shipped() { if ! is_rhel_9_variant && ! is_amazon_2023 && ! 
is_ubuntu_2404; then return 0 fi return 1 } # is_rhel_9_variant returns 0 if the current distro is RHEL 9 or a derivative function is_rhel_9_variant() { if [ "$DIST_VERSION_MAJOR" != "9" ]; then return 1 fi case "$LSB_DIST" in centos|rhel|ol|rocky) return 0 ;; *) return 1 ;; esac } # is_amazon_2023 returns 0 if the current distro is Amazon 2023. function is_amazon_2023() { if [ "$DIST_VERSION_MAJOR" != "2023" ]; then return 1 fi if [ "$LSB_DIST" != "amzn" ]; then return 1 fi return 0 } # is_ubuntu_2404 returns 0 if the current distro is Ubuntu 24.04. function is_ubuntu_2404() { if [ "$DIST_VERSION_MAJOR" != "24" ]; then return 1 fi if [ "$LSB_DIST" != "ubuntu" ]; then return 1 fi return 0 } # ensure_host_package calls either _apt_ensure_host_package or _yum_ensure_host_package function ensure_host_package() { local yum_package="$1" local apt_package="$2" case "$LSB_DIST" in ubuntu) if [ -n "$apt_package" ] && [ "$apt_package" != "skip" ]; then _apt_ensure_host_package "$apt_package" fi ;; centos|rhel|ol|rocky|amzn) if [ -n "$yum_package" ] && [ "$yum_package" != "skip" ]; then _yum_ensure_host_package "$yum_package" fi ;; *) bail "Host package checks are not supported on ${LSB_DIST} ${DIST_MAJOR}" ;; esac } # _apt_ensure_host_package ensures that a package is installed on the host function _apt_ensure_host_package() { local package="$1" if ! apt_is_host_package_installed "$package" ; then logStep "Installing host package $package" if ! apt install -y "$package" ; then logFail "Failed to install host package $package." logFail "Please install $package and try again." bail " apt install $package" fi logSuccess "Host package $package installed" fi } # _yum_ensure_host_package ensures that a package is installed on the host function _yum_ensure_host_package() { local package="$1" if ! yum_is_host_package_installed "$package" ; then logStep "Installing host package $package" if ! yum install -y "$package" ; then logFail "Failed to install host package $package." logFail "Please install $package and try again." bail " yum install $package" fi logSuccess "Host package $package installed" fi } # preflights_require_host_packages ensures that all required host packages are installed. function preflights_require_host_packages() { if host_packages_shipped ; then return fi logStep "Checking required host packages" local seen=() local fail=0 local skip=0 # shellcheck disable=SC2044 for deps_file in $(find . -name Deps); do while read -r dep ; do skip=0 for seen_item in "${seen[@]}"; do if [ "$dep" = "$seen_item" ]; then skip=1 break fi done if [ "$skip" = "1" ]; then continue fi # use rpm to check rhel/centos/ol/rocky/amzn and dpkg-query to check ubuntu case "$LSB_DIST" in centos|rhel|ol|rocky) if ! echo "$deps_file" | grep -q "rhel-9"; then continue fi seen+=("$dep") if rpm -q "$dep" >/dev/null 2>&1 ; then continue fi fail=1 ;; amzn) if ! echo "$deps_file" | grep -q "amazon-2023"; then continue fi seen+=("$dep") if rpm -q "$dep" >/dev/null 2>&1 ; then continue fi fail=1 ;; ubuntu) if ! 
echo "$deps_file" | grep -q "ubuntu-24"; then continue fi seen+=("$dep") if dpkg-query -W -f='${Status}' "$dep" 2>/dev/null | grep -q "ok installed"; then continue fi fail=1 ;; *) logFail "Host package checks are not supported on ${LSB_DIST} ${DIST_MAJOR}" fail=1 ;; esac component=$(echo "$deps_file" | awk -F'/' '{print $3}') if [ "$component" = "host" ]; then logFail "Host package $dep is required" continue fi version=$(echo "$deps_file" | awk -F'/' '{print $4}') logFail "Host package $dep is required for $component version $version" done <"$deps_file" done if [ "$fail" = "1" ]; then echo "" log "Host packages are missing. Please install them and re-run the install script." exit 1 fi logSuccess "Required host packages are installed or available" } # apt_is_host_package_installed returns 0 if the package is installed on the host function apt_is_host_package_installed() { local package="$1" log "Checking if $package is installed" dpkg-query -W -f='${Status}' "$package" 2>/dev/null | grep -q "ok installed" >/dev/null } # yum_is_host_package_installed returns 0 if the package is installed on the host function yum_is_host_package_installed() { local package="$1" log "Checking if $package is installed" yum list installed "$package" >/dev/null 2>&1 } #!/bin/bash function kubernetes_pre_init() { if ! host_packages_shipped ; then # git is packaged in the bundle and installed in other oses by # kubernetes_install_host_packages ensure_host_package git git fi } function kubernetes_host() { kubernetes_load_modules kubernetes_load_ipv4_modules kubernetes_load_ipv6_modules kubernetes_load_ipvs_modules if [ "$SKIP_KUBERNETES_HOST" = "1" ]; then return 0 fi kubernetes_install_host_packages "$KUBERNETES_VERSION" kubernetes_load_images "$KUBERNETES_VERSION" install_plugins install_kustomize } function kubernetes_load_images() { local version="$1" local varname="KUBERNETES_IMAGES_LOADED_${version//./_}" if [ "${!varname:-}" = "1" ]; then # images already loaded for this version return 0 fi load_images "$DIR/packages/kubernetes/$version/images" if [ -n "$SONOBUOY_VERSION" ] && [ -d "$DIR/packages/kubernetes-conformance/$version/images" ]; then load_images "$DIR/packages/kubernetes-conformance/$version/images" fi declare -g "$varname"=1 } function kubernetes_get_packages() { if [ "$AIRGAP" != "1" ] && [ -n "$DIST_URL" ]; then kubernetes_get_host_packages_online "$KUBERNETES_VERSION" kubernetes_get_conformance_packages_online "$KUBERNETES_VERSION" fi } # kubernetes_maybe_get_packages_airgap downloads kubernetes packages if they are not already present function kubernetes_maybe_get_packages_airgap() { if [ "$AIRGAP" != "1" ]; then return fi if [ -d "$DIR/packages/kubernetes/$KUBERNETES_VERSION/assets" ]; then return fi addon_fetch_airgap "kubernetes" "$KUBERNETES_VERSION" } function kubernetes_load_ipvs_modules() { if lsmod | grep -q ip_vs ; then return fi local mod_nf_conntrack="nf_conntrack_ipv4" if [ "$KERNEL_MAJOR" -gt "4" ] || \ { [ "$KERNEL_MAJOR" -eq "4" ] && [ "$KERNEL_MINOR" -ge "19" ]; } || \ { { [ "$LSB_DIST" = "ol" ] || [ "$LSB_DIST" = "rhel" ] || [ "$LSB_DIST" = "centos" ] || [ "$LSB_DIST" = "rocky" ]; } && \ { [ "$DIST_VERSION_MAJOR" = "8" ] || [ "$DIST_VERSION_MAJOR" = "9" ] || [ "$DIST_VERSION_MINOR" -gt "2" ]; }; \ }; then mod_nf_conntrack="nf_conntrack" fi modprobe $mod_nf_conntrack rm -f /etc/modules-load.d/replicated-ipvs.conf echo "Adding kernel modules ip_vs, ip_vs_rr, ip_vs_wrr, and ip_vs_sh" modprobe ip_vs modprobe ip_vs_rr modprobe ip_vs_wrr modprobe ip_vs_sh echo $mod_nf_conntrack > 
/etc/modules-load.d/99-replicated-ipvs.conf # shellcheck disable=SC2129 echo "ip_vs" >> /etc/modules-load.d/99-replicated-ipvs.conf echo "ip_vs_rr" >> /etc/modules-load.d/99-replicated-ipvs.conf echo "ip_vs_wrr" >> /etc/modules-load.d/99-replicated-ipvs.conf echo "ip_vs_sh" >> /etc/modules-load.d/99-replicated-ipvs.conf } function kubernetes_load_modules() { if ! lsmod | grep -Fq br_netfilter ; then echo "Adding kernel module br_netfilter" modprobe br_netfilter fi echo "br_netfilter" > /etc/modules-load.d/99-replicated.conf } function kubernetes_load_ipv4_modules() { if [ "$IPV6_ONLY" = "1" ]; then return 0 fi if ! lsmod | grep -q ^ip_tables ; then echo "Adding kernel module ip_tables" modprobe ip_tables fi echo "ip_tables" > /etc/modules-load.d/99-replicated-ipv4.conf echo "net.bridge.bridge-nf-call-iptables = 1" > /etc/sysctl.d/99-replicated-ipv4.conf echo "net.ipv4.conf.all.forwarding = 1" >> /etc/sysctl.d/99-replicated-ipv4.conf sysctl --system if [ "$(cat /proc/sys/net/ipv4/ip_forward)" = "0" ]; then bail "Failed to enable IP forwarding." fi } function kubernetes_load_ipv6_modules() { if [ "$IPV6_ONLY" != "1" ]; then return 0 fi if ! lsmod | grep -q ^ip6_tables ; then echo "Adding kernel module ip6_tables" modprobe ip6_tables fi echo "ip6_tables" > /etc/modules-load.d/99-replicated-ipv6.conf echo "net.bridge.bridge-nf-call-ip6tables = 1" > /etc/sysctl.d/99-replicated-ipv6.conf echo "net.ipv6.conf.all.forwarding = 1" >> /etc/sysctl.d/99-replicated-ipv6.conf sysctl --system if [ "$(cat /proc/sys/net/ipv6/conf/all/forwarding)" = "0" ]; then bail "Failed to enable IPv6 forwarding." fi } # k8sVersion is an argument because this may be used to install step versions of K8s during an upgrade # to the target version function kubernetes_install_host_packages() { k8sVersion=$1 logStep "Install kubelet, kubectl and cni host packages" if kubernetes_host_commands_ok "$k8sVersion"; then logSuccess "Kubernetes host packages already installed" kubernetes_cis_chmod_kubelet_service_file # less command is broken if libtinfo.so.5 is missing in amazon linux 2 if [ "$LSB_DIST" == "amzn" ] && [ "$DIST_VERSION_MAJOR" != "2023" ] && [ "$AIRGAP" != "1" ] && ! file_exists "/usr/lib64/libtinfo.so.5"; then if [ -d "$DIR/packages/kubernetes/${k8sVersion}/assets" ]; then install_host_packages "${DIR}/packages/kubernetes/${k8sVersion}" ncurses-compat-libs fi fi return fi cat > "$DIR/tmp-kubeadm.conf" < /dev/null } function spinner_kubernetes_api_healthy() { if ! spinner_until 120 kubernetes_api_is_healthy; then bail "Kubernetes API failed to report healthy" fi } function spinner_containerd_is_healthy() { if ! 
spinner_until 120 containerd_is_healthy; then bail "Containerd failed to restart" fi } # With AWS NLB kubectl commands may fail to connect to the Kubernetes API immediately after a single # successful health check function spinner_kubernetes_api_stable() { echo "Waiting for kubernetes api health to report ok" for i in {1..10}; do sleep 1 spinner_kubernetes_api_healthy done } function kubernetes_drain() { local deleteEmptydirDataFlag="--delete-emptydir-data" local k8sVersion= k8sVersion=$(grep ' image: ' /etc/kubernetes/manifests/kube-apiserver.yaml | grep -oE '[0-9]+\.[0-9]+\.[0-9]+') local k8sVersionMinor= k8sVersionMinor=$(kubernetes_version_minor "$k8sVersion") if [ "$k8sVersionMinor" -lt "20" ]; then deleteEmptydirDataFlag="--delete-local-data" fi # --pod-selector='app!=csi-attacher,app!=csi-provisioner' # https://longhorn.io/docs/1.3.2/volumes-and-nodes/maintenance/#updating-the-node-os-or-container-runtime if kubernetes_has_remotes ; then kubectl drain "$1" \ "$deleteEmptydirDataFlag" \ --ignore-daemonsets \ --force \ --grace-period=30 \ --timeout=120s \ --pod-selector 'app notin (rook-ceph-mon,rook-ceph-osd,rook-ceph-osd-prepare,rook-ceph-operator,rook-ceph-agent),k8s-app!=kube-dns, name notin (restic)' || true else # On single node installs force drain to delete pods or # else the command will timeout when evicting pods with pod disruption budgets kubectl drain "$1" \ "$deleteEmptydirDataFlag" \ --ignore-daemonsets \ --force \ --grace-period=30 \ --timeout=120s \ --disable-eviction \ --pod-selector 'app notin (rook-ceph-mon,rook-ceph-osd,rook-ceph-osd-prepare,rook-ceph-operator,rook-ceph-agent),k8s-app!=kube-dns, name notin (restic)' || true fi } function kubernetes_node_has_version() { local name="$1" local version="$2" local actual_version="$(try_1m kubernetes_node_kubelet_version $name)" [ "$actual_version" = "v${version}" ] } function kubernetes_node_kubelet_version() { local name="$1" kubectl get node "$name" -o=jsonpath='{@.status.nodeInfo.kubeletVersion}' } function kubernetes_any_remote_master_unupgraded() { while read -r master; do local name=$(echo $master | awk '{ print $1 }') if ! kubernetes_node_has_version "$name" "$KUBERNETES_VERSION"; then return 0 fi done < <(kubernetes_remote_masters) return 1 } function kubernetes_any_worker_unupgraded() { while read -r worker; do local name=$(echo $worker | awk '{ print $1 }') if ! kubernetes_node_has_version "$name" "$KUBERNETES_VERSION"; then return 0 fi done < <(kubernetes_workers) return 1 } function kubelet_version() { kubelet --version | cut -d ' ' -f 2 | sed 's/v//' } function kubernetes_scale_down() { local ns="$1" local kind="$2" local name="$3" kubernetes_scale "$ns" "$kind" "$name" "0" } function kubernetes_scale() { local ns="$1" local kind="$2" local name="$3" local replicas="$4" if ! kubernetes_resource_exists "$ns" "$kind" "$name"; then return 0 fi kubectl -n "$ns" scale "$kind" "$name" --replicas="$replicas" } function kubernetes_secret_value() { local ns="$1" local name="$2" local key="$3" kubectl -n "$ns" get secret "$name" -ojsonpath="{ .data.$key }" 2>/dev/null | base64 --decode } function kubernetes_is_master() { if [ "$MASTER" = "1" ]; then return 0 elif [ -f /etc/kubernetes/manifests/kube-apiserver.yaml ]; then return 0 else return 1 fi } function discover_pod_subnet() { # TODO check ipv6 cidr for overlaps if [ "$IPV6_ONLY" = "1" ]; then if [ -z "$POD_CIDR" ]; then POD_CIDR="fd00:c00b:1::/112" fi return 0 fi local excluded="" if ! 
ip route show src "$PRIVATE_ADDRESS" | awk '{ print $1 }' | grep -q '/'; then excluded="--exclude-subnet=${PRIVATE_ADDRESS}/16" fi if [ -n "$POD_CIDR" ]; then local podCidrSize=$(echo $POD_CIDR | awk -F'/' '{ print $2 }') # if pod-cidr flag and pod-cidr-range are both set, validate pod-cidr is as large as pod-cidr-range if [ -n "$POD_CIDR_RANGE" ]; then if [ "$podCidrSize" -gt "$POD_CIDR_RANGE" ]; then bail "Pod cidr must be at least /$POD_CIDR_RANGE" fi fi # if pod cidr flag matches existing weave pod cidr don't validate if [ "$POD_CIDR" = "$EXISTING_POD_CIDR" ]; then return 0 elif [ -n "$EXISTING_POD_CIDR" ]; then bail "Pod cidr cannot be changed to $POD_CIDR because existing cidr is $EXISTING_POD_CIDR" fi if $DIR/bin/subnet --subnet-alloc-range "$POD_CIDR" --cidr-range "$podCidrSize" "$excluded" 1>/dev/null; then return 0 fi printf "${RED}Pod cidr ${POD_CIDR} overlaps with existing route. Continue? ${NC}" if ! confirmY ; then exit 1 fi return 0 fi # detected from weave device if [ -n "$EXISTING_POD_CIDR" ]; then echo "Using existing pod network ${EXISTING_POD_CIDR}" POD_CIDR="$EXISTING_POD_CIDR" return 0 fi local size="$POD_CIDR_RANGE" if [ -z "$size" ]; then size="20" fi # find a network for the Pods, preferring start at 10.32.0.0 if podnet=$($DIR/bin/subnet --subnet-alloc-range "10.32.0.0/16" --cidr-range "$size" "$excluded"); then echo "Found pod network: $podnet" POD_CIDR="$podnet" return 0 fi if podnet=$($DIR/bin/subnet --subnet-alloc-range "10.0.0.0/8" --cidr-range "$size" "$excluded"); then echo "Found pod network: $podnet" POD_CIDR="$podnet" return 0 fi if [ -n "$WEAVE_VERSION" ] ; then bail "Failed to find an available /${size} subnet for the pod network within either 10.32.0.0/16 or 10.0.0.0/8. \n Use Weave's podCIDR parameter to set a pod network that is not already in use. \n https://kurl.sh/docs/add-ons/weave#advanced-install-options" else bail "Failed to find an available /${size} subnet for the pod network within either 10.32.0.0/16 or 10.0.0.0/8. \n Use Flannel's podCIDR parameter to set a pod network that is not already in use. \n https://kurl.sh/docs/add-ons/flannel#advanced-install-options" fi } # This must run after discover_pod_subnet since it excludes the pod cidr function discover_service_subnet() { # TODO check ipv6 cidr for overlaps if [ "$IPV6_ONLY" = "1" ]; then if [ -z "$SERVICE_CIDR" ]; then SERVICE_CIDR="fd00:c00b:2::/112" fi return 0 fi local excluded="--exclude-subnet=$POD_CIDR" if ! ip route show src "$PRIVATE_ADDRESS" | awk '{ print $1 }' | grep -q '/'; then excluded="$excluded,${PRIVATE_ADDRESS}/16" fi EXISTING_SERVICE_CIDR=$(maybe kubeadm_cluster_configuration | grep serviceSubnet | awk '{ print $2 }') if [ -n "$SERVICE_CIDR" ]; then local serviceCidrSize=$(echo $SERVICE_CIDR | awk -F'/' '{ print $2 }') # if service-cidr flag and service-cidr-range are both set, validate service-cidr is as large as service-cidr-range if [ -n "$SERVICE_CIDR_RANGE" ]; then if [ "$serviceCidrSize" -gt "$SERVICE_CIDR_RANGE" ]; then bail "Service cidr must be at least /$SERVICE_CIDR_RANGE" fi fi # if service-cidr flag matches existing service cidr don't validate if [ "$SERVICE_CIDR" = "$EXISTING_SERVICE_CIDR" ]; then return 0 elif [ -n "$EXISTING_SERVICE_CIDR" ]; then bail "Service cidr cannot be changed to $SERVICE_CIDR because existing cidr is $EXISTING_SERVICE_CIDR" fi if $DIR/bin/subnet --subnet-alloc-range "$SERVICE_CIDR" --cidr-range "$serviceCidrSize" "$excluded" 1>/dev/null; then return 0 fi printf "${RED}Service cidr ${SERVICE_CIDR} overlaps with existing route. 
Continue? ${NC}" if ! confirmY ; then exit 1 fi return 0 fi if [ -n "$EXISTING_SERVICE_CIDR" ]; then echo "Using existing service network ${EXISTING_SERVICE_CIDR}" SERVICE_CIDR="$EXISTING_SERVICE_CIDR" return 0 fi local size="$SERVICE_CIDR_RANGE" if [ -z "$size" ]; then size="22" fi # find a network for the services, preferring start at 10.96.0.0 if servicenet=$($DIR/bin/subnet --subnet-alloc-range "10.96.0.0/16" --cidr-range "$size" "$excluded"); then echo "Found service network: $servicenet" SERVICE_CIDR="$servicenet" return 0 fi if servicenet=$($DIR/bin/subnet --subnet-alloc-range "10.0.0.0/8" --cidr-range "$size" "$excluded"); then echo "Found service network: $servicenet" SERVICE_CIDR="$servicenet" return 0 fi bail "Failed to find an available /${size} subnet for the service network within either 10.32.0.0/16 or 10.0.0.0/8. \n Use Kubernetes's serviceCIDR parameter to set a pod network that is not already in use. \n https://kurl.sh/docs/add-ons/kubernetes#advanced-install-options" } function kubernetes_node_images() { local nodeName="$1" kubectl get node "$nodeName" -ojsonpath="{range .status.images[*]}{ range .names[*] }{ @ }{'\n'}{ end }{ end }" } function list_all_required_images() { echo "$KURL_UTIL_IMAGE" find packages/kubernetes/$KUBERNETES_VERSION -type f -name Manifest 2>/dev/null | xargs cat | grep -E '^image' | grep -v no_remote_load | awk '{ print $3 }' if [ -n "$DOCKER_VERSION" ]; then find packages/docker/$DOCKER_VERSION -type f -name Manifest 2>/dev/null | xargs cat | grep -E '^image' | grep -v no_remote_load | awk '{ print $3 }' fi for dir in addons/*/ ; do local addon= addon=$(basename "$dir") local varname="${addon^^}_VERSION" varname="${varname//-/_}" local addon_version="${!varname}" if [ -z "$addon_version" ]; then continue fi local manifest_file="addons/$addon/$addon_version/Manifest" if [ ! -f "$manifest_file" ]; then continue fi grep -E '^image' "$manifest_file" | grep -v no_remote_load | awk '{ print $3 }' done } function kubernetes_node_has_all_images() { local node_name="$1" local image_list= while read -r image; do if ! 
kubernetes_node_has_image "$node_name" "$image"; then image_list="$image_list $image" fi done < <(list_all_required_images) image_list=$(echo "$image_list" | xargs) # strip leading and trailing whitespace if [ -n "$image_list" ]; then log "" logWarn "Node $node_name missing image(s) $image_list" return 1 fi } # kubernetes_nodes_missing_images will return a list of nodes that are missing any of the images in # the provided list function kubernetes_nodes_missing_images() { local images_list="$1" local target_host="$2" local exclude_hosts="$3" if [ -z "$images_list" ]; then return fi # shellcheck disable=SC2086 "$DIR"/bin/kurl cluster nodes-missing-images --image="$KURL_UTIL_IMAGE" --target-host="$target_host" --exclude-host="$exclude_hosts" $images_list } function kubernetes_node_has_image() { local node_name="$1" local image="$2" while read -r node_image; do if [ "$(canonical_image_name "$node_image")" = "$(canonical_image_name "$image")" ]; then return 0 fi done < <(kubernetes_node_images "$node_name") return 1 } KUBERNETES_REMOTE_PRIMARIES=() KUBERNETES_REMOTE_PRIMARY_VERSIONS=() function kubernetes_get_remote_primaries() { local primary= while read -r primary ; do local name= name=$(echo "$primary" | awk '{ print $1 }') local version= version="$(try_1m kubernetes_node_kubelet_version "$name")" KUBERNETES_REMOTE_PRIMARIES+=( "$name" ) KUBERNETES_REMOTE_PRIMARY_VERSIONS+=( "${version#v}" ) # strip leading v done < <(kubernetes_remote_masters) } KUBERNETES_SECONDARIES=() KUBERNETES_SECONDARY_VERSIONS=() function kubernetes_get_secondaries() { local secondary= while read -r secondary ; do local name= name=$(echo "$secondary" | awk '{ print $1 }') local version= version="$(try_1m kubernetes_node_kubelet_version "$name")" KUBERNETES_SECONDARIES+=( "$name" ) KUBERNETES_SECONDARY_VERSIONS+=( "${version#v}" ) # strip leading v done < <(kubernetes_workers) } function kubernetes_load_balancer_address() { maybe kubeadm_cluster_configuration | grep 'controlPlaneEndpoint:' | sed 's/controlPlaneEndpoint: \|"//g' } function kubernetes_pod_started() { local name=$1 local namespace=$2 local phase=$(kubectl -n $namespace get pod $name -ojsonpath='{ .status.phase }') case "$phase" in Running|Failed|Succeeded) return 0 ;; esac return 1 } function kubernetes_pod_completed() { local name=$1 local namespace=$2 local phase=$(kubectl -n $namespace get pod $name -ojsonpath='{ .status.phase }') case "$phase" in Failed|Succeeded) return 0 ;; esac return 1 } function kubernetes_pod_succeeded() { local name="$1" local namespace="$2" local phase=$(kubectl -n $namespace get pod $name -ojsonpath='{ .status.phase }') [ "$phase" = "Succeeded" ] } function kubernetes_is_current_cluster() { local api_service_address="$1" if grep -sq "${api_service_address}" /etc/kubernetes/kubelet.conf ; then return 0 fi if grep -sq "${api_service_address}" "$KUBEADM_CONF_FILE" ; then return 0 fi return 1 } function kubernetes_is_join_node() { if grep -sq 'kind: JoinConfiguration' "$KUBEADM_CONF_FILE" ; then return 0 fi return 1 } function kubernetes_is_installed() { if kubectl cluster-info >/dev/null 2>&1 ; then return 0 fi if ps aux | grep '[k]ubelet' ; then return 0 fi if commandExists kubelet ; then return 0 fi return 1 } function kubeadm_cluster_configuration() { kubectl get cm -o yaml -n kube-system kubeadm-config -ojsonpath='{ .data.ClusterConfiguration }' } function kubeadm_cluster_status() { kubectl get cm -o yaml -n kube-system kubeadm-config -ojsonpath='{ .data.ClusterStatus }' } function check_network() { logStep "Checking 
cluster networking" if [ -n "$WEAVE_VERSION" ]; then log "Checking if weave-net binary can be found in the path /opt/cni/bin/" if ! ls -la /opt/cni/bin/ | grep weave-net; then logWarn "Unable to find weave-net binary, deleting weave-net pod so that the binary will be recreated" kubectl delete pods --selector=name=weave-net --field-selector="spec.nodeName=$(get_local_node_name)" -n kube-system --ignore-not-found=true fi fi if ! kubernetes_any_node_ready; then echo "Waiting up to 10 minutes for node to report Ready" if ! spinner_until 600 kubernetes_any_node_ready ; then # Output the nodes for we know more about the problem kubectl get nodes bail "Any Node failed to report Ready" fi fi kubectl delete pods kurlnet-client kurlnet-server --force --grace-period=0 &>/dev/null || true cat </dev/null } function kubernetes_service_exists() { kubectl -n default get service kubernetes &>/dev/null } function kubernetes_all_nodes_ready() { local node_statuses= node_statuses="$(kubectl get nodes --no-headers 2>/dev/null | awk '{ print $2 }')" # no nodes are not ready and at least one node is ready if echo "${node_statuses}" | grep -q 'NotReady' && \ echo "${node_statuses}" | grep -v 'NotReady' | grep -q 'Ready' ; then return 1 fi return 0 } function kubernetes_any_node_ready() { if kubectl get nodes --no-headers 2>/dev/null | awk '{ print $2 }' | grep -v 'NotReady' | grep -q 'Ready' ; then return 0 fi return 1 } # Helper function which calculates the amount of the given resource (either CPU or memory) # to reserve in a given resource range, specified by a start and end of the range and a percentage # of the resource to reserve. Note that we return zero if the start of the resource range is # greater than the total resource capacity on the node. Additionally, if the end range exceeds the total # resource capacity of the node, we use the total resource capacity as the end of the range. # Args: # $1 total available resource on the worker node in input unit (either millicores for CPU or Mi for memory) # $2 start of the resource range in input unit # $3 end of the resource range in input unit # $4 percentage of range to reserve in percent*100 (to allow for two decimal digits) # Return: # amount of resource to reserve in input unit function get_resource_to_reserve_in_range() { local total_resource_on_instance=$1 local start_range=$2 local end_range=$3 local percentage=$4 resources_to_reserve="0" if (( $total_resource_on_instance > $start_range )); then resources_to_reserve=$(((($total_resource_on_instance < $end_range ? \ $total_resource_on_instance : $end_range) - $start_range) * $percentage / 100 / 100)) fi echo $resources_to_reserve } # Calculates the amount of memory to reserve for the kubelet in mebibytes from the total memory available on the instance. # From the total memory capacity of this worker node, we calculate the memory resources to reserve # by reserving a percentage of the memory in each range up to the total memory available on the instance. 
# We are using these memory ranges from GKE (https://cloud.google.com/kubernetes-engine/docs/concepts/cluster-architecture#node_allocatable): # 255 Mi of memory for machines with less than 1024Mi of memory # 25% of the first 4096Mi of memory # 20% of the next 4096Mi of memory (up to 8192Mi) # 10% of the next 8192Mi of memory (up to 16384Mi) # 6% of the next 114688Mi of memory (up to 131072Mi) # 2% of any memory above 131072Mi # Args: # $1 total available memory on the machine in Mi # Return: # memory to reserve in Mi for the kubelet function get_memory_mebibytes_to_reserve() { local total_memory_on_instance=$1 local memory_ranges=(0 4096 8192 16384 131072 $total_memory_on_instance) local memory_percentage_reserved_for_ranges=(2500 2000 1000 600 200) if (( $total_memory_on_instance <= 1024 )); then memory_to_reserve="255" else memory_to_reserve="0" for i in ${!memory_percentage_reserved_for_ranges[@]}; do local start_range=${memory_ranges[$i]} local end_range=${memory_ranges[(($i+1))]} local percentage_to_reserve_for_range=${memory_percentage_reserved_for_ranges[$i]} memory_to_reserve=$(($memory_to_reserve + \ $(get_resource_to_reserve_in_range $total_memory_on_instance $start_range $end_range $percentage_to_reserve_for_range))) done fi echo $memory_to_reserve } # Calculates the amount of CPU to reserve for the kubelet in millicores from the total number of vCPUs available on the instance. # From the total core capacity of this worker node, we calculate the CPU resources to reserve by reserving a percentage # of the available cores in each range up to the total number of cores available on the instance. # We are using these CPU ranges from GKE (https://cloud.google.com/kubernetes-engine/docs/concepts/cluster-architecture#node_allocatable): # 6% of the first core # 1% of the next core (up to 2 cores) # 0.5% of the next 2 cores (up to 4 cores) # 0.25% of any cores above 4 cores # Args: # $1 total number of millicores on the instance (number of vCPUs * 1000) # Return: # CPU resources to reserve in millicores (m) function get_cpu_millicores_to_reserve() { local total_cpu_on_instance=$1 local cpu_ranges=(0 1000 2000 4000 $total_cpu_on_instance) local cpu_percentage_reserved_for_ranges=(600 100 50 25) cpu_to_reserve="0" for i in ${!cpu_percentage_reserved_for_ranges[@]}; do local start_range=${cpu_ranges[$i]} local end_range=${cpu_ranges[(($i+1))]} local percentage_to_reserve_for_range=${cpu_percentage_reserved_for_ranges[$i]} cpu_to_reserve=$(($cpu_to_reserve + \ $(get_resource_to_reserve_in_range $total_cpu_on_instance $start_range $end_range $percentage_to_reserve_for_range))) done echo $cpu_to_reserve } function file_exists() { local filename=$1 if ! test -f "$filename"; then return 1 fi } # checks if the service in ns $1 with name $2 has endpoints function kubernetes_service_healthy() { local namespace=$1 local name=$2 kubectl -n "$namespace" get endpoints "$name" --no-headers | grep -v "" &>/dev/null } function kubernetes_version_minor() { local k8sVersion="$1" # shellcheck disable=SC2001 echo "$k8sVersion" | sed 's/v\?[0-9]*\.\([0-9]*\)\.[0-9]*/\1/' } # kubernetes_configure_pause_image will make kubelet aware of the pause (sandbox) image used by # containerd. This will prevent the kubelet from garbage collecting the pause image. # This flag will be removed in kubernetes 1.27. # NOTE: this configures the kubelet to use the pause image used by containerd rather than the other # way around. 
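# Worked example (illustrative only) for the kubelet resource-reservation helpers defined above
# (get_resource_to_reserve_in_range, get_memory_mebibytes_to_reserve, get_cpu_millicores_to_reserve),
# assuming a hypothetical node with 8192Mi of memory and 4 vCPUs (4000 millicores):
#   memory: 25% of the first 4096Mi = 1024Mi, plus 20% of the next 4096Mi = 819Mi,
#           so get_memory_mebibytes_to_reserve 8192 returns 1843 (Mi)
#   cpu:    6% of the first core = 60m, 1% of the second core = 10m, 0.5% of cores 3-4 = 10m,
#           so get_cpu_millicores_to_reserve 4000 returns 80 (m)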
function kubernetes_configure_pause_image() { local dir="$1" #shellcheck disable=SC2153 if [ "$KUBERNETES_TARGET_VERSION_MAJOR" != 1 ] || [ "$KUBERNETES_TARGET_VERSION_MINOR" -ge 27 ]; then return fi local CONTAINERD_PAUSE_IMAGE= #shellcheck disable=SC2034 CONTAINERD_PAUSE_IMAGE="$(kubernetes_containerd_pause_image)" if [ -z "$CONTAINERD_PAUSE_IMAGE" ]; then return fi insert_patches_strategic_merge "$dir/kustomization.yaml" "kubelet-args-pause-image.patch.yaml" render_yaml_file_2 "$dir/kubelet-args-pause-image.patch.tmpl.yaml" > "$dir/kubelet-args-pause-image.patch.yaml" # templatize the kubeadm version field as this will be rendered (again) when generating the final kubeadm conf file # shellcheck disable=SC2016 sed -i 's|kubeadm.k8s.io/v1beta.*|kubeadm.k8s.io/$(kubeadm_conf_api_version)|' "$dir/kubelet-args-pause-image.patch.yaml" } KUBELET_FLAGS_FILE="/var/lib/kubelet/kubeadm-flags.env" # KURL_HOSTNAME_OVERRIDE can be used to override the node name used by kURL KURL_HOSTNAME_OVERRIDE=${KURL_HOSTNAME_OVERRIDE:-} # kubernetes_init_hostname sets the HOSTNAME variable to equal the hostname binary output. If # KURL_HOSTNAME_OVERRIDE is set, it will be used instead. Otherwise, if the kubelet flags file # contains a --hostname-override flag, it will be used instead. function kubernetes_init_hostname() { export HOSTNAME if [ -n "$KURL_HOSTNAME_OVERRIDE" ]; then HOSTNAME="$KURL_HOSTNAME_OVERRIDE" fi local hostname_override= hostname_override="$(kubernetes_get_kubelet_hostname_override)" if [ -n "$hostname_override" ] ; then HOSTNAME="$hostname_override" fi HOSTNAME="$(hostname | tr '[:upper:]' '[:lower:]')" } # kubernetes_get_kubelet_hostname_override returns the value of the --hostname-override flag in the # kubelet env flags file. function kubernetes_get_kubelet_hostname_override() { if [ -f "$KUBELET_FLAGS_FILE" ]; then grep -o '\--hostname-override=[^" ]*' "$KUBELET_FLAGS_FILE" | awk -F'=' '{ print $2 }' fi } # kubernetes_configure_pause_image_upgrade will check if the pause image used by containerd has # changed. If it has, it will update the kubelet flags to use the new pause image and restart the # kubelet. function kubernetes_configure_pause_image_upgrade() { local CONTAINERD_PAUSE_IMAGE= #shellcheck disable=SC2034 CONTAINERD_PAUSE_IMAGE="$(kubernetes_containerd_pause_image)" if [ -z "$CONTAINERD_PAUSE_IMAGE" ]; then return fi if [ ! -f "$KUBELET_FLAGS_FILE" ]; then return fi local old_pause_image= old_pause_image="$(grep -o '\--pod-infra-container-image=[^" ]*' "$KUBELET_FLAGS_FILE" | awk -F'=' '{ print $2 }')" # if the pause image is not set this may be a version of kubelet that does not support the flag if [ -z "$old_pause_image" ] || [ "$old_pause_image" = "$CONTAINERD_PAUSE_IMAGE" ]; then return fi sed -i "s|$old_pause_image|$CONTAINERD_PAUSE_IMAGE|" "$KUBELET_FLAGS_FILE" systemctl daemon-reload systemctl restart kubelet } # kubernetes_containerd_pause_image will return the pause image used by containerd. function kubernetes_containerd_pause_image() { if [ -z "$CONTAINERD_VERSION" ] || [ ! 
-f /etc/containerd/config.toml ] ; then return fi grep sandbox_image /etc/containerd/config.toml | sed 's/[=\"]//g' | awk '{ print $2 }' } # kubernetes_kustomize_config_migrate fixes missing and deprecated fields in kustomization file function kubernetes_kustomize_config_migrate() { local kustomize_dir=$1 if [ "$KUBERNETES_TARGET_VERSION_MINOR" -ge "27" ]; then # TODO: Currently this is using kustomize 3.5.4 to migrate the config due to a bug in # kustomize v5: https://github.com/kubernetes-sigs/kustomize/issues/5149 ( cd "$kustomize_dir" && kustomize edit fix ) fi } # kubernetes_configure_coredns is a workaround to reset the custom nameserver config in the coredns # configmap. This runs after kubeadm init or upgrade which will reset the coredns configmap if it # finds that it is the default. the issue is that it does a fuzzy match and if only the nameserver # is set kubeadm determines that it is the default and it replaces the configmap. function kubernetes_configure_coredns() { if [ -z "$NAMESERVER" ]; then return 0 fi kubectl -n kube-system get configmap coredns -oyaml > /tmp/Corefile # Example lines to replace from k8s 1.17 and 1.19 # "forward . /etc/resolv.conf" => "forward . 8.8.8.8" # "forward . /etc/resolv.conf {" => "forward . 8.8.8.8 {" sed -i "s/forward \. \/etc\/resolv\.conf/forward \. ${NAMESERVER}/" /tmp/Corefile kubectl -n kube-system replace configmap coredns -f /tmp/Corefile kubectl -n kube-system rollout restart deployment/coredns } # shellcheck disable=SC2148 function object_store_exists() { if [ -n "$OBJECT_STORE_ACCESS_KEY" ] && \ [ -n "$OBJECT_STORE_SECRET_KEY" ] && \ [ -n "$OBJECT_STORE_CLUSTER_IP" ]; then return 0 else return 1 fi } function object_store_running() { if kubernetes_resource_exists rook-ceph secret rook-ceph-object-user-rook-ceph-store-kurl || kubernetes_resource_exists minio get secret minio-credentials; then return 0 fi return 1 } function object_store_create_bucket() { if object_store_bucket_exists "$1" ; then echo "object store bucket $1 exists" return 0 fi if ! _object_store_create_bucket "$1" ; then if object_store_exists; then return 1 fi bail "attempted to create bucket $1 but no object store configured" fi echo "object store bucket $1 created" } function _object_store_create_bucket() { local bucket=$1 local acl="x-amz-acl:private" local d=$(LC_TIME="en_US.UTF-8" TZ="UTC" date +"%a, %d %b %Y %T %z") local string="PUT\n\n\n${d}\n${acl}\n/$bucket" local sig=$(echo -en "${string}" | openssl dgst -sha1 -hmac "${OBJECT_STORE_SECRET_KEY}" -binary | base64) local addr=$($DIR/bin/kurl netutil format-ip-address "$OBJECT_STORE_CLUSTER_IP") curl -fsSL -X PUT \ --globoff \ --noproxy "*" \ -H "Host: $OBJECT_STORE_CLUSTER_IP" \ -H "Date: $d" \ -H "$acl" \ -H "Authorization: AWS $OBJECT_STORE_ACCESS_KEY:$sig" \ "http://$addr/$bucket" >/dev/null 2>&1 } function object_store_bucket_exists() { local bucket=$1 local acl="x-amz-acl:private" local d=$(LC_TIME="en_US.UTF-8" TZ="UTC" date +"%a, %d %b %Y %T %z") local string="HEAD\n\n\n${d}\n${acl}\n/$bucket" local sig=$(echo -en "${string}" | openssl dgst -sha1 -hmac "${OBJECT_STORE_SECRET_KEY}" -binary | base64) local addr=$($DIR/bin/kurl netutil format-ip-address "$OBJECT_STORE_CLUSTER_IP") curl -fsSL -I \ --globoff \ --noproxy "*" \ -H "Host: $OBJECT_STORE_CLUSTER_IP" \ -H "Date: $d" \ -H "$acl" \ -H "Authorization: AWS $OBJECT_STORE_ACCESS_KEY:$sig" \ "http://$addr/$bucket" >/dev/null 2>&1 } # migrate_object_store creates a pod that migrates data between two different object stores. 
receives # the namespace, the source and destination addresses, access keys and secret keys. returns once the # pos has been finished or a timeout of 30 minutes has been reached. function migrate_object_store() { local namespace=$1 local source_addr=$2 local source_access_key=$3 local source_secret_key=$4 local destination_addr=$5 local destination_access_key=$6 local destination_secret_key=$7 kubectl -n "$namespace" delete pod sync-object-store --force --grace-period=0 --ignore-not-found cat < /dev/null return 0 fi return 1 } function migrate_between_object_stores() { local source_host=$1 local source_access_key=$2 local source_secret_key=$3 local destination_host=$4 local destination_addr=$5 local destination_access_key=$6 local destination_secret_key=$7 if kubernetes_resource_exists kurl deployment ekc-operator; then kubectl -n kurl scale deploy ekc-operator --replicas=0 log "Waiting for ekco pods to be removed" if ! spinner_until 120 ekco_pods_gone; then logFail "Unable to scale down ekco operator" return 1 fi fi get_shared if ! migrate_object_store "default" "$source_host" "$source_access_key" "$source_secret_key" "$destination_host" "$destination_access_key" "$destination_secret_key" ; then # even if the migration failed, we need to ensure ekco is running again if kubernetes_resource_exists kurl deployment ekc-operator; then kubectl -n kurl scale deploy ekc-operator --replicas=1 fi bail "sync-object-store pod failed" fi # ensure ekco is running again if kubernetes_resource_exists kurl deployment ekc-operator; then kubectl -n kurl scale deploy ekc-operator --replicas=1 fi # Update kotsadm to use new object store if kubernetes_resource_exists default secret kotsadm-s3; then echo "Updating kotsadm to use $destination_host" kubectl patch secret kotsadm-s3 -p "{\"stringData\":{\"access-key-id\":\"${destination_access_key}\",\"secret-access-key\":\"${destination_secret_key}\",\"endpoint\":\"http://${destination_host}\",\"object-store-cluster-ip\":\"${destination_addr}\"}}" if kubernetes_resource_exists default deployment kotsadm; then kubectl rollout restart deployment kotsadm elif kubernetes_resource_exists default statefulset kotsadm; then kubectl rollout restart statefulset kotsadm fi fi local newIP=$($DIR/bin/kurl netutil format-ip-address "$destination_addr") # Update registry to use new object store if kubernetes_resource_exists kurl configmap registry-config; then echo "Updating registry to use $destination_host" local temp_file= temp_file=$(mktemp) kubectl -n kurl get configmap registry-config -ojsonpath='{ .data.config\.yml }' | sed "s/regionendpoint: http.*/regionendpoint: http:\/\/${newIP}/" > "$temp_file" kubectl -n kurl delete configmap registry-config kubectl -n kurl create configmap registry-config --from-file=config.yml="$temp_file" rm "$temp_file" fi if kubernetes_resource_exists kurl secret registry-s3-secret; then kubectl -n kurl patch secret registry-s3-secret -p "{\"stringData\":{\"access-key-id\":\"${destination_access_key}\",\"secret-access-key\":\"${destination_secret_key}\",\"object-store-cluster-ip\":\"${destination_addr}\",\"object-store-hostname\":\"http://${destination_host}\"}}" fi if kubernetes_resource_exists kurl deployment registry; then kubectl -n kurl rollout restart deployment registry fi # Update velero to use new object store only if currently using object store since velero may have already been # updated to use an off-cluster object store. 
if kubernetes_resource_exists velero backupstoragelocation default; then echo "Updating velero to use new object store $destination_host" s3Url=$(kubectl -n velero get backupstoragelocation default -ojsonpath='{ .spec.config.s3Url }') if [ "$s3Url" = "http://${source_host}" ]; then kubectl -n velero patch backupstoragelocation default --type=merge -p "{\"spec\":{\"config\":{\"s3Url\":\"http://${destination_host}\",\"publicUrl\":\"http://${newIP}\"}}}" while read -r resticrepo; do oldResticIdentifier=$(kubectl -n velero get resticrepositories "$resticrepo" -ojsonpath="{ .spec.resticIdentifier }") newResticIdentifier=$(echo "$oldResticIdentifier" | sed "s/${source_host}/${destination_host}/") kubectl -n velero patch resticrepositories "$resticrepo" --type=merge -p "{\"spec\":{\"resticIdentifier\":\"${newResticIdentifier}\"}}" done < <(kubectl -n velero get resticrepositories --selector=velero.io/storage-location=default --no-headers | awk '{ print $1 }') else echo "The Velero default backupstoragelocation was not $source_host, not updating to use $destination_host" fi fi if kubernetes_resource_exists velero secret cloud-credentials; then if kubectl -n velero get secret cloud-credentials -ojsonpath='{ .data.cloud }' | base64 -d | grep -q "$source_access_key"; then local temp_file= temp_file=$(mktemp) kubectl -n velero get secret cloud-credentials -ojsonpath='{ .data.cloud }' | base64 -d > "$temp_file" sed -i "s/aws_access_key_id=.*/aws_access_key_id=${destination_access_key}/" "$temp_file" sed -i "s/aws_secret_access_key=.*/aws_secret_access_key=${destination_secret_key}/" "$temp_file" cloud=$(cat "$temp_file" | base64 -w 0) kubectl -n velero patch secret cloud-credentials -p "{\"data\":{\"cloud\":\"${cloud}\"}}" rm "$temp_file" else echo "The Velero cloud-credentials secret did not contain credentials for $source_host, not updating to use $destination_host credentials" fi fi if kubernetes_resource_exists velero daemonset restic; then kubectl -n velero rollout restart daemonset restic fi if kubernetes_resource_exists velero deployment velero; then kubectl -n velero rollout restart deployment velero fi printf "\n${GREEN}Object store migration completed successfully${NC}\n" return 0 } function migrate_rgw_to_minio_checks() { logStep "Running Rook Ceph Object Store to Minio migration checks ..." if ! rook_is_healthy_to_migrate_from; then bail "Cannot upgrade from Rook Ceph Object Store to Minio. Rook Ceph is unhealthy." fi log "Wating for Rook Ceph Object Store health ..." if ! spinner_until 300 rook_rgw_check_if_is_healthy ; then logFail "Failed to detect healthy Rook Ceph Object Store" bail "Cannot upgrade from Rook Ceph Object Store to Minio. Rook Ceph is unhealthy." fi log "Awaiting 2 minutes to check MinIO Pod(s) are Running" if ! spinner_until 120 check_for_running_pods "$MINIO_NAMESPACE"; then logFail "MinIO has unhealthy Pod(s). Check the namespace $MINIO_NAMESPACE " bail "Cannot upgrade from Rook to MinIO. MinIO is unhealthy." fi logSuccess "Rook Ceph Object Store to Minio migration checks completed successfully." 
} function rook_rgw_check_if_is_healthy() { local IP=$(kubectl -n rook-ceph get service rook-ceph-rgw-rook-ceph-store | tail -n1 | awk '{ print $3}') curl --globoff --noproxy "*" --fail --silent --insecure "http://${IP}" > /dev/null } function migrate_rgw_to_minio() { report_addon_start "rook-ceph-to-minio" "v1.1" migrate_rgw_to_minio_checks RGW_HOST="rook-ceph-rgw-rook-ceph-store.rook-ceph" RGW_ACCESS_KEY_ID=$(kubectl -n rook-ceph get secret rook-ceph-object-user-rook-ceph-store-kurl -o yaml | grep AccessKey | head -1 | awk '{print $2}' | base64 --decode) RGW_ACCESS_KEY_SECRET=$(kubectl -n rook-ceph get secret rook-ceph-object-user-rook-ceph-store-kurl -o yaml | grep SecretKey | head -1 | awk '{print $2}' | base64 --decode) MINIO_HOST="minio.${MINIO_NAMESPACE}" MINIO_CLUSTER_IP=$(kubectl -n ${MINIO_NAMESPACE} get service minio | tail -n1 | awk '{ print $3}') MINIO_ACCESS_KEY_ID=$(kubectl -n ${MINIO_NAMESPACE} get secret minio-credentials -ojsonpath='{ .data.MINIO_ACCESS_KEY }' | base64 --decode) MINIO_ACCESS_KEY_SECRET=$(kubectl -n ${MINIO_NAMESPACE} get secret minio-credentials -ojsonpath='{ .data.MINIO_SECRET_KEY }' | base64 --decode) migrate_between_object_stores "$RGW_HOST" "$RGW_ACCESS_KEY_ID" "$RGW_ACCESS_KEY_SECRET" "$MINIO_HOST" "$MINIO_CLUSTER_IP" "$MINIO_ACCESS_KEY_ID" "$MINIO_ACCESS_KEY_SECRET" report_addon_success "rook-ceph-to-minio" "v1.1" } function migrate_minio_to_rgw() { local minio_ns="$MINIO_NAMESPACE" if [ -z "$minio_ns" ]; then minio_ns=minio fi if ! kubernetes_resource_exists $minio_ns deployment minio && ! kubernetes_resource_exists $minio_ns statefulset ha-minio; then return 0 fi report_addon_start "minio-to-rook-ceph" "v1.1" MINIO_HOST="minio.${minio_ns}" MINIO_ACCESS_KEY_ID=$(kubectl -n ${minio_ns} get secret minio-credentials -ojsonpath='{ .data.MINIO_ACCESS_KEY }' | base64 --decode) MINIO_ACCESS_KEY_SECRET=$(kubectl -n ${minio_ns} get secret minio-credentials -ojsonpath='{ .data.MINIO_SECRET_KEY }' | base64 --decode) RGW_HOST="rook-ceph-rgw-rook-ceph-store.rook-ceph" RGW_CLUSTER_IP=$(kubectl -n rook-ceph get service rook-ceph-rgw-rook-ceph-store | tail -n1 | awk '{ print $3}') RGW_ACCESS_KEY_ID=$(kubectl -n rook-ceph get secret rook-ceph-object-user-rook-ceph-store-kurl -o yaml | grep AccessKey | head -1 | awk '{print $2}' | base64 --decode) RGW_ACCESS_KEY_SECRET=$(kubectl -n rook-ceph get secret rook-ceph-object-user-rook-ceph-store-kurl -o yaml | grep SecretKey | head -1 | awk '{print $2}' | base64 --decode) migrate_between_object_stores "$MINIO_HOST" "$MINIO_ACCESS_KEY_ID" "$MINIO_ACCESS_KEY_SECRET" "$RGW_HOST" "$RGW_CLUSTER_IP" "$RGW_ACCESS_KEY_ID" "$RGW_ACCESS_KEY_SECRET" report_addon_success "minio-to-rook-ceph" "v1.1" } export KUBECTL_PLUGINS_PATH=/usr/local/bin function install_plugins() { pushd "$DIR/krew" tar xzf preflight.tar.gz --no-same-owner && chown root:root preflight && mv preflight /usr/local/bin/kubectl-preflight tar xzf support-bundle.tar.gz --no-same-owner && chown root:root support-bundle && mv support-bundle /usr/local/bin/kubectl-support_bundle popd # uninstall system-wide krew from old versions of kurl rm -rf /opt/replicated/krew sed -i '/^export KUBECTL_PLUGINS_PATH.*KREW_ROOT/d' /etc/profile sed -i '/^export KREW_ROOT.*replicated/d' /etc/profile } function install_kustomize() { if ! kubernetes_is_master; then return 0 elif [ ! -d "$DIR/packages/kubernetes/${k8sVersion}/assets" ]; then echo "Kustomize package is missing in your distribution. Skipping." 
return 0 fi kustomize_dir=/usr/local/bin pushd "$DIR/packages/kubernetes/${k8sVersion}/assets" for file in $(ls kustomize-*);do if [ "${file: -6}" == "tar.gz" ];then tar xf ${file} chmod a+x kustomize mv kustomize /usr/local/bin/${file%%.tar*} else # Earlier versions of kustomize weren't archived/compressed chmod a+x ${file} cp ${file} ${kustomize_dir} fi done popd if ls ${kustomize_dir}/kustomize-* 1>/dev/null 2>&1;then latest_binary=$(basename $(ls ${kustomize_dir}/kustomize-* | sort -V | tail -n 1)) # Link to the latest version ln -s -f ${kustomize_dir}/${latest_binary} ${kustomize_dir}/kustomize fi } # preflights are run on all nodes for init.sh, join.sh, and upgrade.sh function preflights() { require64Bit bailIfUnsupportedOS bailIfUnsupportedKubernetesVersion mustSwapoff prompt_if_docker_unsupported_os check_docker_k8s_version checkFirewalld checkUFW must_disable_selinux apply_iptables_config cri_preflights host_nameservers_reachable allow_remove_docker_new_install bail_if_automated_storage_scaling_prereqs_not_met return 0 } # init_preflights are only run on the first node init.sh function init_preflights() { kotsadm_prerelease bail_when_no_object_store_and_s3_enabled bail_if_kurl_pods_are_unhealthy bail_if_unsupported_migration_from_rook_to_openebs bail_if_unsupported_migration_from_longhorn_to_openebs bail_if_kurl_version_is_lower_than_previous_config bail_if_no_object_store_or_storage_and_has_registry return 0 } # if kurl pods like ekco not be running then we should bail function bail_if_kurl_pods_are_unhealthy() { if commandExists kubectl; then # if host preflight are being ignored we just move forward. if [ "${HOST_PREFLIGHT_IGNORE}" = "1" ]; then logWarn "Host preflight checks being ignored, moving on without checking for unhealthy pods in kurl namespace." return 0 fi log "Awaiting 2 minutes to check kURL Pod(s) are Running" if ! spinner_until 120 check_for_running_pods kurl; then kubectl get pods -n kurl # for each pod in UNHEALTHY_PODS, print it and its current status for pod in $UNHEALTHY_PODS; do kubectl get pod -n kurl "$pod" -o jsonpath='pod {.metadata.name} owned by a {.metadata.ownerReferences[*].kind} has phase {.status.phase} with containers {range .status.containerStatuses[*]}{.name} ready={.ready} restarts={.restartCount};{end}' done bail "Kurl has unhealthy Pod(s) $UNHEALTHY_PODS. Restarting the pod may fix the issue." fi fi } function join_preflights() { preflights_require_no_kubernetes_or_current_node return 0 } function require_root_user() { local user="$(id -un 2>/dev/null || true)" if [ "$user" != "root" ]; then bail "Error: this installer needs to be run as root." fi } function require64Bit() { case "$(uname -m)" in *64) ;; *) echo >&2 'Error: you are not using a 64bit platform.' echo >&2 'This installer currently only supports 64bit platforms.' exit 1 ;; esac } function bailIfUnsupportedOS() { case "$LSB_DIST$DIST_VERSION" in ubuntu16.04) logWarn "Install is not supported on Ubuntu 16.04. Installation of Kubernetes will be best effort." ;; rhel7.4|rhel7.5|rhel7.6|rhel7.7|rhel7.8|rhel7.9|centos7.4|centos7.5|centos7.6|centos7.7|centos7.8|centos7.9|ol7.4|ol7.5|ol7.6|ol7.7|ol7.8|ol7.9) logWarn "Install is not supported on EOL RHEL/CentOS. Installation of Kubernetes will be best effort." 
;; ubuntu18.04|ubuntu20.04|ubuntu22.04|ubuntu24.04) ;; rhel8.0|rhel8.1|rhel8.2|rhel8.3|rhel8.4|rhel8.5|rhel8.6|rhel8.7|rhel8.8|rhel8.9|rhel8.10|rhel9.0|rhel9.1|rhel9.2|rhel9.3|rhel9.4|rhel9.5|rhel9) ;; rocky9.0|rocky9.1|rocky9.2|rocky9.3|rocky9.4|rocky9.5|rocky9) ;; centos8|centos8.0|centos8.1|centos8.2|centos8.3|centos8.4|centos9) ;; amzn2|amzn2023) ;; ol8.0|ol8.1|ol8.2|ol8.3|ol8.4|ol8.5|ol8.6|ol8.7|ol8.8|ol8.9|ol8.10) ;; *) bail "Kubernetes install is not supported on ${LSB_DIST} ${DIST_VERSION}. The list of supported operating systems can be viewed at https://kurl.sh/docs/install-with-kurl/system-requirements." ;; esac } # bailIfUnsupportedKubernetesVersion bails out if the Kubernetes version we are # about to install isn't supported by the underlying Operating System. function bailIfUnsupportedKubernetesVersion() { if is_amazon_2023 ; then if [ "$KUBERNETES_TARGET_VERSION_MINOR" -lt "24" ]; then bail "Kubernetes versions < 1.24 are not supported on Amazon Linux 2023." fi fi } function mustSwapoff() { if swap_is_on || swap_is_enabled; then printf "\n${YELLOW}This application is incompatible with memory swapping enabled. Disable swap to continue?${NC} " if confirmY ; then printf "=> Running swapoff --all\n" swapoff --all if swap_fstab_enabled; then swap_fstab_disable fi if swap_service_enabled; then swap_service_disable fi if swap_azure_linux_agent_enabled; then swap_azure_linux_agent_disable fi logSuccess "Swap disabled.\n" else bail "\nDisable swap with swapoff --all and remove all swap entries from /etc/fstab before re-running this script" fi fi } function swap_is_on() { swapon --summary | grep --quiet " " # todo this could be more specific, swapon -s returns nothing if its off } function swap_is_enabled() { swap_fstab_enabled || swap_service_enabled || swap_azure_linux_agent_enabled } function swap_fstab_enabled() { cat /etc/fstab | grep --quiet --ignore-case --extended-regexp '^[^#]+swap' } function swap_fstab_disable() { printf "=> Commenting swap entries in /etc/fstab \n" sed --in-place=.bak '/\bswap\b/ s/^/#/' /etc/fstab printf "=> A backup of /etc/fstab has been made at /etc/fstab.bak\n\n" printf "\n${YELLOW}Changes have been made to /etc/fstab. We recommend reviewing them after completing this installation to ensure mounts are correctly configured.${NC}\n\n" sleep 5 # for emphasis of the above ^ } # This is a service on some Azure VMs that just enables swap function swap_service_enabled() { systemctl -q is-enabled temp-disk-swapfile 2>/dev/null } function swap_service_disable() { printf "=> Disabling temp-disk-swapfile service\n" systemctl disable temp-disk-swapfile } function swap_azure_linux_agent_enabled() { cat /etc/waagent.conf 2>/dev/null | grep -q 'ResourceDisk.EnableSwap=y' } function swap_azure_linux_agent_disable() { printf "=> Disabling swap in Azure Linux Agent configuration file /etc/waagent.conf\n" sed -i 's/ResourceDisk.EnableSwap=y/ResourceDisk.EnableSwap=n/g' /etc/waagent.conf } function check_docker_k8s_version() { local version= version="$(get_docker_version)" if [ -z "$version" ]; then return fi case "$KUBERNETES_TARGET_VERSION_MINOR" in 14|15) compareDockerVersions "$version" 1.13.1 if [ "$COMPARE_DOCKER_VERSIONS_RESULT" -eq "-1" ]; then bail "Minimum Docker version for Kubernetes $KUBERNETES_VERSION is 1.13.1." fi ;; esac } function prompt_if_docker_unsupported_os() { if is_docker_version_supported ; then return fi logWarn "Docker ${DOCKER_VERSION} is not supported on ${LSB_DIST} ${DIST_VERSION}." logWarn "The containerd addon is recommended. 
https://kurl.sh/docs/add-ons/containerd" if commandExists "docker" ; then return fi printf "${YELLOW}Continue? ${NC}" 1>&2 if ! confirmN ; then exit 1 fi } checkFirewalld() { if [ -n "$PRESERVE_DOCKER_CONFIG" ]; then return fi apply_firewalld_config if [ "$BYPASS_FIREWALLD_WARNING" = "1" ]; then return fi if ! systemctl -q is-enabled firewalld && ! systemctl -q is-active firewalld; then logSuccess "Firewalld is either not enabled or not active." return fi if [ "$HARD_FAIL_ON_FIREWALLD" = "1" ]; then printf "${RED}Firewalld is currently either enabled or active. Stop (systemctl stop firewalld) and disable Firewalld (systemctl disable firewalld) before proceeding.${NC}\n" 1>&2 exit 1 fi if [ -n "$DISABLE_FIREWALLD" ]; then systemctl stop firewalld systemctl disable firewalld return fi printf "${YELLOW}Firewalld is currently either enabled or active. To ensure smooth installation and avoid potential issues, it is highly recommended to stop and disable Firewalld. Please press 'Y' to proceed with stopping and disabling Firewalld.${NC}" if confirmY ; then systemctl stop firewalld systemctl disable firewalld return fi printf "${YELLOW}Please note that if you choose to continue with Firewalld enabled and active, the installer may encounter unexpected behaviors and may not function properly. Therefore, it is strongly advised to stop and completely disable Firewalld before proceeding. Continue with firewalld enabled and/or active?${NC}" if confirmN ; then BYPASS_FIREWALLD_WARNING=1 return fi exit 1 } checkUFW() { if [ -n "$PRESERVE_DOCKER_CONFIG" ]; then return fi if [ "$BYPASS_UFW_WARNING" = "1" ]; then return fi # check if the UFW service is active in systemd if ! systemctl -q is-active ufw ; then return fi # check whether UFW reports itself as active or inactive UFW_STATUS=$(ufw status | grep 'Status: ' | awk '{ print $2 }') if [ "$UFW_STATUS" = "inactive" ]; then return fi if [ "$HARD_FAIL_ON_UFW" = "1" ]; then printf "${RED}UFW is active${NC}\n" 1>&2 exit 1 fi if [ -n "$DISABLE_UFW" ]; then ufw disable return fi printf "${YELLOW}UFW is active, please press Y to disable ${NC}" if confirmY ; then ufw disable return fi printf "${YELLOW}Continue with ufw active? ${NC}" if confirmN ; then BYPASS_UFW_WARNING=1 return fi exit 1 } must_disable_selinux() { # From kubernetes kubeadm docs for RHEL: # # Disabling SELinux by running setenforce 0 is required to allow containers to # access the host filesystem, which is required by pod networks for example. # You have to do this until SELinux support is improved in the kubelet. # Check and apply YAML overrides if [ -n "$PRESERVE_SELINUX_CONFIG" ]; then return fi apply_selinux_config if [ -n "$BYPASS_SELINUX_PREFLIGHT" ]; then return fi if selinux_enabled && selinux_enforced ; then if [ -n "$DISABLE_SELINUX" ]; then setenforce 0 sed -i s/^SELINUX=.*$/SELINUX=permissive/ /etc/selinux/config return fi printf "\n${YELLOW}Kubernetes is incompatible with SELinux.
Disable SELinux to continue?${NC} " if confirmY ; then setenforce 0 sed -i s/^SELINUX=.*$/SELINUX=permissive/ /etc/selinux/config else bail "\nDisable SELinux with 'setenforce 0' before re-running install script" fi fi } function force_docker() { DOCKER_VERSION="20.10.17" printf "${YELLOW}NO CRI version was listed in yaml or found on host OS, defaulting to online docker install${NC}\n" printf "${YELLOW}THIS FEATURE IS NOT SUPPORTED AND WILL BE DEPRECATED IN FUTURE KURL VERSIONS${NC}\n" printf "${YELLOW}The installer did not specify a version of Docker or Containerd to include, but having one is required by all kURL installation scripts. The latest supported version ($DOCKER_VERSION) of Docker will be installed.${NC}\n" } function cri_preflights() { require_cri } function require_cri() { if is_rhel_9_variant || is_amazon_2023; then if [ -z "$CONTAINERD_VERSION" ]; then bail "Containerd is required on RHEL 9 variants and Amazon Linux 2023. Docker is not supported." fi fi if commandExists docker ; then SKIP_DOCKER_INSTALL=1 return 0 fi if commandExists ctr ; then return 0 fi if [ "$LSB_DIST" = "rhel" ]; then if [ -n "$NO_CE_ON_EE" ] && [ -z "$CONTAINERD_VERSION" ]; then printf "${RED}Enterprise Linux distributions require Docker Enterprise Edition. Please install Docker before running this installation script.${NC}\n" 1>&2 return 0 fi fi if [ "$SKIP_DOCKER_INSTALL" = "1" ]; then bail "Docker is required" fi if [ -z "$DOCKER_VERSION" ] && [ -z "$CONTAINERD_VERSION" ]; then force_docker fi return 0 } selinux_enabled() { if commandExists "selinuxenabled"; then selinuxenabled return elif commandExists "sestatus"; then ENABLED=$(sestatus | grep 'SELinux status' | awk '{ print $3 }') echo "$ENABLED" | grep --quiet --ignore-case enabled return fi return 1 } selinux_enforced() { if commandExists "getenforce"; then ENFORCED=$(getenforce) echo $(getenforce) | grep --quiet --ignore-case enforcing return elif commandExists "sestatus"; then ENFORCED=$(sestatus | grep 'SELinux mode' | awk '{ print $3 }') echo "$ENFORCED" | grep --quiet --ignore-case enforcing return fi return 1 } function kotsadm_prerelease() { if [ "$KOTSADM_VERSION" = "alpha" ] || [ "$KOTSADM_VERSION" = "nightly" ]; then if [ -n "$TESTGRID_ID" ]; then printf "\n${YELLOW}This is a prerelease version of kotsadm and should not be run in production. Continuing because this is testgrid.${NC}\n" return 0 else printf "\n${YELLOW}This is a prerelease version of kotsadm and should not be run in production. Press Y to continue.${NC} " if ! confirmN; then bail "\nWill not install prerelease version of kotsadm." fi fi fi } function host_nameservers_reachable() { if [ -n "$NAMESERVER" ] || [ "$AIRGAP" = "1" ]; then return 0 fi if ! discover_non_loopback_nameservers; then bail "\nAt least one nameserver must be accessible on a non-loopback address. Use the \"nameserver\" flag in the installer spec to override the loopback nameservers discovered on the host: https://kurl.sh/docs/add-ons/kurl" fi } function preflights_require_no_kubernetes_or_current_node() { if kubernetes_is_join_node ; then if kubernetes_is_current_cluster "${API_SERVICE_ADDRESS}" ; then return 0 fi logWarn "Kubernetes is already installed on this Node but the api server endpoint is different." printf "${YELLOW}Are you sure you want to proceed? ${NC}" 1>&2 if ! confirmN; then exit 1 fi return 0 fi if kubernetes_is_installed ; then bail "Kubernetes is already installed on this Node." 
fi return 0 } function preflights_system_packages() { local addonName=$1 local addonVersion=$2 local manifestPath="${DIR}/addons/${addonName}/${addonVersion}/Manifest" local preflightPath="${DIR}/addons/${addonName}/${addonVersion}/system-packages-preflight.yaml" if [ ! -f "$manifestPath" ]; then return fi local pkgs_all=() local pkgs_ubuntu=() local pkgs_centos=() local pkgs_centos8=() local pkgs_ol=() while read -r line; do if [ -z "$line" ]; then continue fi # support for comments in manifest files if [ "$(echo "$line" | cut -c1-1)" = "#" ]; then continue fi kind=$(echo "$line" | awk '{ print $1 }') case "$kind" in apt) package=$(echo "${line}" | awk '{ print $2 }') pkgs_ubuntu+=("${package}") pkgs_all+=("${package}") ;; yum) package=$(echo "${line}" | awk '{ print $2 }') pkgs_centos+=("${package}") pkgs_all+=("${package}") ;; yum8) package=$(echo "${line}" | awk '{ print $2 }') pkgs_centos8+=("${package}") pkgs_all+=("${package}") ;; yumol) package=$(echo "${line}" | awk '{ print $2 }') pkgs_ol+=("${package}") pkgs_all+=("${package}") ;; esac done < "${manifestPath}" if [ "${#pkgs_all[@]}" -eq "0" ]; then return fi local system_packages_collector=" systemPackages: collectorName: $addonName " local system_packages_analyzer=" systemPackages: collectorName: $addonName outcomes: - fail: when: '{{ not .IsInstalled }}' message: Package {{ .Name }} is not installed. - pass: message: Package {{ .Name }} is installed. " for pkg in "${pkgs_ubuntu[@]}" do system_packages_collector=$("${DIR}"/bin/yamlutil -a -yc "$system_packages_collector" -yp systemPackages_ubuntu[] -v "$pkg") done for pkg in "${pkgs_centos[@]}" do system_packages_collector=$("${DIR}"/bin/yamlutil -a -yc "$system_packages_collector" -yp systemPackages_centos[] -v "$pkg") system_packages_collector=$("${DIR}"/bin/yamlutil -a -yc "$system_packages_collector" -yp systemPackages_rhel[] -v "$pkg") system_packages_collector=$("${DIR}"/bin/yamlutil -a -yc "$system_packages_collector" -yp systemPackages_ol[] -v "$pkg") system_packages_collector=$("${DIR}"/bin/yamlutil -a -yc "$system_packages_collector" -yp systemPackages_amzn[] -v "$pkg") done for pkg in "${pkgs_centos8[@]}" do system_packages_collector=$("${DIR}"/bin/yamlutil -a -yc "$system_packages_collector" -yp systemPackages_centos8[] -v "$pkg") system_packages_collector=$("${DIR}"/bin/yamlutil -a -yc "$system_packages_collector" -yp systemPackages_rhel8[] -v "$pkg") system_packages_collector=$("${DIR}"/bin/yamlutil -a -yc "$system_packages_collector" -yp systemPackages_ol8[] -v "$pkg") system_packages_collector=$("${DIR}"/bin/yamlutil -a -yc "$system_packages_collector" -yp systemPackages_centos9[] -v "$pkg") system_packages_collector=$("${DIR}"/bin/yamlutil -a -yc "$system_packages_collector" -yp systemPackages_rhel9[] -v "$pkg") system_packages_collector=$("${DIR}"/bin/yamlutil -a -yc "$system_packages_collector" -yp systemPackages_ol9[] -v "$pkg") done for pkg in "${pkgs_ol[@]}" do system_packages_collector=$("${DIR}"/bin/yamlutil -a -yc "$system_packages_collector" -yp systemPackages_ol[] -v "$pkg") done # host preflight file not found, create one rm -rf "$preflightPath" mkdir -p "$(dirname "$preflightPath")" cat <> "$preflightPath" apiVersion: troubleshoot.sh/v1beta2 kind: HostPreflight metadata: name: "$addonName" spec: collectors: [] analyzers: [] EOF "${DIR}"/bin/yamlutil -a -fp "$preflightPath" -yp spec_collectors[] -v "$system_packages_collector" "${DIR}"/bin/yamlutil -a -fp "$preflightPath" -yp spec_analyzers[] -v "$system_packages_analyzer" echo "$preflightPath" } 
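# For illustration only: given a hypothetical addon Manifest containing the lines
#   apt nfs-common
#   yum nfs-utils
# the apt package is appended to the ubuntu collector list and the yum package to the centos/rhel/ol/amzn lists via yamlutil above, so the generated system-packages-preflight.yaml ends up roughly like the following (the exact field layout depends on how yamlutil expands the systemPackages_* paths):
#   apiVersion: troubleshoot.sh/v1beta2
#   kind: HostPreflight
#   spec:
#     collectors:
#       - systemPackages:
#           collectorName: <addonName>
#           ubuntu: [nfs-common]
#           centos: [nfs-utils]
#           rhel: [nfs-utils]
#     analyzers:
#       - systemPackages:
#           collectorName: <addonName>
#           outcomes:
#             - fail:
#                 when: '{{ not .IsInstalled }}'
#                 message: Package {{ .Name }} is not installed.
#             - pass:
#                 message: Package {{ .Name }} is installed.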
HOST_PREFLIGHTS_RESULTS_OUTPUT_DIR="host-preflights" function host_preflights() { local is_primary="$1" local is_join="$2" local is_upgrade="$3" local opts= local out_file= out_file="${DIR}/${HOST_PREFLIGHTS_RESULTS_OUTPUT_DIR}/results-$(date +%s).txt" mkdir -p "${DIR}/${HOST_PREFLIGHTS_RESULTS_OUTPUT_DIR}" if [ ! "${HOST_PREFLIGHT_ENFORCE_WARNINGS}" = "1" ] ; then opts="${opts} --ignore-warnings" fi if [ "${is_primary}" != "1" ]; then opts="${opts} --is-primary=false" fi if [ "${is_join}" = "1" ]; then opts="${opts} --is-join" fi if [ "${is_upgrade}" = "1" ]; then opts="${opts} --is-upgrade" fi # Remove previous file if it exists if [ -f "${VENDOR_PREFLIGHT_SPEC}" ]; then rm "$VENDOR_PREFLIGHT_SPEC" fi $DIR/bin/vendorflights -i "${MERGED_YAML_SPEC}" -o "${VENDOR_PREFLIGHT_SPEC}" if [ -f "${VENDOR_PREFLIGHT_SPEC}" ]; then opts="${opts} --spec=${VENDOR_PREFLIGHT_SPEC}" fi if [ "$EXCLUDE_BUILTIN_HOST_PREFLIGHTS" == "1" ]; then opts="${opts} --exclude-builtin" else # Adding kurl addon preflight checks for spec in $("${K8S_DISTRO}_addon_for_each" addon_preflight); do opts="${opts} --spec=${spec}" done # Add containerd preflight checks separately since it's a special addon and is not part of the addons array for spec in $(addon_preflight containerd "$CONTAINERD_VERSION"); do opts="${opts} --spec=${spec}" done fi if [ -n "$PRIMARY_HOST" ]; then opts="${opts} --primary-host=${PRIMARY_HOST}" fi if [ -n "$SECONDARY_HOST" ]; then opts="${opts} --secondary-host=${SECONDARY_HOST}" fi logStep "Running host preflights" if [ "${HOST_PREFLIGHT_IGNORE}" = "1" ]; then "${DIR}"/bin/kurl host preflight "${MERGED_YAML_SPEC}" ${opts} | tee "${out_file}" host_preflights_mkresults "${out_file}" "${opts}" else set +e "${DIR}"/bin/kurl host preflight "${MERGED_YAML_SPEC}" ${opts} | tee "${out_file}" local kurl_exit_code="${PIPESTATUS[0]}" set -e host_preflights_mkresults "${out_file}" "${opts}" case $kurl_exit_code in 3) bail "Host preflights have warnings that block the installation." ;; 2) logWarn "Host preflights have warnings" logWarn "It is highly recommended to sort out the warning conditions before proceeding." logWarn "Be aware that continuing with preflight warnings can result in failures." log "" logWarn "Would you like to continue?" if ! confirmY ; then bail "The installation will not continue" fi return 0 ;; 1) bail "Host preflights have failures that block the installation." ;; esac fi if [ "${HOST_PREFLIGHT_IGNORE}" = "1" ]; then logWarn "Using host-preflight-ignore flag to disregard any failures during the pre-flight checks" case $kurl_exit_code in 3) logFail "Host preflights have warnings that should block the installation." return ;; 2) logWarn "Host preflights have warnings which is highly recommended to sort out the conditions before proceeding." return ;; 1) logFail "Host preflights have failures that should block the installation." return ;; esac fi logSuccess "Host preflights success" } IN_CLUSTER_PREFLIGHTS_RESULTS_OUTPUT_DIR="in-cluster-preflights" function cluster_preflights() { local is_primary="$1" local is_join="$2" local is_upgrade="$3" local opts= local out_file= out_file="${DIR}/${IN_CLUSTER_PREFLIGHTS_RESULTS_OUTPUT_DIR}/results-$(date +%s).txt" # Do not run those tests when/if kubernetes is not installed if ! commandExists kubectl; then return fi if [ ! 
-f /etc/kubernetes/admin.conf ]; then log "In cluster Preflights will not be executed because /etc/kubernetes/admin.conf is not found" return fi logStep "Running in cluster Preflights" mkdir -p "${DIR}/${IN_CLUSTER_PREFLIGHTS_RESULTS_OUTPUT_DIR}" if [ ! "${HOST_PREFLIGHT_ENFORCE_WARNINGS}" = "1" ] ; then opts="${opts} --ignore-warnings" fi if [ "${is_primary}" != "1" ]; then opts="${opts} --is-primary=false" fi if [ "${is_join}" = "1" ]; then opts="${opts} --is-join" fi if [ "${is_upgrade}" = "1" ]; then opts="${opts} --is-upgrade" fi if [ "$EXCLUDE_BUILTIN_HOST_PREFLIGHTS" == "1" ]; then opts="${opts} --exclude-builtin" fi if [ -n "$PRIMARY_HOST" ]; then opts="${opts} --primary-host=${PRIMARY_HOST}" fi if [ -n "$SECONDARY_HOST" ]; then opts="${opts} --secondary-host=${SECONDARY_HOST}" fi if [ "${HOST_PREFLIGHT_IGNORE}" = "1" ]; then "${DIR}"/bin/kurl cluster preflight "${MERGED_YAML_SPEC}" ${opts} | tee "${out_file}" host_preflights_mkresults "${out_file}" "${opts}" else set +e "${DIR}"/bin/kurl cluster preflight "${MERGED_YAML_SPEC}" ${opts} | tee "${out_file}" local kurl_exit_code="${PIPESTATUS[0]}" set -e on_cluster_preflights_mkresults "${out_file}" "${opts}" case $kurl_exit_code in 3) bail "In cluster Preflights have warnings that block the installation." ;; 2) logWarn "Preflights checks executed in cluster have warnings" logWarn "It is highly recommended to sort out the warning conditions before proceeding." logWarn "Be aware that continuing with preflight warnings can result in failures." log "" logWarn "Would you like to continue?" if ! confirmY ; then bail "The installation will not continue" fi return 0 ;; 1) bail "In cluster Preflights checks have failures that block the installation." ;; esac fi if [ "${HOST_PREFLIGHT_IGNORE}" = "1" ]; then logWarn "Using host-preflight-ignore flag to disregard any failures during the pre-flight checks" case $kurl_exit_code in 3) logFail "In cluster preflights have warnings that should block the installation." return ;; 2) logWarn "In cluster preflights have warnings which is highly recommended to sort out the conditions before proceeding." return ;; 1) logFail "In cluster preflights have failures that should block the installation." 
return ;; esac fi logSuccess "In cluster Preflights success" } # host_preflights_mkresults will append cli data to preflight results file function host_preflights_mkresults() { local out_file="$1" local opts="$2" local kurl_version= kurl_version="$(./bin/kurl version | grep version= | awk 'BEGIN { FS="=" }; { print $2 }')" local tmp_file= tmp_file="$(mktemp)" echo -e "[version]\n${kurl_version}\n\n[options]\n${opts}\n\n[results]" | cat - "${out_file}" > "${tmp_file}" && mv "${tmp_file}" "${out_file}" chmod -R +r "${DIR}/${HOST_PREFLIGHTS_RESULTS_OUTPUT_DIR}/" # make sure the file is readable by kots support bundle rm -f "${tmp_file}" } function on_cluster_preflights_mkresults() { local out_file="$1" local opts="$2" local kurl_version= kurl_version="$(./bin/kurl version | grep version= | awk 'BEGIN { FS="=" }; { print $2 }')" local tmp_file= tmp_file="$(mktemp)" echo -e "[version]\n${kurl_version}\n\n[options]\n${opts}\n\n[results]" | cat - "${out_file}" > "${tmp_file}" && mv "${tmp_file}" "${out_file}" chmod -R +r "${DIR}/${IN_CLUSTER_PREFLIGHTS_RESULTS_OUTPUT_DIR}/" # make sure the file is readable by kots support bundle rm -f "${tmp_file}" } # Uninstall Docker when containerd is selected to be installed and it is a new install # So that, is possible to avoid conflicts allow_remove_docker_new_install() { # If docker is not installed OR if containerd is not in the spec # then, the docker should not be uninstalled if ! commandExists docker || [ -z "$CONTAINERD_VERSION" ]; then return fi # if k8s is installed already then, the docker should not be uninstalled # so that it can be properly migrated to containerd if commandExists kubectl ; then return fi printf "\n${YELLOW}Docker already exists on this machine and Kubernetes is not yet installed.${NC} " printf "\n${YELLOW}In order to avoid conflicts when installing containerd, it is recommended that Docker be removed." printf "\n${YELLOW}Remove Docker?${NC} " if confirmY ; then uninstall_docker_new_installs_with_containerd else logWarn "\nThe installation will continue, however, if this script fails due to package" logWarn "conflicts, please uninstall Docker and re-run the install script." fi } # bail_if_unsupported_migration_from_rook_to_openebs will bail if the rook is being removed in favor of # openebs and the openebs version does not support migrations from rook. function bail_if_unsupported_migration_from_rook_to_openebs() { if [ -z "$ROOK_VERSION" ] && [ -n "$OPENEBS_VERSION" ]; then if commandExists kubectl; then if kubectl get ns 2>/dev/null | grep -q rook-ceph; then semverParse "$OPENEBS_VERSION" # if $OPENEBS_VERSION is less than 3.3.0 if [ "$major" -lt "3" ] || { [ "$major" = "3" ] && [ "$minor" -lt "3" ] ; }; then logFail "The OpenEBS version $OPENEBS_VERSION cannot be installed." bail "OpenEBS versions less than 3.3.0 do not support migrations from Rook" fi # registry + openebs without rook requires minio if [ -n "$REGISTRY_VERSION" ] && [ -z "$MINIO_VERSION" ]; then logFail "Migration from Rook with Registry requires an object store." bail "Please ensure that your installer also provides an object store with MinIO add-on." 
fi fi fi fi } # bail_if_unsupported_migration_from_longhorn_to_openebs will bail if the longhorn is being removed in favor of # openebs and the openebs version does not support migrations function bail_if_unsupported_migration_from_longhorn_to_openebs() { if [ -z "$LONGHORN_VERSION" ] && [ -n "$OPENEBS_VERSION" ]; then if commandExists kubectl; then if kubectl get ns 2>/dev/null | grep -q longhorn-system; then semverParse "$OPENEBS_VERSION" # if $OPENEBS_VERSION is less than 3.3.0 if [ "$major" -lt "3" ] || { [ "$major" = "3" ] && [ "$minor" -lt "3" ] ; }; then logFail "The OpenEBS version $OPENEBS_VERSION cannot be installed." bail "OpenEBS versions less than 3.3.0 do not support migrations from Longhorn" fi # registry + openebs without rook requires minio if [ -n "$REGISTRY_VERSION" ] && [ -z "$MINIO_VERSION" ]; then if kubectl get ns | grep -q minio; then logFail "Migration from Longhorn with Registry requires an object store." bail "Please ensure that your installer also provides an object store with MinIO add-on." fi fi fi fi fi } # bail_when_no_object_store_and_s3_enabled will bail if Minio and Rook are not present and kotsadm.s3Disabled is false. function bail_when_no_object_store_and_s3_enabled() { if [ -z "$MINIO_VERSION" ] && [ -z "$ROOK_VERSION" ]; then if [ -n "$KOTSADM_VERSION" ] && [ "$KOTSADM_DISABLE_S3" != "1" ]; then logFail "KOTS with s3 enabled requires an object store." bail "Please ensure that your installer also provides an object store with either the MinIO or Rook add-on." fi if [ -n "$VELERO_VERSION" ] && [ "$KOTSADM_DISABLE_S3" != "1" ]; then logFail "Velero with KOTS s3 enabled requires an object store." bail "Please, ensure that your installer also provides an object store with either the MinIO or Rook add-on." fi fi } # bail_when_no_object_store_or_storage_and_has_registry will bail if no object store or any storage addon be select # for an install with registry function bail_if_no_object_store_or_storage_and_has_registry() { if [ -n "$REGISTRY_VERSION" ]; then if [ -z "$MINIO_VERSION" ] && [ -z "$ROOK_VERSION" ] && [ -z "$OPENEBS_VERSION" ] && [ -z "$LONGHORN_VERSION" ]; then logFail "Registry add-on requires an object store or a storage provisioner" bail "Please ensure that your installer also provides an object store or a storage provisioner with either the MinIO or Rook or OpenEBS add-on." fi fi } # not allow run the installer/upgrade when kurl version is lower than the previous applied before function bail_if_kurl_version_is_lower_than_previous_config() { local previous_kurl_version= # do not fail the script if k8s is not installed or the cluster is down previous_kurl_version="$(kurl_get_current_version 2>/dev/null || true)" if [ -z "$previous_kurl_version" ]; then previous_kurl_version="$(kurl_get_last_version 2>/dev/null || true)" fi if [ -z "$previous_kurl_version" ]; then return fi if [ -n "$KURL_VERSION" ]; then semverCompare "$(echo "$KURL_VERSION" | sed 's/v//g')" "$(echo "$previous_kurl_version" | sed 's/v//g')" if [ "$SEMVER_COMPARE_RESULT" = "-1" ]; then # greater than or equal to 14.2.21 logFail "The current kURL release version $KURL_VERSION is less than the previously installed version $previous_kurl_version." bail "Please use a kURL release version which is equal to or greater than the version used previously." 
fi fi log "Previous kURL version used to install or update the cluster is $previous_kurl_version" if [ -n "$KURL_VERSION" ]; then log "and the current kURL version used is $KURL_VERSION" fi } # bail if rook/openebs prereqs not met for minimumNodeCount param for automated storage scaling function bail_if_automated_storage_scaling_prereqs_not_met() { if [ -n "$ROOK_VERSION" ] && [ -n "$OPENEBS_VERSION" ]; then semverCompare "$(echo "$ROOK_VERSION" | sed 's/v//g')" "$(echo "1.11.7" | sed 's/v//g')" if [ "$SEMVER_COMPARE_RESULT" = "-1" ]; then # greater than or equal to 1.11.7 logFail "The current Rook version $ROOK_VERSION is less than 1.11.7 which is required to use automated storage scaling (minimumNodeCount parameter)" bail "Please use Rook version 1.11.7 or greater, or remove the minimumNodeCount parameter" fi semverCompare "$(echo "$OPENEBS_VERSION" | sed 's/v//g')" "$(echo "3.6.0" | sed 's/v//g')" if [ "$SEMVER_COMPARE_RESULT" = "-1" ]; then # greater than or equal to 3.6.0 logFail "The current OpenEBS version $OPENEBS_VERSION is less than 3.6.0 which is required to use automated storage scaling (minimumNodeCount parameter)" bail "Please use OpenEBS version 3.6.0 or greater, or remove the minimumNodeCount parameter" fi if [ -n "$ROOK_MINIMUM_NODE_COUNT" ] && [ "$ROOK_MINIMUM_NODE_COUNT" -lt "3" ]; then bail "Rook minimumNodeCount parameter must be greater than or equal to 3." fi fi } # shellcheck disable=SC2148 # Gather any additional information required from the user that could not be discovered and was not # passed with a flag function prompts_can_prompt() { # Need the TTY to accept input and stdout to display # Prompts when running the script through the terminal but not as a subshell if [ -c /dev/tty ]; then return 0 fi return 1 } function prompt() { if ! prompts_can_prompt ; then bail "Cannot prompt, shell is not interactive" fi set +e if [ -z ${TEST_PROMPT_RESULT+x} ]; then read PROMPT_RESULT < /dev/tty else PROMPT_RESULT="$TEST_PROMPT_RESULT" fi set -e } function confirmY() { printf "(Y/n) " if [ "$ASSUME_YES" = "1" ]; then echo "Y" return 0 fi if ! prompts_can_prompt ; then echo "Y" logWarn "Automatically accepting prompt, shell is not interactive" return 0 fi prompt if [ "$PROMPT_RESULT" = "n" ] || [ "$PROMPT_RESULT" = "N" ]; then return 1 fi return 0 } function confirmN() { printf "(y/N) " if [ "$ASSUME_YES" = "1" ]; then echo "Y" return 0 fi if ! prompts_can_prompt ; then echo "N" logWarn "Automatically declining prompt, shell is not interactive" return 1 fi prompt if [ "$PROMPT_RESULT" = "y" ] || [ "$PROMPT_RESULT" = "Y" ]; then return 0 fi return 1 } function join_prompts() { if [ -n "$API_SERVICE_ADDRESS" ]; then splitHostPort "$API_SERVICE_ADDRESS" if [ -z "$PORT" ]; then PORT="6443" fi KUBERNETES_MASTER_ADDR="$HOST" KUBERNETES_MASTER_PORT="$PORT" LOAD_BALANCER_ADDRESS="$HOST" LOAD_BALANCER_PORT="$PORT" else prompt_for_master_address splitHostPort "$KUBERNETES_MASTER_ADDR" if [ -n "$PORT" ]; then KUBERNETES_MASTER_ADDR="$HOST" KUBERNETES_MASTER_PORT="$PORT" else KUBERNETES_MASTER_PORT="6443" fi LOAD_BALANCER_ADDRESS="$KUBERNETES_MASTER_ADDR" LOAD_BALANCER_PORT="$KUBERNETES_MASTER_PORT" API_SERVICE_ADDRESS="${KUBERNETES_MASTER_ADDR}:${KUBERNETES_MASTER_PORT}" fi prompt_for_token prompt_for_token_ca_hash } function prompt_for_token() { if [ -n "$KUBEADM_TOKEN" ]; then return fi if ! 
prompts_can_prompt ; then bail "kubernetes.kubeadmToken required" fi printf "Please enter the kubernetes discovery token.\n" while true; do printf "Kubernetes join token: " prompt if [ -n "$PROMPT_RESULT" ]; then KUBEADM_TOKEN="$PROMPT_RESULT" return fi done } function prompt_for_token_ca_hash() { if [ -n "$KUBEADM_TOKEN_CA_HASH" ]; then return fi if ! prompts_can_prompt ; then bail "kubernetes.kubeadmTokenCAHash required" fi printf "Please enter the discovery token CA's hash.\n" while true; do printf "Kubernetes discovery token CA hash: " prompt if [ -n "$PROMPT_RESULT" ]; then KUBEADM_TOKEN_CA_HASH="$PROMPT_RESULT" return fi done } function prompt_for_master_address() { if [ -n "$KUBERNETES_MASTER_ADDR" ]; then return fi if ! prompts_can_prompt ; then bail "kubernetes.masterAddress required" fi printf "Please enter the Kubernetes master address.\n" printf "e.g. 10.128.0.4\n" while true; do printf "Kubernetes master address: " prompt if [ -n "$PROMPT_RESULT" ]; then KUBERNETES_MASTER_ADDR="$PROMPT_RESULT" return fi done } function common_prompts() { if [ -z "$PRIVATE_ADDRESS" ]; then prompt_for_private_ip fi # TODO public address? only required for adding SAN to K8s API server cert prompt_airgap_preload_images if [ "$HA_CLUSTER" = "1" ] && [ "$EKCO_ENABLE_INTERNAL_LOAD_BALANCER" != "1" ]; then prompt_for_load_balancer_address fi } function prompt_license() { if [ -n "$LICENSE_URL" ]; then if [ "$AIRGAP" = "1" ]; then bail "License Agreements with Airgap installs are not supported yet.\n" return fi curl --fail $LICENSE_URL || bail "Failed to fetch license at url: $LICENSE_URL" printf "\n\nThe license text is reproduced above. To view the license in your browser visit $LICENSE_URL.\n\n" printf "Do you accept the license agreement?" if confirmN; then printf "License Agreement Accepted. Continuing Installation.\n" else bail "License Agreement Not Accepted. 'y' or 'Y' needed to accept. Exiting installation." fi fi } function prompt_for_load_balancer_address() { local lastLoadBalancerAddress= if kubeadm_cluster_configuration >/dev/null 2>&1; then lastLoadBalancerAddress="$(kubeadm_cluster_configuration | grep 'controlPlaneEndpoint:' | sed 's/controlPlaneEndpoint: \|"//g')" if [ -n "$lastLoadBalancerAddress" ]; then splitHostPort "$lastLoadBalancerAddress" if [ "$HOST" = "$lastLoadBalancerAddress" ]; then lastLoadBalancerAddress="$lastLoadBalancerAddress:6443" fi fi fi if [ -n "$LOAD_BALANCER_ADDRESS" ] && [ -n "$lastLoadBalancerAddress" ]; then splitHostPort "$LOAD_BALANCER_ADDRESS" if [ "$HOST" = "$LOAD_BALANCER_ADDRESS" ]; then LOAD_BALANCER_ADDRESS="$LOAD_BALANCER_ADDRESS:6443" fi if [ "$LOAD_BALANCER_ADDRESS" != "$lastLoadBalancerAddress" ]; then LOAD_BALANCER_ADDRESS_CHANGED=1 fi fi if [ -z "$LOAD_BALANCER_ADDRESS" ] && [ -n "$lastLoadBalancerAddress" ]; then LOAD_BALANCER_ADDRESS="$lastLoadBalancerAddress" fi if [ -z "$LOAD_BALANCER_ADDRESS" ] && [ "$KUBERNETES_LOAD_BALANCER_USE_FIRST_PRIMARY" = "1" ]; then # EKCO_ENABLE_INTERNAL_LOAD_BALANCER takes precedence if [ -z "$EKCO_VERSION" ] || [ "$EKCO_ENABLE_INTERNAL_LOAD_BALANCER" != "1" ]; then LOAD_BALANCER_ADDRESS="$PRIVATE_ADDRESS" LOAD_BALANCER_PORT=6443 fi fi if [ -z "$LOAD_BALANCER_ADDRESS" ]; then if ! 
prompts_can_prompt ; then bail "kubernetes.loadBalancerAddress required" fi if [ -n "$EKCO_VERSION" ] && semverCompare "$EKCO_VERSION" "0.11.0" && [ "$SEMVER_COMPARE_RESULT" -ge "0" ]; then printf "\nIf you would like to bring your own load balancer to route external and internal traffic to the API servers, please enter a load balancer address.\n" printf "HAProxy will be used to perform this load balancing internally if you do not provide a load balancer address.\n" printf "Load balancer address: " prompt LOAD_BALANCER_ADDRESS="$PROMPT_RESULT" if [ -z "$LOAD_BALANCER_ADDRESS" ]; then EKCO_ENABLE_INTERNAL_LOAD_BALANCER=1 fi else printf "Please enter a load balancer address to route external and internal traffic to the API servers.\n" printf "In the absence of a load balancer address, all traffic will be routed to the first master.\n" printf "Load balancer address: " prompt LOAD_BALANCER_ADDRESS="$PROMPT_RESULT" if [ -z "$LOAD_BALANCER_ADDRESS" ]; then LOAD_BALANCER_ADDRESS="$PRIVATE_ADDRESS" LOAD_BALANCER_PORT=6443 fi fi fi if [ -z "$LOAD_BALANCER_PORT" ]; then splitHostPort "$LOAD_BALANCER_ADDRESS" LOAD_BALANCER_ADDRESS="$HOST" LOAD_BALANCER_PORT="$PORT" fi if [ -z "$LOAD_BALANCER_PORT" ]; then LOAD_BALANCER_PORT=6443 fi # localhost:6444 is the address of the internal load balancer if [ "$LOAD_BALANCER_ADDRESS" = "localhost" ] && [ "$LOAD_BALANCER_PORT" = "6444" ]; then EKCO_ENABLE_INTERNAL_LOAD_BALANCER=1 fi if [ -n "$LOAD_BALANCER_ADDRESS" ]; then $BIN_BASHTOYAML -c "$MERGED_YAML_SPEC" -f "load-balancer-address=${LOAD_BALANCER_ADDRESS}:${LOAD_BALANCER_PORT}" fi } # if remote nodes are in the cluster and this is an airgap install, prompt the user to run the # load-images task on all remotes before proceeding because remaining steps may cause pods to # be scheduled on those nodes with new images. function prompt_airgap_preload_images() { if [ "$AIRGAP" != "1" ]; then return 0 fi if ! kubernetes_has_remotes; then return 0 fi local unattended_nodes_missing_images=0 while read -r node; do local nodeName=$(echo "$node" | awk '{ print $1 }') if [ "$nodeName" = "$(get_local_node_name)" ]; then continue fi if kubernetes_node_has_all_images "$nodeName"; then continue fi local kurl_install_directory_flag="$(get_kurl_install_directory_flag "${KURL_INSTALL_DIRECTORY_FLAG}")" printf "\nRun this script on node ${GREEN}${nodeName}${NC} to load required images before proceeding:\n" printf "\n" printf "${GREEN}\tcat ./tasks.sh | sudo bash -s load-images${kurl_install_directory_flag}${NC}" printf "\n" if [ "${KURL_IGNORE_REMOTE_LOAD_IMAGES_PROMPT}" != "1" ]; then if ! prompts_can_prompt ; then unattended_nodes_missing_images=1 continue fi while true; do echo "" printf "Have images been loaded on node ${nodeName}? 
" if confirmN ; then break fi done else logWarn "Remote load-images task prompt explicitly ignored" fi done < <(kubectl get nodes --no-headers) if [ "$unattended_nodes_missing_images" = "1" ] ; then bail "Preloading images required" fi } function prompt_for_private_ip() { _count=0 if [ "$IPV6_ONLY" = "1" ]; then _regex_ipv6="^[[:digit:]]+: ([^[:space:]]+)[[:space:]]+inet6 ([[:alnum:]:]+)" while read -r _line; do [[ $_line =~ $_regex_ipv6 ]] if [ "${BASH_REMATCH[1]}" != "lo" ] && [ "${BASH_REMATCH[1]}" != "kube-ipvs0" ] && [ "${BASH_REMATCH[1]}" != "docker0" ] && [ "${BASH_REMATCH[1]}" != "weave" ] && [ "${BASH_REMATCH[1]}" != "antrea-gw0" ] && [ "${BASH_REMATCH[1]}" != "flannel.1" ] && [ "${BASH_REMATCH[1]}" != "cni0" ]; then _iface_names[$((_count))]=${BASH_REMATCH[1]} _iface_addrs[$((_count))]=${BASH_REMATCH[2]} let "_count += 1" fi done <<< "$(ip -6 -o addr)" else _regex_ipv4="^[[:digit:]]+: ([^[:space:]]+)[[:space:]]+[[:alnum:]]+ ([[:digit:].]+)" while read -r _line; do [[ $_line =~ $_regex_ipv4 ]] if [ "${BASH_REMATCH[1]}" != "lo" ] && [ "${BASH_REMATCH[1]}" != "kube-ipvs0" ] && [ "${BASH_REMATCH[1]}" != "docker0" ] && [ "${BASH_REMATCH[1]}" != "weave" ] && [ "${BASH_REMATCH[1]}" != "antrea-gw0" ] && [ "${BASH_REMATCH[1]}" != "flannel.1" ] && [ "${BASH_REMATCH[1]}" != "cni0" ]; then _iface_names[$((_count))]=${BASH_REMATCH[1]} _iface_addrs[$((_count))]=${BASH_REMATCH[2]} let "_count += 1" fi done <<< "$(ip -4 -o addr)" fi if [ "$_count" -eq "0" ]; then echo >&2 "Error: The installer couldn't discover any valid network interfaces on this machine." echo >&2 "Check your network configuration and re-run this script again." echo >&2 "If you want to skip this discovery process, pass the 'private-address' arg to this script, e.g. 'sudo ./install.sh private-address=1.2.3.4'" exit 1 elif [ "$_count" -eq "1" ]; then PRIVATE_ADDRESS=${_iface_addrs[0]} printf "The installer will use network interface '%s' (with IP address '%s')\n" "${_iface_names[0]}" "${_iface_addrs[0]}" return fi if ! prompts_can_prompt ; then bail "Multiple network interfaces present, please select an IP address. Try passing the selected address to this script e.g. 'sudo ./install.sh private-address=1.2.3.4' or assign an IP address to the privateAddress field in the kurl add-on." fi printf "The installer was unable to automatically detect the private IP address of this machine.\n" printf "Please choose one of the following network interfaces:\n" for i in $(seq 0 $((_count-1))); do printf "[%d] %-5s\t%s\n" "$i" "${_iface_names[$i]}" "${_iface_addrs[$i]}" done while true; do printf "Enter desired number (0-%d): " "$((_count-1))" prompt if [ -z "$PROMPT_RESULT" ]; then continue fi if [ "$PROMPT_RESULT" -ge "0" ] && [ "$PROMPT_RESULT" -lt "$_count" ]; then PRIVATE_ADDRESS=${_iface_addrs[$PROMPT_RESULT]} printf "The installer will use network interface '%s' (with IP address '%s').\n" "${_iface_names[$PROMPT_RESULT]}" "$PRIVATE_ADDRESS" return fi done } # read_proxy_config_from_env makes sure that both proxy variables (upper # and lower case) are set to the same value. for example http_proxy and # HTTP_PROXY both must point to the same address. this function sets the # following variables ENV_HTTP_PROXY_ADDRESS, ENV_HTTPS_PROXY_ADDRESS, # and ENV_NO_PROXY. function read_proxy_config_from_env() { # by default https proxy configuration inherits from http proxy config. 
if [ -n "$HTTP_PROXY" ]; then ENV_HTTP_PROXY_ADDRESS="$HTTP_PROXY" ENV_HTTPS_PROXY_ADDRESS="$HTTP_PROXY" elif [ -n "$http_proxy" ]; then ENV_HTTP_PROXY_ADDRESS="$http_proxy" ENV_HTTPS_PROXY_ADDRESS="$http_proxy" fi # if https proxy is explicitly set, it overrides the inherit http proxy # configuration. if [ -n "$HTTPS_PROXY" ]; then ENV_HTTPS_PROXY_ADDRESS="$HTTPS_PROXY" elif [ -n "$https_proxy" ]; then ENV_HTTPS_PROXY_ADDRESS="$https_proxy" fi # no proxy is simply copied from the environment. if [ -n "$NO_PROXY" ]; then ENV_NO_PROXY="$NO_PROXY" elif [ -n "$no_proxy" ]; then ENV_NO_PROXY="$no_proxy" fi # here we sanitize the proxy configuration. we make sure that both upper # and lower case variables point to the same value. if [ -n "$ENV_HTTP_PROXY_ADDRESS" ]; then export http_proxy="$ENV_HTTP_PROXY_ADDRESS" export HTTP_PROXY="$ENV_HTTP_PROXY_ADDRESS" fi if [ -n "$ENV_HTTPS_PROXY_ADDRESS" ]; then export https_proxy="$ENV_HTTPS_PROXY_ADDRESS" export HTTPS_PROXY="$ENV_HTTPS_PROXY_ADDRESS" fi if [ -n "$ENV_NO_PROXY" ]; then export no_proxy="$ENV_NO_PROXY" export NO_PROXY="$ENV_NO_PROXY" fi # if proxy is configured we need to make sure that kubectl can reach # the apiserver without going through the proxy. if [ -n "$ENV_HTTP_PROXY_ADDRESS" ] || [ -n "$ENV_HTTPS_PROXY_ADDRESS" ]; then kubectl_no_proxy fi } # proxy_bootstrap reads the proxy configuration from the environment and # overrides them with the configuration provided by the user through the # installer yaml. at the end of this process three variables are set: # ENV_HTTP_PROXY_ADDRESS, ENV_HTTPS_PROXY_ADDRESS, and ENV_NO_PROXY. function proxy_bootstrap() { # read and sanitize proxy configuration from environment variables. read_proxy_config_from_env # users can still provide a different proxy by patching the installer # yaml, we need to verify if this is the case and then use the proxy # set in the yaml for both http and https. if [ -n "$INSTALLER_SPEC_FILE" ]; then local overrideProxy=$(grep "proxyAddress:" "$INSTALLER_SPEC_FILE" | grep -o "http[^'\" ]*") if [ -n "$overrideProxy" ]; then ENV_HTTP_PROXY_ADDRESS="$overrideProxy" ENV_HTTPS_PROXY_ADDRESS="$overrideProxy" export http_proxy="$overrideProxy" export https_proxy="$overrideProxy" export HTTP_PROXY="$overrideProxy" export HTTPS_PROXY="$overrideProxy" kubectl_no_proxy echo "Bootstrapped proxy address from installer spec file: $https_proxy" return fi fi local proxy=$(echo "$INSTALLER_YAML" | grep "proxyAddress:" | grep -o "http[^'\" ]*") if [ -z "$proxy" ]; then return fi ENV_HTTP_PROXY_ADDRESS="$proxy" ENV_HTTPS_PROXY_ADDRESS="$proxy" export http_proxy="$proxy" export https_proxy="$proxy" export HTTP_PROXY="$proxy" export HTTPS_PROXY="$proxy" kubectl_no_proxy echo "Bootstrapped proxy address from installer yaml: $https_proxy" } # check_proxy_config tries to check if is possible connect with the registry # Th following code will check if the proxy is invalid by running crictl pull test/invalid/image:latest # See that the image does not matter to us. 
We are looking here for proxy issues only and then, when the Proxy config # not to be configured accurately we will face an issue like: # E0525 09:01:01.952576 1399831 remote_image.go:167] "PullImage from image service failed" err="rpc error: code = Unknown desc = failed to pull and unpack image \"docker.io/test/invalid/image:latest\": failed to resolve reference \"docker.io/test/invalid/image:latest\": failed to do request: Head \"https://registry-1.docker.io/v2/test/invalid/image/manifests/latest\": proxyconnect tcp: dial tcp: lookup invalidproxy: Temporary failure in name resolution" image="test/invalid/image:latest" # FATA[0000] pulling image: rpc error: code = Unknown desc = failed to pull and unpack image "docker.io/test/invalid/image:latest": failed to resolve reference "docker.io/test/invalid/image:latest": failed to do request: Head "https://registry-1.docker.io/v2/test/invalid/image/manifests/latest": proxyconnect tcp: dial tcp: lookup invalidproxy: Temporary failure in name resolution function check_proxy_config() { if [ -z "$CONTAINERD_VERSION" ]; then return fi logStep "Checking proxy configuration with Containerd" # Echo containerd Proxy config: local proxy_config_file="/etc/systemd/system/containerd.service.d/http-proxy.conf" if [ ! -f "$proxy_config_file" ]; then log "Skipping test. No HTTP proxy configuration found." return fi echo "" log "Proxy config:" grep -v -e '^\[Service\]' -e '^# Generated by kURL' "$proxy_config_file" echo "" if ! response=$(crictl pull test/invalid/image:latest 2>&1) && [[ $response =~ .*"proxy".* ]]; then logWarn "Proxy connection issues were identified:" error_message=$(echo "$response" | grep -oP '(?<=failed to do request: ).*' | sed -r 's/.*: //' | awk -F "\"" '{print $(NF-1)}' | sed -r 's/test\/invalid\/image:latest//') logWarn "$error_message" echo "" logWarn "Please review the proxy configuration and ensure that it is valid." logWarn "More info: https://kurl.sh/docs/install-with-kurl/proxy-installs" return fi logSuccess "Unable to identify proxy problems" } # kubectl_no_proxy makes sure that kubectl can reach the apiserver without # going through the proxy. this is done by adding the apiserver address to # the NO_PROXY and no_proxy environment variable. this function expects # both upper and lower case variables to be already sanitized (to contain # the same value). function kubectl_no_proxy() { if [ ! -f /etc/kubernetes/admin.conf ]; then return fi kubectlEndpoint=$(cat /etc/kubernetes/admin.conf | grep 'server:' | awk '{ print $NF }' | sed -E 's/https?:\/\///g') splitHostPort "$kubectlEndpoint" # if the kubectl endpoint is already present in the no_proxy env we # can skip and move forward. this avoids adding the same ip address # multiple times and makes this function idempotent. if echo "$no_proxy" | grep -q "$HOST"; then return fi if [ -n "$no_proxy" ]; then export no_proxy="$no_proxy,$HOST" export NO_PROXY="$NO_PROXY,$HOST" else export no_proxy="$HOST" export NO_PROXY="$HOST" fi } function configure_proxy() { if [ "$NO_PROXY" = "1" ]; then echo "Not using http proxy" unset PROXY_ADDRESS unset http_proxy unset HTTP_PROXY unset https_proxy unset HTTPS_PROXY return fi if [ -z "$PROXY_ADDRESS" ] && [ -z "$ENV_HTTP_PROXY_ADDRESS" ] && [ -z "$ENV_HTTPS_PROXY_ADDRESS" ]; then log "Not using proxy address" return fi # if the proxy has been set in the installer we use that one for both # http and https. 
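# Illustrative precedence only (made-up addresses): if the installer spec sets PROXY_ADDRESS=http://172.16.0.10:3128 while the environment provides HTTP_PROXY=http://10.0.0.1:3128, both PROXY_ADDRESS and PROXY_HTTPS_ADDRESS resolve to the spec value below; the environment proxies are only used when the spec does not set one.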
if [ -n "$PROXY_ADDRESS" ]; then logWarn "Overriding HTTP and HTTPS proxies addresses with $PROXY_ADDRESS" PROXY_HTTPS_ADDRESS="$PROXY_ADDRESS" return fi # if user hasn't provide any proxy configuration we use the ones # present in the environment. PROXY_ADDRESS="$ENV_HTTP_PROXY_ADDRESS" PROXY_HTTPS_ADDRESS="$ENV_HTTP_PROXY_ADDRESS" if [ -n "$ENV_HTTPS_PROXY_ADDRESS" ]; then PROXY_HTTPS_ADDRESS="$ENV_HTTPS_PROXY_ADDRESS" fi echo "Using system proxies, HTTP: $PROXY_ADDRESS, HTTPS: $PROXY_HTTPS_ADDRESS" } function configure_no_proxy_preinstall() { if [ -z "$PROXY_ADDRESS" ]; then return fi local addresses="localhost,127.0.0.1,.svc,.local,.default,kubernetes" if [ -n "$ENV_NO_PROXY" ]; then addresses="${addresses},${ENV_NO_PROXY}" fi if [ -n "$PRIVATE_ADDRESS" ]; then addresses="${addresses},${PRIVATE_ADDRESS}" fi if [ -n "$LOAD_BALANCER_ADDRESS" ]; then addresses="${addresses},${LOAD_BALANCER_ADDRESS}" fi if [ -n "$ADDITIONAL_NO_PROXY_ADDRESSES" ]; then addresses="${addresses},${ADDITIONAL_NO_PROXY_ADDRESSES}" fi # filter duplicates addresses=$(unique_no_proxy_addresses "$addresses") # kubeadm requires this in the environment to reach K8s masters export no_proxy="$addresses" NO_PROXY_ADDRESSES="$addresses" echo "Exported no_proxy: $no_proxy" } function configure_no_proxy() { if [ -z "$PROXY_ADDRESS" ]; then return fi local addresses="localhost,127.0.0.1,.svc,.local,.default,kubernetes" if [ -n "$ENV_NO_PROXY" ]; then addresses="${addresses},${ENV_NO_PROXY}" fi if [ -n "$KOTSADM_VERSION" ]; then addresses="${addresses},kotsadm-rqlite,kotsadm-api-node" fi if [ -n "$ROOK_VERSION" ]; then addresses="${addresses},.rook-ceph" fi if [ -n "$FLUENTD_VERSION" ]; then addresses="${addresses},.logging" fi if [ -n "$REGISTRY_VERSION" ]; then addresses="${addresses},.kurl" fi if [ -n "$PROMETHEUS_VERSION" ]; then addresses="${addresses},.monitoring" fi if [ -n "$VELERO_VERSION" ] && [ -n "$VELERO_NAMESPACE" ]; then addresses="${addresses},.${VELERO_NAMESPACE}" fi if [ -n "$MINIO_VERSION" ] && [ -n "$MINIO_NAMESPACE" ]; then addresses="${addresses},.${MINIO_NAMESPACE}" fi if [ -n "$PRIVATE_ADDRESS" ]; then addresses="${addresses},${PRIVATE_ADDRESS}" fi if [ -n "$LOAD_BALANCER_ADDRESS" ]; then addresses="${addresses},${LOAD_BALANCER_ADDRESS}" fi if [ -n "$KUBERNETES_MASTER_ADDR" ]; then addresses="${addresses},${KUBERNETES_MASTER_ADDR}" fi if [ -n "$POD_CIDR" ]; then addresses="${addresses},${POD_CIDR}" fi if [ -n "$SERVICE_CIDR" ]; then addresses="${addresses},${SERVICE_CIDR}" fi if [ -n "$ADDITIONAL_NO_PROXY_ADDRESSES" ]; then addresses="${addresses},${ADDITIONAL_NO_PROXY_ADDRESSES}" fi # filter duplicates addresses=$(unique_no_proxy_addresses "$addresses") # kubeadm requires this in the environment to reach K8s masters export no_proxy="$addresses" NO_PROXY_ADDRESSES="$addresses" echo "Exported no_proxy: $no_proxy" } function unique_no_proxy_addresses() { echo "$1" | sed 's/,/\n/g' | sed '/^\s*$/d' | sort | uniq | paste -s --delimiters="," } REPORTING_CONTEXT_INFO="" INSTALLATION_ID= TESTGRID_ID= KURL_CLUSTER_UUID= function report_install_start() { # report that the install started # this includes the install ID, time, kurl URL, and linux distribution name + version. 
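# For illustration only: with reporting enabled this amounts to a single fire-and-forget POST, roughly
#   curl -s -H 'Content-Type: application/json' \
#     -d '{"started":"<rfc3339 timestamp>","os":"<distro> <version>",...}' \
#     $REPLICATED_APP_URL/kurl_metrics/start_install/$INSTALLATION_ID
# and it is skipped entirely for airgap installs or when DISABLE_REPORTING=1.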
if [ -f "/tmp/testgrid-id" ]; then TESTGRID_ID=$(cat /tmp/testgrid-id) fi # if airgapped, don't create an installation ID and return early if [ "$AIRGAP" == "1" ]; then return 0 fi # if DISABLE_REPORTING is set, don't create an installation ID (which thus disables all the other reporting calls) and return early if [ "${DISABLE_REPORTING}" = "1" ]; then return 0 fi INSTALLATION_ID=$(< /dev/urandom tr -dc a-z0-9 | head -c16) local started=$(date -u +"%Y-%m-%dT%H:%M:%SZ") # rfc3339 # Determine if it is the first kurl install local is_upgrade="false" if kubernetes_resource_exists kube-system configmap kurl-config; then local is_upgrade="true" fi # get the kurl_cluster_id attempt_get_cluster_id curl -s --output /dev/null -H 'Content-Type: application/json' --max-time 5 \ -d "{\ \"started\": \"$started\", \ \"os\": \"$LSB_DIST $DIST_VERSION\", \ \"kernel_version\": \"$KERNEL_MAJOR.$KERNEL_MINOR\", \ \"kurl_url\": \"$KURL_URL\", \ \"installer_id\": \"$INSTALLER_ID\", \ \"testgrid_id\": \"$TESTGRID_ID\", \ \"machine_id\": \"$MACHINE_ID\", \ \"kurl_instance_uuid\": \"$KURL_INSTANCE_UUID\", \ \"is_upgrade\": $is_upgrade, \ \"is_ha_cluster\": \"$HA_CLUSTER\", \ \"num_processors\": \"$(nproc)\", \ \"memory_size_kb\": \"$(cat /proc/meminfo | grep MemTotal | awk '{print $2}')\", \ \"kurl_cluster_uuid\": \"$KURL_CLUSTER_UUID\" \ }" \ $REPLICATED_APP_URL/kurl_metrics/start_install/$INSTALLATION_ID || true # report the kurl version as an addon if it is set if [ -n "${KURL_VERSION}" ]; then report_addon_start "kurl" "$KURL_VERSION" report_addon_success "kurl" "$KURL_VERSION" fi } function report_install_success() { # report that the install finished successfully # if INSTALLATION_ID is empty reporting is disabled if [ -z "$INSTALLATION_ID" ]; then return 0 fi local completed=$(date -u +"%Y-%m-%dT%H:%M:%SZ") # rfc3339 curl -s --output /dev/null -H 'Content-Type: application/json' --max-time 5 \ -d "{\"finished\": \"$completed\"}" \ $REPLICATED_APP_URL/kurl_metrics/finish_install/$INSTALLATION_ID || true } function report_install_fail() { # report that the install failed local cause=$1 # if INSTALLATION_ID is empty reporting is disabled if [ -z "$INSTALLATION_ID" ]; then return 0 fi local completed=$(date -u +"%Y-%m-%dT%H:%M:%SZ") # rfc3339 curl -s --output /dev/null -H 'Content-Type: application/json' --max-time 5 \ -d "{\"finished\": \"$completed\", \"cause\": \"$cause\"}" \ $REPLICATED_APP_URL/kurl_metrics/fail_install/$INSTALLATION_ID || true } function report_addon_start() { # report that an addon started installation local name=$1 local version=$2 # if INSTALLATION_ID is empty reporting is disabled if [ -z "$INSTALLATION_ID" ]; then return 0 fi local started=$(date -u +"%Y-%m-%dT%H:%M:%SZ") # rfc3339 curl -s --output /dev/null -H 'Content-Type: application/json' --max-time 5 \ -d "{\"started\": \"$started\", \"addon_version\": \"$version\", \"testgrid_id\": \"$TESTGRID_ID\"}" \ $REPLICATED_APP_URL/kurl_metrics/start_addon/$INSTALLATION_ID/$name || true } function report_addon_success() { # report that an addon installed successfully local name=$1 local version=$2 # if INSTALLATION_ID is empty reporting is disabled if [ -z "$INSTALLATION_ID" ]; then return 0 fi local completed=$(date -u +"%Y-%m-%dT%H:%M:%SZ") # rfc3339 curl -s --output /dev/null -H 'Content-Type: application/json' --max-time 5 \ -d "{\"finished\": \"$completed\"}" \ $REPLICATED_APP_URL/kurl_metrics/finish_addon/$INSTALLATION_ID/$name || true } function report_addon_fail() { # report that an addon installed successfully local name=$1 
local version=$2 # if INSTALLATION_ID is empty reporting is disabled if [ -z "$INSTALLATION_ID" ]; then return 0 fi local completed=$(date -u +"%Y-%m-%dT%H:%M:%SZ") # rfc3339 curl -s --output /dev/null -H 'Content-Type: application/json' --max-time 5 \ -d "{\"finished\": \"$completed\"}" \ $REPLICATED_APP_URL/kurl_metrics/fail_addon/$INSTALLATION_ID/$name || true } function ctrl_c() { trap - SIGINT # reset SIGINT handler to default - someone should be able to ctrl+c the support bundle collector read line file <<<$(caller) printf "${YELLOW}Trapped ctrl+c on line $line${NC}\n" local totalStack totalStack=$(stacktrace) local infoString="with stack $totalStack - bin utils $KURL_BIN_UTILS_FILE - context $REPORTING_CONTEXT_INFO" if [ -z "$SUPPORT_BUNDLE_READY" ]; then report_install_fail "trapped ctrl+c before completing k8s install $infoString" exit 1 fi report_install_fail "trapped ctrl+c $infoString" collect_support_bundle exit 1 # exit with error } # unused function addon_install_fail() { # report that an addon failed to install successfully local name=$1 local version=$2 # if INSTALLATION_ID is empty reporting is disabled if [ -z "$INSTALLATION_ID" ]; then return 1 # return error because the addon in question did too fi local completed=$(date -u +"%Y-%m-%dT%H:%M:%SZ") # rfc3339 curl -s --output /dev/null -H 'Content-Type: application/json' --max-time 5 \ -d "{\"finished\": \"$completed\"}" \ $REPLICATED_APP_URL/kurl_metrics/fail_addon/$INSTALLATION_ID/$name || true # provide an option for a user to provide a support bundle printf "${YELLOW}Addon ${name} ${version} failed to install${NC}\n" collect_support_bundle return 1 # return error because the addon in question did too } # unused function addon_install_fail_nobundle() { # report that an addon failed to install successfully local name=$1 local version=$2 # if INSTALLATION_ID is empty reporting is disabled if [ -z "$INSTALLATION_ID" ]; then return 1 # return error because the addon in question did too fi local completed=$(date -u +"%Y-%m-%dT%H:%M:%SZ") # rfc3339 curl -s --output /dev/null -H 'Content-Type: application/json' --max-time 5 \ -d "{\"finished\": \"$completed\"}" \ $REPLICATED_APP_URL/kurl_metrics/fail_addon/$INSTALLATION_ID/$name || true return 1 # return error because the addon in question did too } function collect_support_bundle() { trap - SIGINT # reset SIGINT handler to default - someone should be able to ctrl+c the support bundle collector return 0 # disabled for now # if someone has set ASSUME_YES, we shouldn't automatically upload a support bundle if [ "$ASSUME_YES" = "1" ]; then return 0 fi if ! prompts_can_prompt ; then return 0 fi printf "${YELLOW}Would you like to provide a support bundle to aid us in avoiding similar errors in the future?${NC}\n" if ! confirmN; then return 0 fi printf "${YELLOW}Please provide your work email address for our records (this is not a support ticket):${NC}\n" prompt local email_address="" if [ -n "$PROMPT_RESULT" ]; then email_address="$PROMPT_RESULT" fi printf "${YELLOW}Could you provide a quick description of the issue you encountered?${NC}\n" prompt local issue_description="" if [ -n "$PROMPT_RESULT" ]; then issue_description="$PROMPT_RESULT" fi # collect support bundle printf "Collecting support bundle now:" kubectl support-bundle https://kots.io # find the support bundle filename local support_bundle_filename=$(find . 
-type f -name "support-bundle-*.tar.gz" | sort -r | head -n 1) curl 'https://support-bundle-secure-upload.replicated.com/v1/upload' \ -H 'accept: application/json, text/plain, */*' \ -X POST \ -H "Content-Type: multipart/form-data" \ -F "data={\"first_name\":\"kurl.sh\",\"last_name\":\"installer\",\"email_address\":\"${email_address}\",\"company\":\"\",\"description\":\"${issue_description}\"}" \ -F "file=@${support_bundle_filename}" \ --compressed printf "\nSupport bundle uploaded!\n" } function trap_report_error { if [[ ! $- =~ e ]]; then # if errexit is not set (set -e), don't report an error here return 0 fi trap - ERR # reset the error handler to default in case there are errors within this function read line file <<<$(caller) printf "${YELLOW}An error occurred on line $line${NC}\n" local totalStack totalStack=$(stacktrace) report_install_fail "An error occurred with stack $totalStack - bin utils $KURL_BIN_UTILS_FILE - context $REPORTING_CONTEXT_INFO" if [ -n "$SUPPORT_BUNDLE_READY" ]; then collect_support_bundle fi exit 1 } function stacktrace { local i=1 local totalStack while caller $i > /dev/null; do read line func file <<<$(caller $i) totalStack="$totalStack (file: $file func: $func line: $line)" ((i++)) done echo "$totalStack" } # attempt_get_cluster_id will get the cluster uuid from the kurl_cluster_uuid configmap and set the # KURL_CLUSTER_UUID env var. If it does not exist or the cluster is down, check the disk to see if # it is persisted there, otherwise make a new UUID for KURL_CLUSTER_UUID and save to disk. function attempt_get_cluster_id() { if ! kubernetes_resource_exists kurl configmap kurl-cluster-uuid ; then # If the cluster is down, check to see if this is an etcd member and the cluster uuid is # persisted to disk. if [ -d /var/lib/etcd/member ] && [ -f "${KURL_INSTALL_DIRECTORY}/clusteruuid" ]; then KURL_CLUSTER_UUID=$(cat "${KURL_INSTALL_DIRECTORY}/clusteruuid") else KURL_CLUSTER_UUID=$(< /dev/urandom tr -dc a-z0-9 | head -c32) fi else KURL_CLUSTER_UUID=$(kubectl get configmap -n kurl kurl-cluster-uuid -o jsonpath='{.data.kurl_cluster_uuid}') fi # Persist the cluster uuid to disk in case the cluster is down. # The tasks.sh reset command will remove the KURL_INSTALL_DIRECTORY directory and the cluster uuid will # be regenerated if reset. echo "$KURL_CLUSTER_UUID" > "${KURL_INSTALL_DIRECTORY}/clusteruuid" } # maybe_set_kurl_cluster_uuid will create the kurl_cluster_uuid configmap using the # KURL_CLUSTER_UUID env var if it does not already exist. function maybe_set_kurl_cluster_uuid() { if [ -z "$KURL_CLUSTER_UUID" ]; then return 0 fi if kubernetes_resource_exists kurl configmap kurl-cluster-uuid; then return 0 fi kubectl create configmap -n kurl kurl-cluster-uuid --from-literal=kurl_cluster_uuid="$KURL_CLUSTER_UUID" } # shellcheck disable=SC2148 function disable_rook_ceph_operator() { if ! is_rook_1; then return 0 fi kubectl -n rook-ceph scale deployment rook-ceph-operator --replicas=0 } function enable_rook_ceph_operator() { if ! 
is_rook_1; then return 0 fi kubectl -n rook-ceph scale deployment rook-ceph-operator --replicas=1 } function is_rook_1() { kubectl -n rook-ceph get cephblockpools replicapool &>/dev/null } function rook_ceph_osd_pods_gone() { if kubectl -n rook-ceph get pods -l app=rook-ceph-osd 2>/dev/null | grep 'rook-ceph-osd' &>/dev/null ; then return 1 fi return 0 } function prometheus_pods_gone() { if kubectl -n monitoring get pods -l app=prometheus 2>/dev/null | grep 'prometheus' &>/dev/null ; then return 1 fi if kubectl -n monitoring get pods -l app.kubernetes.io/name=prometheus 2>/dev/null | grep 'prometheus' &>/dev/null ; then # the labels changed with prometheus 0.53+ return 1 fi return 0 } function prometheus_operator_pods_gone() { if kubectl -n monitoring get pods -l app=kube-prometheus-stack-operator 2>/dev/null | grep 'prometheus' &>/dev/null ; then return 1 fi return 0 } function ekco_pods_gone() { pods_gone_by_selector kurl app=ekc-operator } # rook_disable_ekco_operator disables the ekco operator if it exists. function rook_disable_ekco_operator() { if kubernetes_resource_exists kurl deployment ekc-operator ; then log "Scaling down EKCO deployment to 0 replicas" kubernetes_scale_down kurl deployment ekc-operator log "Waiting for ekco pods to be removed" if ! spinner_until 120 ekco_pods_gone; then logFail "Unable to scale down ekco operator" return 1 fi fi } # rook_enable_ekco_operator enables the ekco operator if it exists. function rook_enable_ekco_operator() { if kubernetes_resource_exists kurl deployment ekc-operator ; then echo "Scaling up EKCO deployment to 1 replica" kubernetes_scale kurl deployment ekc-operator 1 fi } function remove_rook_ceph() { # For further information see: https://github.com/rook/rook/blob/v1.11.2/Documentation/Storage-Configuration/ceph-teardown.md # make sure there aren't any PVs using rook before deleting it all_pv_drivers="$(kubectl get pv -o=jsonpath='{.items[*].spec.csi.driver}')" if echo "$all_pv_drivers" | grep "rook" &>/dev/null ; then logFail "There are still PVs using rook-ceph." logFail "Remove these PV(s) before continuing." return 1 fi # scale ekco to 0 replicas if it exists if kubernetes_resource_exists kurl deployment ekc-operator; then kubectl -n kurl scale deploy ekc-operator --replicas=0 log "Waiting for ekco pods to be removed" if ! spinner_until 120 ekco_pods_gone; then logFail "Unable to scale down ekco operator" return 1 fi fi log "Waiting up to 1 minute to remove rook-ceph pool" if ! kubectl delete -n rook-ceph cephblockpool replicapool --timeout=60s; then logWarn "Unable to delete rook-ceph pool" fi log "Waiting up to 1 minute to remove rook-ceph Storage Classes" if ! kubectl get storageclass | grep rook | awk '{ print $1 }' | xargs -I'{}' kubectl delete storageclass '{}' --timeout=60s; then logFail "Unable to delete rook-ceph StorageClasses" return 1 fi # More info: https://github.com/rook/rook/blob/v1.10.12/Documentation/CRDs/Cluster/ceph-cluster-crd.md#cleanup-policy log "Patch Ceph cluster to allow deletion" kubectl -n rook-ceph patch cephcluster rook-ceph --type merge -p '{"spec":{"cleanupPolicy":{"confirmation":"yes-really-destroy-data"}}}' # remove all rook-ceph CR objects log "Removing rook-ceph custom resource objects - this may take some time:" log "Waiting up to 3 minutes to remove rook-ceph CephCluster resource" if ! 
kubectl delete cephcluster -n rook-ceph rook-ceph --timeout=180s; then # More info: https://github.com/rook/rook/blob/v1.10.12/Documentation/Storage-Configuration/ceph-teardown.md#removing-the-cluster-crd-finalizer logWarn "Timed out after 3 minutes waiting to delete the rook-ceph CephCluster resource" logWarn "Removing critical finalizers" kubectl -n rook-ceph patch configmap rook-ceph-mon-endpoints --type merge -p '{"metadata":{"finalizers": []}}' kubectl -n rook-ceph patch secrets rook-ceph-mon --type merge -p '{"metadata":{"finalizers": []}}' log "Waiting up to 2 minutes to remove rook-ceph CephCluster resource after removing critical finalizers" if ! kubectl delete cephcluster -n rook-ceph rook-ceph --timeout=120s; then logWarn "Timed out after 2 minutes waiting to delete the rook-ceph CephCluster resource after the finalizers had been removed." logWarn "Forcing deletion by removing all finalizers" local crd for crd in $(kubectl get crd -n rook-ceph | awk '/ceph.rook.io/ {print $1}') ; do kubectl get -n rook-ceph "$crd" -o name | \ xargs -I {} kubectl patch -n rook-ceph {} --type merge -p '{"metadata":{"finalizers": []}}' done # After removing the finalizers the resources might be deleted without needing to try again sleep 20s if kubectl get cephcluster -n rook-ceph rook-ceph >/dev/null 2>&1; then log "Waiting up to 1 minute to remove rook-ceph CephCluster resource" if ! kubectl delete cephcluster -n rook-ceph rook-ceph --timeout=60s; then logFail "Unable to delete the rook-ceph CephCluster resource" return 1 fi else log "The rook-ceph CephCluster resource was deleted" fi fi fi log "Removing rook-ceph custom resources" if ! kubectl get crd | grep 'ceph.rook.io' | awk '{ print $1 }' | xargs -I'{}' kubectl -n rook-ceph delete '{}' --all --timeout=60s; then logWarn "Unable to delete the rook-ceph custom resources" fi log "Removing rook-ceph Volume resources" if ! kubectl delete volumes.rook.io --all --timeout=60s; then logWarn "Unable to delete rook-ceph Volume resources" fi log "Waiting for rook-ceph OSD pods to be removed" if ! spinner_until 120 rook_ceph_osd_pods_gone; then logWarn "rook-ceph OSD pods were not deleted" fi log "Removing rook-ceph CRDs" if ! kubectl get crd | grep 'ceph.rook.io' | awk '{ print $1 }' | xargs -I'{}' kubectl delete crd '{}' --timeout=60s; then logWarn "Unable to delete rook-ceph CRDs" fi log "Removing rook-ceph objectbucket CRDs" if ! kubectl get crd | grep 'objectbucket.io' | awk '{ print $1 }' | xargs -I'{}' kubectl delete crd '{}' --timeout=60s; then logWarn "Unable to delete rook-ceph objectbucket CRDs" fi log "Removing rook-ceph volumes CRD" if ! kubectl delete --ignore-not-found crd volumes.rook.io --timeout=60s; then logWarn "Unable to delete the rook-ceph volumes CRD" fi log "Removing the rook-ceph Namespace" if !
kubectl delete ns rook-ceph --timeout=60s; then logFail "Unable to delete the rook-ceph Namespace" logFail "These resources are preventing the namespace's deletion:" kubectl api-resources --verbs=list --namespaced -o name \ | xargs -n 1 kubectl get --show-kind --ignore-not-found -n rook-ceph return 1 fi # scale ekco back to 1 replica if it exists if kubernetes_resource_exists kurl deployment ekc-operator; then kubectl -n kurl get configmap ekco-config -o yaml | \ sed --expression='s/maintain_rook_storage_nodes:[ ]*true/maintain_rook_storage_nodes: false/g' | \ kubectl -n kurl apply -f - kubectl -n kurl scale deploy ekc-operator --replicas=1 fi rm -rf /var/lib/rook || true rm -rf /opt/replicated/rook || true if [ -d "/var/lib/rook" ] || [ -d "/opt/replicated/rook" ]; then logWarn "Data within /var/lib/rook, /opt/replicated/rook and any bound disks has not been freed." fi # print success message logSuccess "Removed rook-ceph successfully!" } # scale down prometheus, move all 'rook-ceph' PVCs to provided storage class, scale up prometheus # Supported storage class migrations from ceph are: 'longhorn' and 'openebs' function rook_ceph_to_sc_migration() { local destStorageClass=$1 local didRunValidationChecks=$2 local scProvisioner scProvisioner=$(kubectl get sc "$destStorageClass" -ojsonpath='{.provisioner}') # we only support migrating to 'longhorn' and 'openebs' storage classes if [[ "$scProvisioner" != *"longhorn"* ]] && [[ "$scProvisioner" != *"openebs"* ]]; then bail "Ceph to $scProvisioner migration is not supported" fi report_addon_start "rook-ceph-to-${scProvisioner}-migration" "v2" # patch ceph so that it does not consume new devices that longhorn creates echo "Patching CephCluster storage.useAllDevices=false" kubectl -n rook-ceph patch cephcluster rook-ceph --type json --patch '[{"op": "replace", "path": "/spec/storage/useAllDevices", "value": false}]' sleep 1 echo "Waiting for CephCluster to update" spinner_until 300 rook_osd_phase_ready || true # don't fail # scale down prometheus if it exists local ekcoScaledDown=0 if kubectl get namespace monitoring &>/dev/null; then if kubectl -n monitoring get prometheus k8s &>/dev/null; then # before scaling down prometheus, scale down ekco as it will otherwise restore the prometheus scale if kubernetes_resource_exists kurl deployment ekc-operator; then ekcoScaledDown=1 kubectl -n kurl scale deploy ekc-operator --replicas=0 log "Waiting for ekco pods to be removed" if ! spinner_until 120 ekco_pods_gone; then logFail "Unable to scale down ekco operator" return 1 fi fi # scale down prometheus operator pods, not the actual prometheus pods # this way pvmigrate can place PVCs on the correct nodes if migrating to OpenEBS kubectl scale deployment -n monitoring prometheus-operator --replicas=0 log "Waiting for prometheus operator pods to be removed" spinner_until 300 prometheus_operator_pods_gone fi fi # scale down ekco if kotsadm is using rqlite. if kubernetes_resource_exists default statefulset kotsadm-rqlite ; then if [ "$ekcoScaledDown" = "0" ]; then if kubernetes_resource_exists kurl deployment ekc-operator; then ekcoScaledDown=1 kubectl -n kurl scale deploy ekc-operator --replicas=0 log "Waiting for ekco pods to be removed" if !
spinner_until 120 ekco_pods_gone; then logFail "Unable to scale down ekco operator" return 1 fi fi fi fi # get the list of StorageClasses that use rook-ceph rook_scs=$(kubectl get storageclass | grep rook | grep -v '(default)' | awk '{ print $1}') # any non-default rook StorageClasses rook_default_sc=$(kubectl get storageclass | grep rook | grep '(default)' | awk '{ print $1}') # any default rook StorageClasses for rook_sc in $rook_scs do if [ "$didRunValidationChecks" == "1" ]; then # run the migration w/o validation checks $BIN_PVMIGRATE --source-sc "$rook_sc" --dest-sc "$destStorageClass" --rsync-image "$KURL_UTIL_IMAGE" --skip-free-space-check --skip-preflight-validation else # run the migration (without setting defaults) $BIN_PVMIGRATE --source-sc "$rook_sc" --dest-sc "$destStorageClass" --rsync-image "$KURL_UTIL_IMAGE" fi done for rook_sc in $rook_default_sc do if [ "$didRunValidationChecks" == "1" ]; then # run the migration w/o validation checks $BIN_PVMIGRATE --source-sc "$rook_sc" --dest-sc "$destStorageClass" --rsync-image "$KURL_UTIL_IMAGE" --skip-free-space-check --skip-preflight-validation --set-defaults else # run the migration (setting defaults) $BIN_PVMIGRATE --source-sc "$rook_sc" --dest-sc "$destStorageClass" --rsync-image "$KURL_UTIL_IMAGE" --set-defaults fi done # reset ekco scale if [ "$ekcoScaledDown" = "1" ] ; then kubectl -n kurl scale deploy ekc-operator --replicas=1 fi # reset prometheus scale if kubectl get namespace monitoring &>/dev/null; then if kubectl get prometheus -n monitoring k8s &>/dev/null; then kubectl scale deployment -n monitoring prometheus-operator --replicas=1 fi fi # print success message printf "${GREEN}Migration from rook-ceph to %s completed successfully!\n${NC}" "$scProvisioner" report_addon_success "rook-ceph-to-$scProvisioner-migration" "v2" } # if PVCs and object store data have both been migrated from rook-ceph and rook-ceph is no longer specified in the kURL spec, remove rook-ceph function maybe_cleanup_rook() { if [ -z "$ROOK_VERSION" ]; then # Just return if Rook is not installed. if ! kubectl get ns | grep -q rook-ceph; then return fi logStep "Removing Rook" export DID_MIGRATE_ROOK_PVCS=0 export DID_MIGRATE_ROOK_OBJECT_STORE=0 DID_MIGRATE_ROOK_PVCS=$(kubectl -n kurl get --ignore-not-found configmap kurl-migration-from-rook -o jsonpath='{ .data.DID_MIGRATE_ROOK_PVCS }') DID_MIGRATE_ROOK_OBJECT_STORE=$(kubectl -n kurl get --ignore-not-found configmap kurl-migration-from-rook -o jsonpath='{ .data.DID_MIGRATE_ROOK_OBJECT_STORE }') if [ "$DID_MIGRATE_ROOK_PVCS" == "1" ] && [ "$DID_MIGRATE_ROOK_OBJECT_STORE" == "1" ]; then report_addon_start "rook-ceph-removal" "v1.1" if ! remove_rook_ceph; then logFail "Unable to remove Rook." report_addon_fail "rook-ceph-removal" "v1.1" return fi kubectl delete configmap kurl-migration-from-rook -n kurl report_addon_success "rook-ceph-removal" "v1.1" return fi # If upgrading from Rook to OpenEBS without MinIO we cannot remove Rook because # we do not know whether the solution uses the Object Store, and removing it could lose that data if [ "$DID_MIGRATE_ROOK_PVCS" == "1" ] && [ -z "$MINIO_VERSION" ]; then if [ -z "$DID_MIGRATE_ROOK_OBJECT_STORE" ] || [ "$DID_MIGRATE_ROOK_OBJECT_STORE" != "1" ]; then logWarn "PVC(s) were migrated from Rook but Object Store data was not, as no MinIO version was specified." logWarn "Rook will not be automatically removed without migrating Object Store data."
logWarn "" logWarn "If you are sure that Object Store data is not used, you can manually perform this operation" logWarn "by running the remove_rook_ceph task:" logWarn "$ curl /task.sh | sudo bash -s remove_rook_ceph, i.e.:" logWarn "" logWarn "curl https://kurl.sh/latest/tasks.sh | sudo bash -s remove_rook_ceph" fi fi logFail "Unable to remove Rook." if [ "$DID_MIGRATE_ROOK_PVCS" != "1" ]; then logFail "Storage class migration did not succeed" fi if [ -n "$MINIO_VERSION" ] && [ "$DID_MIGRATE_ROOK_OBJECT_STORE" != "1" ]; then logFail "Object Store migration did not succeed" fi fi } function rook_osd_phase_ready() { if [ "$(current_rook_version)" = "1.0.4" ]; then [ "$(kubectl -n rook-ceph get cephcluster rook-ceph --template '{{.status.state}}')" = 'Created' ] else [ "$(kubectl -n rook-ceph get cephcluster rook-ceph --template '{{.status.phase}}')" = 'Ready' ] fi } function current_rook_version() { kubectl -n rook-ceph get deploy rook-ceph-operator -oyaml 2>/dev/null \ | grep ' image: ' \ | awk -F':' 'NR==1 { print $3 }' \ | sed 's/v\([^-]*\).*/\1/' } function current_ceph_version() { kubectl -n rook-ceph get deployment rook-ceph-mgr-a -o jsonpath='{.metadata.labels.ceph-version}' 2>/dev/null \ | awk -F'-' '{ print $1 }' } function rook_operator_ready() { local rook_status_phase= local rook_status_msg= rook_status_phase=$(kubectl -n rook-ceph get cephcluster rook-ceph --template '{{.status.phase}}') rook_status_msg=$(kubectl -n rook-ceph get cephcluster rook-ceph --template '{{.status.message}}') if [ "$rook_status_phase" != "Ready" ]; then log "Rook operator is not ready: $rook_status_msg" return 1 fi return 0 } # In certain edge cases, while migrating away from Rook, we may encounter issues. # Specifically, after we execute a pvmigrate operation to migrate the PVCs and to migrate the Object store, the system # may transition to an unhealthy state. This problem appears to be connected to specific modules # [root@rook-ceph-operator-747c86774c-7v95s /]# ceph health detail # HEALTH_ERR 2 mgr modules have failed # MGR_MODULE_ERROR 2 mgr modules have failed # Module 'dashboard' has failed: error('No socket could be created',) # Module 'prometheus' has failed: error('No socket could be created',) # The proposed workaround ensures a smooth transition during the migration and upgrade processes, ultimately allowing # for the successful deletion of Rook. To this end, this PR automates the resolution process by rectifying the Rook Ceph # state and allowing the migration to proceed, given that Rook will be removed in the end. It's important to note that # this automated fix is only applied during the checks performed when we are in the process of migrating away from Rook # and when Rook's removal is the intended outcome. # # Note this method is a duplication of rook_is_healthy_to_upgrade which now is called in the migration process # ONLY when we are moving from Rook. We should not try to fix it in other circumstances function rook_is_healthy_to_migrate_from() { log "Awaiting up to 5 minutes to check Rook Ceph Pod(s) are Running" if ! spinner_until 300 check_for_running_pods "rook-ceph"; then logFail "Rook Ceph has unhealthy Pod(s)" return 1 fi log "Awaiting up to 10 minutes to check that Rook Ceph is health" if ! 
$DIR/bin/kurl rook wait-for-health 600 ; then logWarn "Rook Ceph is unhealthy" output=$(kubectl -n rook-ceph exec deploy/rook-ceph-tools -- ceph status) echo "" echo "$output" echo "" if [[ $output == *"Module 'dashboard'"* ]] || [[ $output == *"Module 'prometheus'"* ]]; then echo "Disabling Ceph manager modules in order to get Ceph healthy again." kubectl -n rook-ceph exec deployment/rook-ceph-tools -- ceph mgr module disable prometheus || true kubectl -n rook-ceph exec deployment/rook-ceph-tools -- ceph mgr module disable dashboard || true fi log "Verify Rook Ceph health after attempting the fix" if ! $DIR/bin/kurl rook wait-for-health 600; then kubectl -n rook-ceph exec deploy/rook-ceph-tools -- ceph status logFail "Rook Ceph is unhealthy" return 1 fi return 1 fi log "Checking Rook Ceph versions and replicas" kubectl -n rook-ceph get deployment -l rook_cluster=rook-ceph -o jsonpath='{range .items[*]}{.metadata.name}{" \treq/upd/avl: "}{.spec.replicas}{"/"}{.status.updatedReplicas}{"/"}{.status.readyReplicas}{" \trook-version="}{.metadata.labels.rook-version}{"\n"}{end}' local rook_versions= rook_versions="$(kubectl -n rook-ceph get deployment -l rook_cluster=rook-ceph -o jsonpath='{range .items[*]}{"rook-version="}{.metadata.labels.rook-version}{"\n"}{end}' | sort | uniq)" if [ -n "${rook_versions}" ] && [ "$(echo "${rook_versions}" | wc -l)" -gt "1" ]; then logFail "Multiple Rook versions detected" logFail "${rook_versions}" return 1 fi log "Checking Ceph versions and replicas" kubectl -n rook-ceph get deployment -l rook_cluster=rook-ceph -o jsonpath='{range .items[*]}{.metadata.name}{" \treq/upd/avl: "}{.spec.replicas}{"/"}{.status.updatedReplicas}{"/"}{.status.readyReplicas}{" \tceph-version="}{.metadata.labels.ceph-version}{"\n"}{end}' local ceph_versions_found= ceph_versions_found="$(kubectl -n rook-ceph get deployment -l rook_cluster=rook-ceph -o jsonpath='{range .items[*]}{"ceph-version="}{.metadata.labels.ceph-version}{"\n"}{end}' | sort | uniq)" if [ -n "${ceph_versions_found}" ] && [ "$(echo "${ceph_versions_found}" | wc -l)" -gt "1" ]; then # This is required because of a Rook Ceph bug which was sorted out with release 1.4.8 # More info: https://github.com/rook/rook/pull/6610 if [ "$(echo "${ceph_versions_found}" | wc -l)" == "2" ] && [ "$(echo "${ceph_versions_found}" | grep "0.0.0-0")" ]; then log "Found two ceph versions but one of them is 0.0.0-0 which will be ignored" echo "${ceph_versions_found}" else logFail "Multiple Ceph versions detected" logFail "${ceph_versions_found}" return 1 fi fi return 0 } function rook_is_healthy_to_upgrade() { log "Awaiting up to 5 minutes to check Rook Ceph Pod(s) are Running" if ! spinner_until 300 check_for_running_pods "rook-ceph"; then logFail "Rook Ceph has unhealthy Pod(s)" return 1 fi log "Awaiting up to 10 minutes to check that Rook Ceph is healthy" if !
$DIR/bin/kurl rook wait-for-health 600 ; then kubectl -n rook-ceph exec deploy/rook-ceph-tools -- ceph status logFail "Rook Ceph is unhealthy" return 1 fi log "Checking Rook Ceph versions and replicas" kubectl -n rook-ceph get deployment -l rook_cluster=rook-ceph -o jsonpath='{range .items[*]}{.metadata.name}{" \treq/upd/avl: "}{.spec.replicas}{"/"}{.status.updatedReplicas}{"/"}{.status.readyReplicas}{" \trook-version="}{.metadata.labels.rook-version}{"\n"}{end}' local rook_versions= rook_versions="$(kubectl -n rook-ceph get deployment -l rook_cluster=rook-ceph -o jsonpath='{range .items[*]}{"rook-version="}{.metadata.labels.rook-version}{"\n"}{end}' | sort | uniq)" if [ -n "${rook_versions}" ] && [ "$(echo "${rook_versions}" | wc -l)" -gt "1" ]; then logFail "Multiple Rook versions detected" logFail "${rook_versions}" return 1 fi log "Checking Ceph versions and replicas" kubectl -n rook-ceph get deployment -l rook_cluster=rook-ceph -o jsonpath='{range .items[*]}{.metadata.name}{" \treq/upd/avl: "}{.spec.replicas}{"/"}{.status.updatedReplicas}{"/"}{.status.readyReplicas}{" \tceph-version="}{.metadata.labels.ceph-version}{"\n"}{end}' local ceph_versions_found= ceph_versions_found="$(kubectl -n rook-ceph get deployment -l rook_cluster=rook-ceph -o jsonpath='{range .items[*]}{"ceph-version="}{.metadata.labels.ceph-version}{"\n"}{end}' | sort | uniq)" if [ -n "${ceph_versions_found}" ] && [ "$(echo "${ceph_versions_found}" | wc -l)" -gt "1" ]; then # This is required because of a Rook Ceph bug which was sorted out with release 1.4.8 # More info: https://github.com/rook/rook/pull/6610 if [ "$(echo "${ceph_versions_found}" | wc -l)" == "2" ] && [ "$(echo "${ceph_versions_found}" | grep "0.0.0-0")" ]; then log "Found two ceph versions but one of them is 0.0.0-0 which will be ignored" echo "${ceph_versions_found}" else logFail "Multiple Ceph versions detected" logFail "${ceph_versions_found}" return 1 fi fi return 0 } # Check if the kurl-migration-from-rook ConfigMap exists and create it if not, # then set DID_MIGRATE_ROOK_PVCS = "1" to track that the PVCs were migrated function add_rook_pvc_migration_status() { if ! kubectl -n kurl get configmap kurl-migration-from-rook 2>/dev/null; then log "Creating ConfigMap to track status of migration from Rook" kubectl create configmap kurl-migration-from-rook -n kurl fi kubectl patch configmap kurl-migration-from-rook -n kurl --type merge -p '{"data":{"DID_MIGRATE_ROOK_PVCS":"1"}}' export DID_MIGRATE_ROOK_PVCS=1 } # Check if the kurl-migration-from-rook ConfigMap exists and create it if not, # then set DID_MIGRATE_ROOK_OBJECT_STORE = "1" to track that the Object Store data was migrated function add_rook_store_object_migration_status() { if ! kubectl -n kurl get configmap kurl-migration-from-rook 2>/dev/null; then log "Creating ConfigMap to track status of migration from Rook" kubectl create configmap kurl-migration-from-rook -n kurl fi kubectl patch configmap kurl-migration-from-rook -n kurl --type merge -p '{"data":{"DID_MIGRATE_ROOK_OBJECT_STORE":"1"}}' export DID_MIGRATE_ROOK_OBJECT_STORE=1 } # rook_maybe_migrate_from_openebs may migrate data from OpenEBS to Rook when all the following # conditions are met: # - Ekco version is >= 0.27.1. # - OpenEBS and Rook are selected on the Installer. # - Rook's minimum node count is set to a value >= 3. # - The number of nodes in the cluster is >= the Rook minimum node count. # - The 'scaling' storage class exists.
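# Illustrative example of this gating (assumed values, not executed): with
# EKCO_VERSION=0.27.1, ROOK_VERSION and OPENEBS_VERSION both set, and
# ROOK_MINIMUM_NODE_COUNT=3, the function below proceeds to
# rook_maybe_migrate_from_openebs_internal; if ROOK_MINIMUM_NODE_COUNT is unset or
# less than 3, it returns without doing anything.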
function rook_maybe_migrate_from_openebs() { semverCompare "$EKCO_VERSION" "0.27.1" if [ "$SEMVER_COMPARE_RESULT" -lt "0" ]; then return 0 fi if [ -z "$ROOK_VERSION" ] || [ -z "$OPENEBS_VERSION" ]; then return 0 fi if [ -z "$ROOK_MINIMUM_NODE_COUNT" ] || [ "$ROOK_MINIMUM_NODE_COUNT" -lt "3" ]; then return 0 fi rook_maybe_migrate_from_openebs_internal } # rook_maybe_migrate_from_openebs_internal SHOULD NOT BE CALLED DIRECTLY. # it is called by rook_maybe_migrate_from_openebs and rook_maybe_migrate_from_openebs_tasks when all the conditions are met. # it will check that the required environment variables (EKCO_AUTH_TOKEN and EKCO_ADDRESS) are set and then # check EKCO to see if the migration is available. If it is, it will prompt the user to start it. function rook_maybe_migrate_from_openebs_internal() { if [ -z "$EKCO_AUTH_TOKEN" ]; then logFail "Internal Error: an authentication token is required to start the OpenEBS to Rook multi-node migration." return 0 fi if [ -z "$EKCO_ADDRESS" ]; then logFail "Internal Error: unable to determine network address of the kURL operator." return 0 fi # check if OpenEBS to Rook multi-node migration is available - if it is, prompt the user to start it if cluster_status_msg=$("${DIR}"/bin/kurl cluster migrate-multinode-storage --ekco-address "$EKCO_ADDRESS" --ekco-auth-token "$EKCO_AUTH_TOKEN" --check-status 2>&1); then printf " The installer detected both OpenEBS and Rook installations in your cluster. Migration from OpenEBS to Rook\n" printf " is possible now, but it requires scaling down applications using OpenEBS volumes, causing downtime. You can\n" printf " choose to run the migration later if preferred.\n" printf "Would you like to continue with the migration now? \n" if ! confirmN ; then printf "Not migrating from OpenEBS to Rook\n" return 0 fi else # migration is not available, so exit printf "Migration from OpenEBS to Rook is not available: %s\n" "$(echo $cluster_status_msg | sed s/'Error: '//)" return 0 fi # Initiate OpenEBS to Rook multi-node migration if ! "${DIR}"/bin/kurl cluster migrate-multinode-storage --ekco-address "$EKCO_ADDRESS" --ekco-auth-token "$EKCO_AUTH_TOKEN" --ready-timeout "$(storage_migration_ready_timeout)" --assume-yes; then logFail "Failed to migrate from OpenEBS to Rook. The installation will move on." logFail "If you would like to run the migration later, run the following command:" logFail " $DIR/bin/kurl cluster migrate-multinode-storage --ekco-address $EKCO_ADDRESS --ekco-auth-token $EKCO_AUTH_TOKEN" return 0 fi } # rook_maybe_migrate_from_openebs_tasks will call rook_maybe_migrate_from_openebs_internal # after determining values for EKCO_AUTH_TOKEN and EKCO_ADDRESS from the cluster. function rook_maybe_migrate_from_openebs_tasks() { local ekcoAddress= local ekcoAuthToken= ekcoAddress=$(get_ekco_addr) ekcoAuthToken=$(get_ekco_storage_migration_auth_token) if [ -z "$ekcoAddress" ] || [ -z "$ekcoAuthToken" ]; then return 0 fi export EKCO_ADDRESS="$ekcoAddress" export EKCO_AUTH_TOKEN="$ekcoAuthToken" # are both rook and openebs installed, not just specified? if ! kubectl get ns | grep -q rook-ceph && ! kubectl get ns | grep -q openebs; then bail "Rook and OpenEBS must be installed in order to migrate to multi-node storage" fi rook_maybe_migrate_from_openebs_internal } # shellcheck disable=SC2148 export PV_BASE_PATH=/opt/replicated/rook # rook_upgrade_maybe_report_upgrade_rook checks if rook should be upgraded before upgrading k8s, # prompts the user to confirm the upgrade, and starts the upgrade process. 
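# Hedged example of when this stepped upgrade kicks in (per
# rook_upgrade_should_upgrade_rook below): upgrading 1.5.12 -> 1.6.11 is left to the
# rook add-on itself, while 1.5.12 -> 1.9.12 (two or more minor versions apart)
# triggers this stepped upgrade path.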
function rook_upgrade_maybe_report_upgrade_rook() { local current_version= current_version="$(current_rook_version)" local desired_version="$ROOK_VERSION" if ! rook_upgrade_should_upgrade_rook "$current_version" "$desired_version" ; then return fi if ! rook_upgrade_prompt "$current_version" "$desired_version" ; then bail "Not upgrading Rook" fi if ! rook_upgrade_storage_check "$current_version" "$desired_version" ; then bail "Not upgrading Rook" fi rook_upgrade_report_upgrade_rook "$current_version" "$desired_version" # shellcheck disable=SC1090 addon_source "rook" "$ROOK_VERSION" # This will undo the override from above prior to running addon_install } # rook_upgrade_should_upgrade_rook checks the currently installed rook version and the desired rook # version. If the current version is two or more minor versions behind the desired version, then # the function will return true. function rook_upgrade_should_upgrade_rook() { local current_version="$1" local desired_version="$2" # rook is not installed, so no upgrade if [ -z "$current_version" ]; then return 1 fi # rook is not requested to be installed, so no upgrade if [ -z "$desired_version" ]; then return 1 fi semverParse "$current_version" # shellcheck disable=SC2154 local current_rook_version_major="$major" # shellcheck disable=SC2154 local current_rook_version_minor="$minor" semverParse "$desired_version" local next_rook_version_major="$major" local next_rook_version_minor="$minor" # shellcheck disable=SC2154 local next_rook_version_patch="$patch" # upgrades not supported for major versions not equal to 1 if [ "$current_rook_version_major" != "1" ] || [ "$next_rook_version_major" != "1" ]; then return 1 fi # upgrade not needed for minor versions equal if [ "$current_rook_version_minor" = "$next_rook_version_minor" ]; then return 1 fi # upgrades not supported to minor versions less than 4 if [ "$next_rook_version_minor" -lt "4" ]; then return 1 # special case 1.0 to 1.4 upgrade elif [ "$next_rook_version_minor" = "4" ]; then # upgrades to 1.4 are not supported for patch versions less than 1.4.9 if [ "$next_rook_version_patch" -lt "9" ]; then return 1 fi return 0 fi # if the current minor version is greater than or equal to the desired minor version - 1, the add-on itself # can do the single minor version upgrade (although this is not true for minor versions less than 4) if [ "$current_rook_version_minor" -ge "$((next_rook_version_minor - 1))" ]; then return 1 fi return 0 } # rook_upgrade_prompt prompts the user to confirm the rook upgrade. function rook_upgrade_prompt() { local current_version="$1" local desired_version="$2" logWarn "$(printf "This script will upgrade Rook from %s to %s." "$current_version" "$desired_version")" logWarn "Upgrading Rook will take some time and will place additional load on your server." if ! "$DIR"/bin/kurl rook has-sufficient-blockdevices ; then logWarn "In order to complete this migration, you may need to attach a blank disk to each node in the cluster for Rook to use." fi printf "Would you like to continue? " confirmN } # rook_upgrade_storage_check verifies that enough disk space exists for the rook upgrade to complete # successfully.
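# Rough worked example (sizes taken from rook_upgrade_required_archive_size further
# below, figures approximate): a 1.5.x -> 1.10.x upgrade downloads roughly
# 1.4 + 1.5 + 1.7 + 1.8 + 1.8 GB ~= 8.2 GB of archives, and per the comments in the
# check below roughly 2.5x that is needed for extracted files plus 1x for container images.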
function rook_upgrade_storage_check() { local current_version="$1" local desired_version="$2" local archive_size= archive_size="$(rook_upgrade_required_archive_size "$current_version" "$desired_version")" # 2.5x archive size for extracted files # 1x archive size for container images common_upgrade_storage_check "$archive_size" $((5/2)) 1 "Rook" } # rook_upgrade_report_upgrade_rook reports the upgrade and starts the upgrade process. function rook_upgrade_report_upgrade_rook() { local current_version="$1" local desired_version="$2" local from_version= from_version="$(common_upgrade_version_to_major_minor "$current_version")" local rook_upgrade_version="v2.0.0" # if you change this code, change the version report_addon_start "rook_${from_version}_to_${desired_version}" "$rook_upgrade_version" export REPORTING_CONTEXT_INFO="rook_${from_version}_to_${desired_version} $rook_upgrade_version" rook_upgrade "$current_version" "$desired_version" export REPORTING_CONTEXT_INFO="" report_addon_success "rook_${from_version}_to_${desired_version}" "$rook_upgrade_version" } # rook_upgrade will fetch the add-on, load the images for the upgrade, and finally run # the upgrade script. function rook_upgrade() { local current_version="$1" local desired_version="$2" rook_disable_ekco_operator # when invoked in a subprocess the failure of this function will not cause the script to exit # sanity check that the rook version is valid rook_upgrade_step_versions "${ROOK_STEP_VERSIONS[*]}" "$current_version" "$desired_version" 1>/dev/null logStep "Upgrading Rook from $current_version to $desired_version" common_upgrade_print_list_of_minor_upgrades "$current_version" "$desired_version" echo "This may take some time." rook_upgrade_addon_fetch_and_load "$current_version" "$desired_version" rook_upgrade_prompt_missing_images "$current_version" "$desired_version" # delete the mutatingwebhookconfiguration and remove the rook-priority.kurl.sh label # as the EKCO rook-priority.kurl.sh mutating webhook is no longer necessary past Rook # 1.0.4. kubectl label namespace rook-ceph rook-priority.kurl.sh- kubectl delete mutatingwebhookconfigurations rook-priority.kurl.sh --ignore-not-found if common_upgrade_is_version_included "$current_version" "$desired_version" "1.4" ; then addon_source "rookupgrade" "10to14" rookupgrade_10to14_upgrade "$current_version" # delete both the compressed and decompressed addon files to free up space rm -f "$DIR/assets/rookupgrade-10to14.tar.gz" rm -rf "$DIR/addons/rookupgrade/10to14" fi # if desired_version is greater than 1.4, then continue with the upgrade if [ "$(common_upgrade_compare_versions "$desired_version" "1.4")" = "1" ]; then rook_upgrade_do_rook_upgrade "$(common_upgrade_max_version "1.4" "$current_version")" "$desired_version" fi rook_enable_ekco_operator logSuccess "Successfully upgraded Rook from $current_version to $desired_version" } # rook_upgrade_do_rook_upgrade will step through each minor version upgrade from $current_version to # $desired_version function rook_upgrade_do_rook_upgrade() { local current_version="$1" local desired_version="$2" local step= while read -r step; do if [ -z "$step" ] || [ "$step" = "0.0.0" ]; then continue fi if !
addon_exists "rook" "$step" ; then bail "Rook version $step not found" fi logStep "Upgrading to Rook $step" # temporarily set the ROOK_VERSION since the add-on script relies on it local old_rook_version="$ROOK_VERSION" export ROOK_VERSION="$step" # shellcheck disable=SC1090 addon_source "rook" "$step" # this will override the rook $ROOK_VERSION add-on functions if commandExists "rook_should_fail_install" ; then # NOTE: there is no way to know this is the correct rook version function if rook_should_fail_install ; then bail "Rook $desired_version will not be installed due to failed preflight checks" fi fi # NOTE: there is no way to know this is the correct rook version function rook # upgrade to the step version ROOK_VERSION="$old_rook_version" # if this is not the last version in the loop, then delete the addon files to free up space if [ "$step" != "$desired_version" ]; then rm -f "$DIR/assets/rook-$step.tar.gz" rm -rf "$DIR/addons/rook/$step" fi logSuccess "Upgraded to Rook $step successfully" done <<< "$(rook_upgrade_step_versions "${ROOK_STEP_VERSIONS[*]}" "$current_version" "$desired_version")" if [ -n "$AIRGAP_MULTI_ADDON_PACKAGE_PATH" ]; then # delete the airgap package files to free up space rm -f "$AIRGAP_MULTI_ADDON_PACKAGE_PATH" fi } # rook_upgrade_addon_fetch_and_load will fetch all add-on versions from $current_version to $desired_version. function rook_upgrade_addon_fetch_and_load() { if [ "$AIRGAP" = "1" ]; then rook_upgrade_addon_fetch_and_load_airgap "$@" else rook_upgrade_addon_fetch_and_load_online "$@" fi } # rook_upgrade_addon_fetch_and_load_online will fetch all add-on versions, one at a time, from $current_version # to $desired_version. function rook_upgrade_addon_fetch_and_load_online() { local current_version="$1" local desired_version="$2" logStep "Downloading images required for Rook $current_version to $desired_version upgrade" if common_upgrade_is_version_included "$current_version" "$desired_version" "1.4" ; then rook_upgrade_addon_fetch_and_load_online_step "rookupgrade" "10to14" fi if [ "$(common_upgrade_compare_versions "$desired_version" "1.4")" = "1" ]; then local step= while read -r step; do if [ -z "$step" ] || [ "$step" = "0.0.0" ]; then continue fi if [ "$step" = "$ROOK_VERSION" ]; then rook_upgrade_addon_fetch_and_load_online_step "rook" "$ROOK_VERSION" "$ROOK_S3_OVERRIDE" else rook_upgrade_addon_fetch_and_load_online_step "rook" "$step" fi done <<< "$(rook_upgrade_step_versions "${ROOK_STEP_VERSIONS[*]}" "$(common_upgrade_max_version "1.4" "$current_version")" "$desired_version")" fi logSuccess "Images loaded for Rook $current_version to $desired_version upgrade" } # rook_upgrade_addon_fetch_and_load_online_step will fetch an individual add-on version. function rook_upgrade_addon_fetch_and_load_online_step() { local addon="$1" local version="$2" local s3_override="$3" addon_fetch "$addon" "$version" "$s3_override" addon_load "$addon" "$version" } # rook_upgrade_addon_fetch_and_load_airgap will prompt the user to fetch all add-on versions from # $current_version to $desired_version. 
function rook_upgrade_addon_fetch_and_load_airgap() { local current_version="$1" local desired_version="$2" if rook_upgrade_has_all_addon_version_packages "$current_version" "$desired_version" ; then local node_missing_images= # shellcheck disable=SC2086 node_missing_images=$(rook_upgrade_nodes_missing_images "$current_version" "$desired_version" "$(get_local_node_name)" "") if [ -z "$node_missing_images" ]; then log "All images required for Rook $current_version to $desired_version upgrade are present on this node" return fi fi logStep "Downloading images required for Rook $current_version to $desired_version upgrade" local addon_versions=() if common_upgrade_is_version_included "$current_version" "$desired_version" "1.4" ; then addon_versions+=( "rookupgrade-10to14" ) fi if [ "$(common_upgrade_compare_versions "$desired_version" "1.4")" = "1" ]; then local step= while read -r step; do if [ -z "$step" ] || [ "$step" = "0.0.0" ]; then continue fi # the last version already included in the airgap bundle if [ "$step" = "$desired_version" ]; then continue fi addon_versions+=( "rook-$step" ) done <<< "$(rook_upgrade_step_versions "${ROOK_STEP_VERSIONS[*]}" "$(common_upgrade_max_version "1.4" "$current_version")" "$desired_version")" fi addon_fetch_multiple_airgap "${addon_versions[@]}" if common_upgrade_is_version_included "$current_version" "$desired_version" "1.4" ; then addon_load "rookupgrade" "10to14" fi if [ "$(common_upgrade_compare_versions "$desired_version" "1.4")" = "1" ]; then local step= while read -r step; do if [ -z "$step" ] || [ "$step" = "0.0.0" ]; then continue fi addon_load "rook" "$step" done <<< "$(rook_upgrade_step_versions "${ROOK_STEP_VERSIONS[*]}" "$(common_upgrade_max_version "1.4" "$current_version")" "$desired_version")" fi logSuccess "Images loaded for Rook $current_version to $desired_version upgrade" } # rook_upgrade_has_all_addon_version_packages will return 1 if any add-on versions are missing that # are necessary to perform the upgrade. function rook_upgrade_has_all_addon_version_packages() { local current_version="$1" local desired_version="$2" if common_upgrade_is_version_included "$current_version" "$desired_version" "1.4" ; then if [ ! -f "addons/rookupgrade/10to14/Manifest" ]; then return 1 fi fi if [ "$(common_upgrade_compare_versions "$desired_version" "1.4")" = "1" ]; then local step= while read -r step; do if [ -z "$step" ] || [ "$step" = "0.0.0" ]; then continue fi if [ ! -f "addons/rook/$step/Manifest" ]; then return 1 fi done <<< "$(rook_upgrade_step_versions "${ROOK_STEP_VERSIONS[*]}" "$(common_upgrade_max_version "1.4" "$current_version")" "$desired_version")" fi return 0 } # rook_upgrade_prompt_missing_images prompts the user to run the command to load the images on all # remote nodes before proceeding. function rook_upgrade_prompt_missing_images() { local current_version="$1" local desired_version="$2" local node_missing_images= # shellcheck disable=SC2086 node_missing_images=$(rook_upgrade_nodes_missing_images "$current_version" "$desired_version" "" "$(get_local_node_name)") common_prompt_task_missing_assets "$node_missing_images" "$current_version" "$desired_version" "Rook" "rook-upgrade-load-images" } # rook_upgrade_nodes_missing_images will print a list of nodes that are missing images for the # given rook versions. 
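# Usage note: rook_upgrade_prompt_missing_images above calls this as
#   rook_upgrade_nodes_missing_images "$current_version" "$desired_version" "" "$(get_local_node_name)"
# i.e. it checks every node except the local one and prints the names of nodes that
# still need images loaded before the upgrade can proceed.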
function rook_upgrade_nodes_missing_images() { local current_version="$1" local desired_version="$2" local target_host="$3" local exclude_hosts="$4" local images_list= images_list="$(rook_upgrade_images_list "$current_version" "$desired_version")" if [ -z "$images_list" ]; then return fi kubernetes_nodes_missing_images "$images_list" "$target_host" "$exclude_hosts" } # rook_upgrade_images_list will print a list of images for the given rook versions. function rook_upgrade_images_list() { local current_version="$1" local desired_version="$2" local images_list= if common_upgrade_is_version_included "$current_version" "$desired_version" "1.4" ; then images_list="$(rook_upgrade_list_rook_ceph_images_in_manifest_file "addons/rookupgrade/10to14/Manifest")" fi if [ "$(common_upgrade_compare_versions "$desired_version" "1.4")" = "1" ]; then local step= while read -r step; do if [ -z "$step" ] || [ "$step" = "0.0.0" ]; then continue fi # the last version already included in the airgap bundle if [ "$step" = "$desired_version" ]; then continue fi images_list="$(common_upgrade_merge_images_list \ "$images_list" \ "$(rook_upgrade_list_rook_ceph_images_in_manifest_file "addons/rook/$step/Manifest")" \ )" done <<< "$(rook_upgrade_step_versions "${ROOK_STEP_VERSIONS[*]}" "$(common_upgrade_max_version "1.4" "$current_version")" "$desired_version")" fi echo "$images_list" } # rook_upgrade_list_rook_ceph_images_in_manifest_file will list the rook/ceph images in the given # manifest file. function rook_upgrade_list_rook_ceph_images_in_manifest_file() { local manifest_file="$1" local image_list= for image in $(grep "^image " "$manifest_file" | grep -F "rook/ceph" | awk '{print $3}' | tr '\n' ' ') ; do image_list=$image_list" $(canonical_image_name "$image")" done echo "$image_list" | xargs # trim whitespace } # rook_upgrade_step_versions returns a list of upgrade steps that need to be performed, based on # the supplied space-delimited set of step versions, for use by other functions. This list is # inclusive of the from_version. e.g. "1.5.12\n1.6.11\n1.7.11" function rook_upgrade_step_versions() { local step_versions= read -ra step_versions <<< "$1" local from_version=$2 local desired_version=$3 local to_version= to_version=$(common_upgrade_version_to_major_minor "$desired_version") # check that major versions are the same local first_major= first_major=$(common_upgrade_major_minor_to_major "$from_version") local last_major= last_major=$(common_upgrade_major_minor_to_major "$to_version") if [ "$first_major" != "$last_major" ]; then bail "Upgrade across major version from $from_version to $to_version is not supported." fi local first_minor= local last_minor= first_minor=$(common_upgrade_major_minor_to_minor "$from_version") last_minor=$(common_upgrade_major_minor_to_minor "$to_version") if [ "${#step_versions[@]}" -le "$last_minor" ]; then bail "Upgrade from $from_version to $to_version is not supported." fi # if there are no steps to perform, return if [ "$first_minor" -gt "$last_minor" ]; then return fi if [ "$desired_version" != "$to_version" ]; then last_minor=$((last_minor - 1)) # last version is the desired version fi local step= for (( step=first_minor ; step<=last_minor ; step++ )); do echo "${step_versions[$step]}" done if [ "$desired_version" != "$to_version" ]; then echo "$desired_version" fi } # rook_upgrade_tasks_load_images is called from tasks.sh to load images on remote nodes for the # rook upgrade.
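# Hedged summary of the task below: when run via tasks.sh on a remote node it exports
# KUBECONFIG=/etc/kubernetes/admin.conf, downloads the util binaries, re-runs the
# storage check for the requested from/to versions, and then fetches and loads the
# required add-on images on that node.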
function rook_upgrade_tasks_load_images() { local from_version= local to_version= local airgap= common_upgrade_tasks_params "$@" common_task_require_param "from-version" "$from_version" common_task_require_param "to-version" "$to_version" if [ "$airgap" = "1" ]; then export AIRGAP=1 fi export KUBECONFIG=/etc/kubernetes/admin.conf download_util_binaries if ! rook_upgrade_storage_check "$from_version" "$to_version" ; then bail "Failed storage check" fi if ! rook_upgrade_addon_fetch_and_load "$from_version" "$to_version" ; then bail "Failed to load images" fi } # rook_upgrade_required_archive_size will determine the size of the archive that will be downloaded # to upgrade between the supplied rook versions. The amount of space required within # $KURL_INSTALL_DIRECTORY and /var/lib/containerd or /var/lib/docker can then be derived from this # (2x archive size in kurl, 3.5x in containerd/docker). function rook_upgrade_required_archive_size() { local current_version="$1" local desired_version="$2" semverParse "$current_version" # shellcheck disable=SC2154 local current_rook_version_major="$major" # shellcheck disable=SC2154 local current_rook_version_minor="$minor" semverParse "$desired_version" local next_rook_version_major="$major" local next_rook_version_minor="$minor" # if the major versions are not '1', exit with an error if [ "$current_rook_version_major" != "1" ] || [ "$next_rook_version_major" != "1" ]; then bail "Rook major versions must be 1" fi local total_archive_size=0 if [ "$current_rook_version_minor" -lt 4 ] && [ "$next_rook_version_minor" -ge 4 ]; then total_archive_size=$((total_archive_size + 3400)) # 3.4 GB for the 1.0 to 1.4 archive total_archive_size=$((total_archive_size + 1300)) # 1.3 GB for the 1.4 archive fi if [ "$current_rook_version_minor" -lt 5 ] && [ "$next_rook_version_minor" -ge 5 ]; then total_archive_size=$((total_archive_size + 1400)) # 1.4 GB for the 1.5 archive fi if [ "$current_rook_version_minor" -lt 6 ] && [ "$next_rook_version_minor" -ge 6 ]; then total_archive_size=$((total_archive_size + 1400)) # 1.4 GB for the 1.6 archive fi if [ "$current_rook_version_minor" -lt 7 ] && [ "$next_rook_version_minor" -ge 7 ]; then total_archive_size=$((total_archive_size + 1500)) # 1.5 GB for the 1.7 archive fi if [ "$current_rook_version_minor" -lt 8 ] && [ "$next_rook_version_minor" -ge 8 ]; then total_archive_size=$((total_archive_size + 1700)) # 1.7 GB for the 1.8 archive fi if [ "$current_rook_version_minor" -lt 9 ] && [ "$next_rook_version_minor" -ge 9 ]; then total_archive_size=$((total_archive_size + 1800)) # 1.8 GB for the 1.9 archive fi if [ "$current_rook_version_minor" -lt 10 ] && [ "$next_rook_version_minor" -ge 10 ]; then total_archive_size=$((total_archive_size + 1800)) # 1.8 GB for the 1.10 archive fi # add 2gb for each version past 1.10 # TODO handle starting from a version past 1.10 if [ "$next_rook_version_minor" -gt 10 ]; then total_archive_size=$((total_archive_size + 2000 * (next_rook_version_minor - 10))) fi echo "$total_archive_size" } function longhorn_host_init_common() { longhorn_install_iscsi_if_missing_common $1 install_nfs_utils_if_missing_common $1 mkdir -p /var/lib/longhorn chmod 700 /var/lib/longhorn } function longhorn_install_iscsi_if_missing_common() { local src=$1 if ! systemctl list-units | grep -q iscsid ; then if ! 
host_packages_shipped; then ensure_host_package iscsi-initiator-utils open-iscsi else case "$LSB_DIST" in ubuntu) dpkg_install_host_archives "$src" open-iscsi ;; centos|rhel|ol|rocky|amzn) yum_install_host_archives "$src" iscsi-initiator-utils ;; esac fi fi if ! systemctl -q is-active iscsid; then systemctl start iscsid fi if ! systemctl -q is-enabled iscsid; then systemctl enable iscsid fi } function install_nfs_utils_if_missing_common() { local src=$1 if ! systemctl list-units | grep -q nfs-utils ; then if ! host_packages_shipped; then ensure_host_package nfs-utils nfs-common else case "$LSB_DIST" in ubuntu) dpkg_install_host_archives "$src" nfs-common ;; centos|rhel|ol|rocky|amzn) yum_install_host_archives "$src" nfs-utils ;; esac fi fi if ! systemctl -q is-active nfs-utils; then systemctl start nfs-utils fi if ! systemctl -q is-enabled nfs-utils; then systemctl enable nfs-utils fi } # longhorn_run_pvmigrate calls pvmigrate to migrate longhorn data to a different storage class. If a failure happens # it rolls back the original number of volumes and replicas. function longhorn_run_pvmigrate() { local longhornStorageClass=$1 local destStorageClass=$2 local didRunValidationChecks=$3 local setDefaults=$4 local skipFreeSpaceCheckFlag="" local skipPreflightValidationFlag="" if [ "$didRunValidationChecks" == "1" ]; then skipFreeSpaceCheckFlag="--skip-free-space-check" skipPreflightValidationFlag="--skip-preflight-validation" fi local setDefaultsFlag="" if [ "$setDefaults" == "1" ]; then setDefaultsFlag="--set-defaults" fi # the flag variables are intentionally left unquoted so that empty flags expand to nothing # shellcheck disable=SC2086 if ! $BIN_PVMIGRATE --source-sc "$longhornStorageClass" --dest-sc "$destStorageClass" --rsync-image "$KURL_UTIL_IMAGE" $skipFreeSpaceCheckFlag $skipPreflightValidationFlag $setDefaultsFlag; then longhorn_restore_migration_replicas return 1 fi return 0 } # scale down prometheus, move all 'longhorn' PVCs to provided storage class, scale up prometheus # Supported storage class migrations from longhorn are: 'rook' and 'openebs' function longhorn_to_sc_migration() { local destStorageClass=$1 local didRunValidationChecks=$2 local scProvisioner scProvisioner=$(kubectl get sc "$destStorageClass" -ojsonpath='{.provisioner}') # we only support migrating to 'rook' and 'openebs' storage classes if [[ "$scProvisioner" != *"rook"* ]] && [[ "$scProvisioner" != *"openebs"* ]]; then bail "Longhorn to $scProvisioner migration is not supported" fi report_addon_start "longhorn-to-${scProvisioner}-migration" "v1" # scale down prometheus if it exists local ekcoScaledDown=0 if kubectl get namespace monitoring &>/dev/null; then if kubectl -n monitoring get prometheus k8s &>/dev/null; then # before scaling down prometheus, scale down ekco as it will otherwise restore the prometheus scale if kubernetes_resource_exists kurl deployment ekc-operator; then ekcoScaledDown=1 kubectl -n kurl scale deploy ekc-operator --replicas=0 log "Waiting for ekco pods to be removed" if ! spinner_until 120 ekco_pods_gone; then logFail "Unable to scale down ekco operator" return 1 fi fi # scale down prometheus operator pods, not the actual prometheus pods # this way pvmigrate can place PVCs on the correct nodes if migrating to OpenEBS kubectl scale deployment -n monitoring prometheus-operator --replicas=0 log "Waiting for prometheus operator pods to be removed" spinner_until 300 prometheus_operator_pods_gone fi fi # scale down ekco if kotsadm is using rqlite.
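# Note (assumed rationale, mirroring the prometheus handling above): EKCO is scaled
# down here as well so that it does not restore the kotsadm-rqlite statefulset's
# replica count while pvmigrate moves its volumes.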
if kubernetes_resource_exists default statefulset kotsadm-rqlite ; then if [ "$ekcoScaledDown" = "0" ]; then if kubernetes_resource_exists kurl deployment ekc-operator; then kubectl -n kurl scale deploy ekc-operator --replicas=0 log "Waiting for ekco pods to be removed" if ! spinner_until 120 ekco_pods_gone; then logFail "Unable to scale down ekco operator" return 1 fi fi fi fi longhornStorageClasses=$(kubectl get storageclass | grep longhorn | grep -v '(default)' | awk '{ print $1}') # any non-default longhorn StorageClasses for longhornStorageClass in $longhornStorageClasses do if ! longhorn_run_pvmigrate "$longhornStorageClass" "$destStorageClass" "$didRunValidationChecks" "0"; then bail "Failed to migrate PVCs from $longhornStorageClass to $destStorageClass" fi done longhornDefaultStorageClass=$(kubectl get storageclass | grep longhorn | grep '(default)' | awk '{ print $1}') # any default longhorn StorageClasses for longhornStorageClass in $longhornDefaultStorageClass do if ! longhorn_run_pvmigrate "$longhornStorageClass" "$destStorageClass" "$didRunValidationChecks" "1"; then bail "Failed to migrate PVCs from $longhornStorageClass to $destStorageClass" fi kubectl annotate storageclass "$longhornStorageClass" storageclass.kubernetes.io/is-default-class- done if ! longhorn_restore_migration_replicas; then log "Failed to restore the scale of Longhorn volumes and replicas. All data was successfully migrated to $destStorageClass and no action needs to be taken." fi # reset ekco scale if [ "$ekcoScaledDown" = "1" ] ; then kubectl -n kurl scale deploy ekc-operator --replicas=1 fi # reset prometheus scale if kubectl get namespace monitoring &>/dev/null; then if kubectl get prometheus -n monitoring k8s &>/dev/null; then kubectl scale deployment -n monitoring prometheus-operator --replicas=1 fi fi # print success message logSuccess "Migration from longhorn to $scProvisioner completed successfully!" report_addon_success "longhorn-to-$scProvisioner-migration" "v1" } # if PVCs have been migrated from longhorn and longhorn is no longer specified in the kURL spec, remove longhorn function maybe_cleanup_longhorn() { if [ -z "$LONGHORN_VERSION" ]; then # Just return if Longhorn is not installed. if ! kubectl get ns | grep -q longhorn-system; then return fi logStep "Removing Longhorn" if [ "$DID_MIGRATE_LONGHORN_PVCS" == "1" ]; then report_addon_start "longhorn-removal" "v1" remove_longhorn report_addon_success "longhorn-removal" "v1" return fi logFail "Unable to remove Longhorn." if [ "$DID_MIGRATE_LONGHORN_PVCS" != "1" ]; then logFail "Storage class migration did not succeed" fi fi } # longhorn_pvs_removed returns true when we can't find any pv using the longhorn csi driver. function longhorn_pvs_removed() { local pvs pvs=$(kubectl get pv -o=jsonpath='{.items[*].spec.csi.driver}' | grep "longhorn" | wc -l) [ "$pvs" = "0" ] } # remove_longhorn deletes everything longhorn related: deployments, CR objects, and CRDs. function remove_longhorn() { # make sure there aren't any PVs using longhorn before deleting it log "Waiting for Longhorn PVs to be removed" if ! spinner_until 60 longhorn_pvs_removed; then # sometimes longhorn hangs and we need to restart kubelet to make it work again, we # are going to give this approach a try here before bailing out. logWarn "Some Longhorn PVs are still online, trying to restart kubelet." systemctl restart kubelet log "Waiting for Longhorn PVs to be removed" if !
spinner_until 60 longhorn_pvs_removed; then logFail "There are still PVs using Longhorn." logFail "Remove these PVs before continuing." kubectl get pv -o=jsonpath='{.items[*].spec.csi.driver}' | grep "longhorn" exit 1 fi fi # scale ekco to 0 replicas if it exists if kubernetes_resource_exists kurl deployment ekc-operator; then kubectl -n kurl scale deploy ekc-operator --replicas=0 log "Waiting for ekco pods to be removed" if ! spinner_until 120 ekco_pods_gone; then logWarn "Unable to scale down ekco operator" fi fi # remove longhorn volumes first so the operator can correctly delete them. log "Removing Longhorn volumes:" kubectl delete volumes.longhorn.io -n longhorn-system --all # once volumes have been gone we can remove all other longhorn CR objects. log "Removing Longhorn custom resource objects - this may take some time:" kubectl get crd | grep 'longhorn' | grep -v 'volumes' | awk '{ print $1 }' | xargs -I'{}' kubectl -n longhorn-system delete '{}' --all # delete longhorn CRDs log "Removing Longhorn custom resources:" kubectl get crd | grep 'longhorn' | awk '{ print $1 }' | xargs -I'{}' kubectl delete crd '{}' # delete longhorn ns kubectl delete ns longhorn-system # delete longhorn storageclass(es) log "Removing Longhorn StorageClasses" kubectl get storageclass | grep longhorn | awk '{ print $1 }' | xargs -I'{}' kubectl delete storageclass '{}' # scale ekco back to 1 replicas if it exists if kubernetes_resource_exists kurl deployment ekc-operator; then kubectl -n kurl scale deploy ekc-operator --replicas=1 fi logSuccess "Removed Longhorn successfully" } # longhorn_prepare_for_migration checks if longhorn is healthy and if it is, it will scale down the all pods mounting # longhorn volumes. if a failure happen during the preparation fase the migration won't be executed and the user will # receive a message to restore the cluster to its previous state. function longhorn_prepare_for_migration() { if "$DIR"/bin/kurl longhorn prepare-for-migration; then return 0 fi logFail "Preparation for longhorn migration failed. Please review the preceding messages for further details." logFail "During the preparation for Longhorn, some replicas may have been scaled down to 0. Would you like to" logFail "restore the system to its original state?" if confirmY; then log "Restoring Longhorn replicas to their original state" longhorn_restore_migration_replicas fi return 1 } # longhorn_restore_migration_replicas scales up all longhorn volumes, deployment and statefulset replicas to their # original values. function longhorn_restore_migration_replicas() { "$DIR"/bin/kurl longhorn rollback-migration-replicas } #!/bin/bash # kubernetes_upgrade_preflight checks if kubernetes should be upgraded, and if so prompts the user # to confirm the upgrade. function kubernetes_upgrade_preflight() { local desired_version="$KUBERNETES_VERSION" if ! kubernetes_upgrade_should_upgrade_kubernetes ; then return fi local current_version= current_version="$(kubernetes_upgrade_discover_min_kubernetes_version)" if ! kubernetes_upgrade_prompt "$current_version" "$desired_version" ; then bail "Not upgrading Kubernetes" fi # use CURRENT_KUBERNETES_VERSION as that is the lowest version on this node if ! kubernetes_upgrade_storage_check "$CURRENT_KUBERNETES_VERSION" "$desired_version" ; then bail "Not upgrading Kubernetes" fi } # report_upgrade_kubernetes starts the kubernetes upgrade process. function report_upgrade_kubernetes() { local desired_version="$KUBERNETES_VERSION" if ! 
kubernetes_upgrade_should_upgrade_kubernetes ; then enable_rook_ceph_operator return fi local current_version= current_version="$(kubernetes_upgrade_discover_min_kubernetes_version)" kubernetes_upgrade_report_upgrade_kubernetes "$current_version" "$desired_version" } # kubernetes_upgrade_discover_min_kubernetes_version will return the lowest kubernetes version on # the cluster. function kubernetes_upgrade_discover_min_kubernetes_version() { if [ -z "$CURRENT_KUBERNETES_VERSION" ]; then return fi # These versions are for the local primary semverParse "$CURRENT_KUBERNETES_VERSION" # shellcheck disable=SC2154 local min_minor="$minor" # shellcheck disable=SC2154 local min_patch="$patch" # Check for upgrades required on remote primaries for i in "${!KUBERNETES_REMOTE_PRIMARIES[@]}" ; do semverParse "${KUBERNETES_REMOTE_PRIMARY_VERSIONS[$i]}" if [ "$minor" -lt "$min_minor" ] || { [ "$minor" -eq "$min_minor" ] && [ "$patch" -lt "$min_patch" ]; }; then min_minor="$minor" min_patch="$patch" fi done # Check for upgrades required on secondaries for i in "${!KUBERNETES_SECONDARIES[@]}" ; do semverParse "${KUBERNETES_SECONDARY_VERSIONS[$i]}" if [ "$minor" -lt "$min_minor" ] || { [ "$minor" -eq "$min_minor" ] && [ "$patch" -lt "$min_patch" ]; }; then min_minor="$minor" min_patch="$patch" fi done echo "1.$min_minor.$min_patch" } # kubernetes_upgrade_report_upgrade_kubernetes reports the upgrade and starts the upgrade process. function kubernetes_upgrade_report_upgrade_kubernetes() { local current_version="$1" local desired_version="$2" local from_version= from_version="$(common_upgrade_version_to_major_minor "$current_version")" local kubernetes_upgrade_version="v1.0.0" # if you change this code, change the version report_addon_start "kubernetes_upgrade_${from_version}_to_${desired_version}" "$kubernetes_upgrade_version" export REPORTING_CONTEXT_INFO="kubernetes_upgrade_${from_version}_to_${desired_version} $kubernetes_upgrade_version" kubernetes_upgrade "$current_version" "$desired_version" export REPORTING_CONTEXT_INFO="" report_addon_success "kubernetes_upgrade_${from_version}_to_${desired_version}" "$kubernetes_upgrade_version" } # kubernetes_upgrade upgrades will fetch the add-on and load the images for the upgrade and finally # run the upgrade script. function kubernetes_upgrade() { local current_version="$1" local desired_version="$2" disable_rook_ceph_operator # when invoked in a subprocess the failure of this function will not cause the script to exit # sanity check that the version is valid common_upgrade_step_versions "${STEP_VERSIONS[*]}" "$current_version" "$desired_version" 1>/dev/null logStep "Upgrading Kubernetes from $current_version to $desired_version" common_upgrade_print_list_of_minor_upgrades "$current_version" "$desired_version" echo "This may take some time." kubernetes_upgrade_addon_fetch "$current_version" "$desired_version" kubernetes_upgrade_prompt_missing_assets "$current_version" "$desired_version" kubernetes_upgrade_do_kubernetes_upgrade "$current_version" "$desired_version" enable_rook_ceph_operator logSuccess "Successfully upgraded Kubernetes from $current_version to $desired_version" } # kubernetes_upgrade_do_kubernetes_upgrade will step through each minor version upgrade from # $current_version to $desired_version function kubernetes_upgrade_do_kubernetes_upgrade() { local current_version="$1" local desired_version="$2" local step= while read -r step ; do if [ -z "$step" ] || [ "$step" = "0.0.0" ]; then continue fi if [ ! 
-d "$DIR/packages/kubernetes/$step/assets" ] ; then bail "Kubernetes version $step not found" fi logStep "Upgrading cluster to Kubernetes version $step" upgrade_kubernetes_local_master "$step" upgrade_kubernetes_remote_masters "$step" upgrade_kubernetes_workers "$step" # if this is not the last version in the loop, then delete the addon files to free up space if [ "$step" != "$desired_version" ]; then rm -f "$DIR/assets/kubernetes-$step.tar.gz" rm -rf "$DIR/packages/kubernetes/$step" fi # workaround as some code relies on this legacy label kubectl label --overwrite node --selector="node-role.kubernetes.io/control-plane" node-role.kubernetes.io/master= logSuccess "Cluster upgraded to Kubernetes version $step successfully" done <<< "$(common_upgrade_step_versions "${STEP_VERSIONS[*]}" "$current_version" "$desired_version")" if [ -n "$AIRGAP_MULTI_ADDON_PACKAGE_PATH" ]; then # delete the airgap package files to free up space rm -f "$AIRGAP_MULTI_ADDON_PACKAGE_PATH" fi } # kubernetes_upgrade_should_upgrade_kubernetes uses the KUBERNETES_UPGRADE environment variable set # by discoverCurrentKubernetesVersion() function kubernetes_upgrade_should_upgrade_kubernetes() { [ "$KUBERNETES_UPGRADE" = "1" ] } # kubernetes_upgrade_prompt prompts the user to confirm the kubernetes upgrade. function kubernetes_upgrade_prompt() { local current_version="$1" local desired_version="$2" logWarn "$(printf "This script will upgrade Kubernetes from %s to %s." "$current_version" "$desired_version")" logWarn "Upgrading Kubernetes will take some time." printf "Would you like to continue? " confirmY } # kubernetes_upgrade_storage_check verifies that enough disk space exists for the kubernetes # upgrade to complete successfully. function kubernetes_upgrade_storage_check() { local current_version="$1" local desired_version="$2" local archive_size= archive_size="$(kubernetes_upgrade_required_archive_size "$current_version" "$desired_version")" # 2x archive size for extracted files # 3.5x archive size for container images common_upgrade_storage_check "$archive_size" 2 $((7/2)) "Kubernetes" } # kubernetes_upgrade_required_archive_size will determine the approximate size of the archive that # will be downloaded to upgrade between the supplied kubernetes versions. The amount of space # required within $KURL_INSTALL_DIRECTORY and /var/lib/containerd or /var/lib/docker can then be # derived from this (2x archive size in kurl, 3.5x in containerd/docker). function kubernetes_upgrade_required_archive_size() { local current_version="$1" local desired_version="$2" # 934.8 MB is the size of the kubernetes-1.26.3.tar.gz archive which is the largest archive local bundle_size_upper_bounds=935 local total_archive_size=0 local step= while read -r step ; do if [ -z "$step" ] || [ "$step" = "0.0.0" ]; then continue fi total_archive_size=$((total_archive_size + bundle_size_upper_bounds)) done <<< "$(common_upgrade_step_versions "${STEP_VERSIONS[*]}" "$current_version" "$desired_version")" echo "$total_archive_size" } # kubernetes_upgrade_addon_fetch will fetch all add-on versions from $current_version to # $desired_version. function kubernetes_upgrade_addon_fetch() { if [ "$AIRGAP" = "1" ]; then kubernetes_upgrade_addon_fetch_airgap "$@" else kubernetes_upgrade_addon_fetch_online "$@" fi } # kubernetes_upgrade_addon_fetch_online will fetch all add-on versions, one at a time, from # $current_version to $desired_version. 
function kubernetes_upgrade_addon_fetch_online() { local current_version="$1" local desired_version="$2" logStep "Downloading assets required for Kubernetes $current_version to $desired_version upgrade" local step= while read -r step ; do if [ -z "$step" ] || [ "$step" = "0.0.0" ]; then continue fi kubernetes_upgrade_addon_fetch_online_step "kubernetes" "$step" done <<< "$(common_upgrade_step_versions "${STEP_VERSIONS[*]}" "$current_version" "$desired_version")" logSuccess "Assets loaded for Kubernetes $current_version to $desired_version upgrade" } # kubernetes_upgrade_addon_fetch_online_step will fetch an individual add-on version. function kubernetes_upgrade_addon_fetch_online_step() { local version="$2" kubernetes_get_host_packages_online "$version" } # kubernetes_upgrade_addon_fetch_airgap will prompt the user to fetch all add-on versions from # $current_version to $desired_version. function kubernetes_upgrade_addon_fetch_airgap() { local current_version="$1" local desired_version="$2" # the last version already included in the airgap bundle local version_less_one= version_less_one="$(common_upgrade_major_minor_less_one "$desired_version")" if kubernetes_upgrade_has_all_addon_version_packages "$current_version" "$version_less_one" ; then local node_missing_images= # shellcheck disable=SC2086 node_missing_images=$(kubernetes_upgrade_nodes_missing_images "$current_version" "$version_less_one" "$(get_local_node_name)" "") if [ -z "$node_missing_images" ]; then log "All assets required for Kubernetes $current_version to $desired_version upgrade are present on this node" return fi fi logStep "Downloading assets required for Kubernetes $current_version to $desired_version upgrade" local addon_versions=() local step= while read -r step ; do if [ -z "$step" ] || [ "$step" = "0.0.0" ]; then continue fi addon_versions+=( "kubernetes-$step" ) done <<< "$(common_upgrade_step_versions "${STEP_VERSIONS[*]}" "$current_version" "$version_less_one")" addon_fetch_multiple_airgap "${addon_versions[@]}" logSuccess "Assets loaded for Kubernetes $current_version to $desired_version upgrade" } # kubernetes_upgrade_has_all_addon_version_packages will return 1 if any add-on versions are # missing that are necessary to perform the upgrade. function kubernetes_upgrade_has_all_addon_version_packages() { local current_version="$1" local desired_version="$2" local step= while read -r step ; do if [ -z "$step" ] || [ "$step" = "0.0.0" ]; then continue fi if [ ! -f "packages/kubernetes/$step/Manifest" ]; then return 1 fi done <<< "$(common_upgrade_step_versions "${STEP_VERSIONS[*]}" "$current_version" "$desired_version")" return 0 } # kubernetes_upgrade_prompt_missing_assets prompts the user to run the command to load assets on # all remote nodes before proceeding. 
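# Hedged example of the intended behaviour (the checks below are authoritative): on an airgapped
# cluster jumping more than one minor version, e.g. 1.28.x straight to 1.30.11, the intermediate
# assets are not in the bundle, so each remote node is prompted to run something along the lines of
#   cat ./tasks.sh | sudo bash -s kubernetes-upgrade-load-assets from-version=1.28.15 to-version=1.30.11 airgap
# (the exact wording comes from common_prompt_task_missing_assets; single-minor upgrades skip the prompt).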
function kubernetes_upgrade_prompt_missing_assets() {
    local current_version="$1"
    local desired_version="$2"

    # online installs will load assets as part of the upgrade.sh script
    if [ "$AIRGAP" != "1" ]; then
        return
    fi

    # if we are only upgrading one minor version, then we don't need to prompt for assets as they
    # are part of the airgap bundle
    local version_less_one=
    version_less_one="$(common_upgrade_major_minor_less_one "$desired_version")"
    if [ "$(common_upgrade_compare_versions "$current_version" "$version_less_one")" -ge "0" ]; then
        return
    fi

    # always prompt on all nodes because assets are not only images
    common_prompt_task_missing_assets \
        "$(kubernetes_remote_nodes | awk '{ print $1 }')" \
        "$current_version" "$desired_version" "Kubernetes" "kubernetes-upgrade-load-assets"
}

# kubernetes_upgrade_nodes_missing_images will print a list of nodes that are missing images for
# the given kubernetes versions.
function kubernetes_upgrade_nodes_missing_images() {
    local current_version="$1"
    local desired_version="$2"
    local target_host="$3"
    local exclude_hosts="$4"

    local images_list=
    images_list="$(kubernetes_upgrade_images_list "$current_version" "$desired_version")"
    if [ -z "$images_list" ]; then
        return
    fi

    kubernetes_nodes_missing_images "$images_list" "$target_host" "$exclude_hosts"
}

# kubernetes_upgrade_images_list will print a list of images for the given kubernetes versions.
function kubernetes_upgrade_images_list() {
    local current_version="$1"
    local desired_version="$2"

    local images_list=
    local step=
    while read -r step ; do
        if [ -z "$step" ] || [ "$step" = "0.0.0" ]; then
            continue
        fi
        images_list="$(common_upgrade_merge_images_list \
            "$images_list" \
            "$(common_list_images_in_manifest_file "packages/kubernetes/$step/Manifest")" \
        )"
    done <<< "$(common_upgrade_step_versions "${STEP_VERSIONS[*]}" "$current_version" "$desired_version")"

    echo "$images_list"
}

# kubernetes_upgrade_tasks_load_assets is called from tasks.sh to load assets on remote nodes for the
# kubernetes upgrade.
function kubernetes_upgrade_tasks_load_assets() {
    local from_version=
    local to_version=
    local airgap=

    common_upgrade_tasks_params "$@"

    common_task_require_param "from-version" "$from_version"
    common_task_require_param "to-version" "$to_version"

    if [ "$airgap" = "1" ]; then
        export AIRGAP=1
    fi

    export KUBECONFIG=/etc/kubernetes/admin.conf

    download_util_binaries

    if ! kubernetes_upgrade_storage_check "$from_version" "$to_version" ; then
        bail "Failed storage check"
    fi

    if !
kubernetes_upgrade_addon_fetch "$from_version" "$to_version" ; then bail "Failed to load assets" fi } function upgrade_kubeadm() { local k8sVersion=$1 upgrade_maybe_remove_kubeadm_network_plugin_flag "$k8sVersion" cp -f "$DIR/packages/kubernetes/$k8sVersion/assets/kubeadm" /usr/bin/ chmod a+rx /usr/bin/kubeadm } KUBERNETES_UPGRADE_IGNORE_PREFLIGHT_ERRORS="${KUBERNETES_UPGRADE_IGNORE_PREFLIGHT_ERRORS:-}" function upgrade_kubernetes_local_master() { local targetK8sVersion="$1" local nodeName= nodeName="$(get_local_node_name)" # shellcheck disable=SC2034 local upgrading_kubernetes=true local nodeVersion= nodeVersion="$(kubectl get node --no-headers "$nodeName" 2>/dev/null | awk '{ print $5 }' | sed 's/v//')" if [ -z "$nodeVersion" ]; then nodeVersion="$(discover_local_kubernetes_version)" fi # check if the node is already at the target version semverCompare "$nodeVersion" "$targetK8sVersion" if [ "$SEMVER_COMPARE_RESULT" -ge "0" ]; then log "Node $nodeName is already at Kubernetes version $targetK8sVersion" return 0 fi logStep "Upgrading local node to Kubernetes version $targetK8sVersion" kubernetes_load_images "$targetK8sVersion" upgrade_kubeadm "$targetK8sVersion" ( set -x; kubeadm upgrade plan "v${targetK8sVersion}" --ignore-preflight-errors="$KUBERNETES_UPGRADE_IGNORE_PREFLIGHT_ERRORS" ) printf "%bDrain local node and apply upgrade? %b" "$YELLOW" "$NC" confirmY kubernetes_drain "$nodeName" maybe_patch_node_cri_socket_annotation "$nodeName" spinner_kubernetes_api_stable # ignore-preflight-errors, do not fail on fail to pull images for airgap ( set -x; kubeadm upgrade apply "v$targetK8sVersion" --yes --force --ignore-preflight-errors=all ) upgrade_etcd_image_18 "$targetK8sVersion" # kubelet command line argument, '--container-runtime', was removed in Kubernetes 1.27 upgrade_should_remove_container_runtime_flag "$targetK8sVersion" kubernetes_install_host_packages "$targetK8sVersion" systemctl daemon-reload systemctl restart kubelet spinner_kubernetes_api_stable kubectl uncordon "$nodeName" upgrade_delete_node_flannel "$nodeName" # force deleting the cache because the api server will use the stale API versions after kubeadm upgrade rm -rf "$HOME/.kube" spinner_until 120 kubernetes_node_has_version "$nodeName" "$targetK8sVersion" spinner_until 120 kubernetes_all_nodes_ready logSuccess "Local node upgraded to Kubernetes version $targetK8sVersion" } function upgrade_kubernetes_remote_masters() { local k8sVersion="$1" while read -r node ; do local nodeName= nodeName=$(echo "$node" | awk '{ print $1 }') logStep "Upgrading remote primary node $nodeName to Kubernetes version $k8sVersion" upgrade_kubernetes_remote_node "$node" "$k8sVersion" logSuccess "Remote primary node $nodeName upgraded to Kubernetes version $k8sVersion" done < <(try_1m kubernetes_remote_masters) spinner_until 120 kubernetes_all_nodes_ready } function upgrade_kubernetes_workers() { local k8sVersion="$1" while read -r node ; do local nodeName= nodeName=$(echo "$node" | awk '{ print $1 }') logStep "Upgrading remote worker node $nodeName to Kubernetes version $k8sVersion" upgrade_kubernetes_remote_node "$node" "$k8sVersion" logSuccess "Remote worker node $nodeName upgraded to Kubernetes version $k8sVersion" done < <(try_1m kubernetes_workers) } function upgrade_kubernetes_remote_node() { # one line of output from `kubectl get nodes` local node="$1" local targetK8sVersion="$2" local nodeName= nodeName=$(echo "$node" | awk '{ print $1 }') local nodeVersion= nodeVersion="$(echo "$node" | awk '{ print $5 }' | sed 's/v//' )" # check if 
the node is already at the target version semverCompare "$nodeVersion" "$targetK8sVersion" if [ "$SEMVER_COMPARE_RESULT" -ge "0" ]; then log "Node $nodeName is already at Kubernetes version $targetK8sVersion" return 0 fi DOCKER_REGISTRY_IP=$(kubectl -n kurl get service registry -o=jsonpath='{@.spec.clusterIP}' 2>/dev/null || echo "") printf "\n%bDrain node $nodeName to prepare for upgrade? %b" "$YELLOW" "$NC" confirmY kubernetes_drain "$nodeName" local common_flags common_flags="${common_flags}$(get_docker_registry_ip_flag "${DOCKER_REGISTRY_IP}")" local no_proxy_addresses="" [ -n "$ADDITIONAL_NO_PROXY_ADDRESSES" ] && no_proxy_addresses="$ADDITIONAL_NO_PROXY_ADDRESSES" [ -n "$service_cidr" ] && no_proxy_addresses="${no_proxy_addresses:+$no_proxy_addresses,}$service_cidr" [ -n "$pod_cidr" ] && no_proxy_addresses="${no_proxy_addresses:+$no_proxy_addresses,}$pod_cidr" [ -n "$no_proxy_addresses" ] && common_flags="${common_flags}$(get_additional_no_proxy_addresses_flag 1 "$no_proxy_addresses")" common_flags="${common_flags}$(get_kurl_install_directory_flag "${KURL_INSTALL_DIRECTORY_FLAG}")" common_flags="${common_flags}$(get_remotes_flags)" printf "\n\n\tRun the upgrade script on remote node to proceed: %b%s%b\n\n" "$GREEN" "$nodeName" "$NC" if [ "$AIRGAP" = "1" ]; then local command= command=$(printf "cat ./upgrade.sh | sudo bash -s airgap kubernetes-version=%s%s" "$targetK8sVersion" "$common_flags") echo "$command yes" > "$DIR/remotes/$nodeName" printf "\t%b%s%b\n\n" "$GREEN" "$command" "$NC" else local prefix= prefix="$(build_installer_prefix "${INSTALLER_ID}" "${KURL_VERSION}" "${KURL_URL}" "${PROXY_ADDRESS}" "${PROXY_HTTPS_ADDRESS}")" local command= command=$(printf "%supgrade.sh | sudo bash -s kubernetes-version=%s%s" "$prefix" "$targetK8sVersion" "$common_flags") echo "$command yes" > "$DIR/remotes/$nodeName" printf "\t%b %s%b\n\n" "$GREEN" "$command" "$NC" fi rm -rf "$HOME/.kube" spinner_until -1 kubernetes_node_has_version "$nodeName" "$targetK8sVersion" logSuccess "Kubernetes $targetK8sVersion detected on $nodeName" kubectl uncordon "$nodeName" upgrade_delete_node_flannel "$nodeName" spinner_until 120 kubernetes_all_nodes_ready } # In k8s 1.18 the etcd image tag changed from 3.4.3 to 3.4.3-0 but kubeadm does not rewrite the # etcd manifest to use the new tag. When kubeadm init is run after the upgrade it switches to the # tag and etcd takes a few minutes to restart, which often results in kubeadm init failing. 
This # forces use of the updated tag so that the restart of etcd happens during upgrade when the node is # already drained function upgrade_etcd_image_18() { semverParse "$1" if [ "$minor" != "18" ]; then return 0 fi local etcd_tag= etcd_tag=$(kubeadm config images list 2>/dev/null | grep etcd | awk -F':' '{ print $NF }') sed -i "s/image: k8s.gcr.io\/etcd:.*/image: k8s.gcr.io\/etcd:$etcd_tag/" /etc/kubernetes/manifests/etcd.yaml } # Workaround to fix "kubeadm upgrade node" error: # "error execution phase preflight: docker is required for container runtime: exec: "docker": executable file not found in $PATH" # See https://github.com/kubernetes/kubeadm/issues/2364 function maybe_patch_node_cri_socket_annotation() { local node="$1" if [ -n "$DOCKER_VERSION" ] || [ -z "$CONTAINERD_VERSION" ]; then return fi if kubectl get node "$node" -ojsonpath='{.metadata.annotations.kubeadm\.alpha\.kubernetes\.io/cri-socket}' | grep -q "dockershim.sock" ; then kubectl annotate node "$node" --overwrite "kubeadm.alpha.kubernetes.io/cri-socket=unix:///run/containerd/containerd.sock" fi } # When there has been a migration from Docker to Containerd the kubeadm-flags.env file may contain # the flag "--network-plugin" which has been removed as of Kubernetes 1.24 and causes the Kubelet # to fail with "Error: failed to parse kubelet flag: unknown flag: --network-plugin". This function # will remove the erroneous flag from the file. function upgrade_maybe_remove_kubeadm_network_plugin_flag() { local k8sVersion=$1 if [ "$(kubernetes_version_minor "$k8sVersion")" -lt "24" ]; then return fi sed -i 's/ \?--network-plugin \?[^ "]*//' /var/lib/kubelet/kubeadm-flags.env } # delete the flannel pod on the node so that CNI plugin binaries are recreated # workaround for https://github.com/kubernetes/kubernetes/issues/115629 function upgrade_delete_node_flannel() { local node="$1" if kubectl get ns 2>/dev/null | grep -q kube-flannel; then kubectl delete pod -n kube-flannel --field-selector="spec.nodeName=$node" fi } # Kubernetes 1.24 deprecated the '--container-runtime' kubelet argument in 1.24 and removed it in 1.27 # See: https://kubernetes.io/blog/2023/03/17/upcoming-changes-in-kubernetes-v1-27/#removal-of-container-runtime-command-line-argument function upgrade_should_remove_container_runtime_flag() { local k8sVersion=$1 if [ "$(kubernetes_version_minor "$k8sVersion")" -ge "27" ]; then sed -i 's/--container-runtime=remote //' "$KUBELET_FLAGS_FILE" fi } function download_util_binaries() { if [ -z "$AIRGAP" ] && [ -n "$DIST_URL" ]; then package_download "${KURL_BIN_UTILS_FILE}" tar xzf "$(package_filepath "${KURL_BIN_UTILS_FILE}")" --no-same-owner fi export BIN_KURL=$DIR/bin/kurl BIN_SYSTEM_CONFIG=$DIR/bin/config BIN_YAMLUTIL=$DIR/bin/yamlutil BIN_DOCKER_CONFIG=$DIR/bin/docker-config BIN_SUBNET=$DIR/bin/subnet BIN_INSTALLERMERGE=$DIR/bin/installermerge BIN_YAMLTOBASH=$DIR/bin/yamltobash BIN_BASHTOYAML=$DIR/bin/bashmerge BIN_PVMIGRATE=$DIR/bin/pvmigrate export BIN_ROOK_PVMIGRATOR=$DIR/bin/rook-pv-migrator mkdir -p /tmp/kurl-bin-utils/scripts CONFIGURE_SELINUX_SCRIPT=/tmp/kurl-bin-utils/scripts/configure_selinux.sh CONFIGURE_FIREWALLD_SCRIPT=/tmp/kurl-bin-utils/scripts/configure_firewalld.sh CONFIGURE_IPTABLES_SCRIPT=/tmp/kurl-bin-utils/scripts/configure_iptables.sh mkdir -p /tmp/kurl-bin-utils/specs MERGED_YAML_SPEC=/tmp/kurl-bin-utils/specs/merged.yaml VENDOR_PREFLIGHT_SPEC=/tmp/kurl-bin-utils/specs/vendor-preflight.yaml PARSED_YAML_SPEC=/tmp/kurl-bin-utils/scripts/variables.sh } function apply_bash_flag_overrides() { if [ -n 
"$1" ]; then $BIN_BASHTOYAML -c $MERGED_YAML_SPEC -f "$*" fi } function parse_yaml_into_bash_variables() { $BIN_YAMLTOBASH -i $MERGED_YAML_SPEC -b $PARSED_YAML_SPEC source $PARSED_YAML_SPEC rm $PARSED_YAML_SPEC } parse_kubernetes_target_version() { semverParse "$KUBERNETES_VERSION" KUBERNETES_TARGET_VERSION_MAJOR="$major" KUBERNETES_TARGET_VERSION_MINOR="$minor" KUBERNETES_TARGET_VERSION_PATCH="$patch" } function yaml_airgap() { # this is needed because the parsing for yaml comes after the first occasion where the $AIRGAP flag is used # we also account for if $INSTALLER_YAML spec has "$AIRGAP and "INSTALLER_SPEC_FILE spec turns it off" if [[ "$INSTALLER_YAML" =~ "airgap: true" ]]; then AIRGAP="1" fi if [ -n "$INSTALLER_SPEC_FILE" ]; then if grep -q "airgap: true" $INSTALLER_SPEC_FILE; then AIRGAP="1" fi if grep -q "airgap: false" $INSTALLER_SPEC_FILE; then AIRGAP="" fi fi } function get_patch_yaml() { while [ "$1" != "" ]; do _param="$(echo "$1" | cut -d= -f1)" _value="$(echo "$1" | grep '=' | cut -d= -f2-)" case $_param in installer-spec-file) if [ -n "$_value" ]; then INSTALLER_SPEC_FILE="$(readlink -f "$_value")" # resolve relative paths before we pushd fi ;; additional-no-proxy-addresses) ;; airgap) AIRGAP="1" ;; kurl-registry-ip) KURL_REGISTRY_IP="$_value" ;; cert-key) ;; control-plane) ;; docker-registry-ip) ;; ekco-address) if [ -z "$EKCO_ADDRESS" ]; then EKCO_ADDRESS="$_value" fi ;; ekco-auth-token) if [ -z "$EKCO_AUTH_TOKEN" ]; then EKCO_AUTH_TOKEN="$_value" fi ;; ekco-enable-internal-load-balancer) ;; ha) ;; kubernetes-cis-compliance) ;; kubernetes-cluster-name) ;; aws-exclude-storage-class) ;; ignore-remote-load-images-prompt) ;; ignore-remote-upgrade-prompt) ;; container-log-max-size) ;; container-log-max-files) ;; kubernetes-max-pods-per-node) ;; kubeadm-token) ;; kubeadm-token-ca-hash) ;; kubernetes-load-balancer-use-first-primary) ;; kubernetes-master-address) ;; kubernetes-version) ;; kubernetes-init-ignore-preflight-errors) ;; kubernetes-upgrade-ignore-preflight-errors) ;; kurl-install-directory) if [ -n "$_value" ]; then KURL_INSTALL_DIRECTORY_FLAG="${_value}" KURL_INSTALL_DIRECTORY="$(realpath ${_value})/kurl" fi ;; labels) NODE_LABELS="$_value" ;; load-balancer-address) ;; storage-migration-ready-timeout) STORAGE_MIGRATION_READY_TIMEOUT="${_value}" ;; # Legacy Command preflight-ignore) ;; host-preflight-ignore) ;; # Legacy Command preflight-ignore-warnings) ;; host-preflight-enforce-warnings) ;; dismiss-host-packages-preflight) # possibly add this to the spec # shellcheck disable=SC2034 KURL_DISMISS_HOST_PACKAGES_PREFLIGHT=1 ;; preserve-docker-config) ;; preserve-firewalld-config) ;; preserve-iptables-config) ;; preserve-selinux-config) ;; public-address) ;; private-address) ;; yes) ASSUME_YES=1 ;; auto-upgrades-enabled) # no longer supported ;; primary-host) if [ -z "$PRIMARY_HOST" ]; then PRIMARY_HOST="$_value" else PRIMARY_HOST="$PRIMARY_HOST,$_value" fi ;; secondary-host) if [ -z "$SECONDARY_HOST" ]; then SECONDARY_HOST="$_value" else SECONDARY_HOST="$SECONDARY_HOST,$_value" fi ;; # deprecated flag force-reapply-addons) logWarn "WARN: 'force-reapply-addon' option is deprecated" ;; skip-system-package-install) SKIP_SYSTEM_PACKAGE_INSTALL=1 ;; # legacy command alias exclude-builtin-preflights) EXCLUDE_BUILTIN_HOST_PREFLIGHTS=1 ;; exclude-builtin-host-preflights) EXCLUDE_BUILTIN_HOST_PREFLIGHTS=1 ;; app-version-label) KOTSADM_APPLICATION_VERSION_LABEL="$_value" ;; ipv6) IPV6_ONLY=1 ;; velero-restic-timeout) VELERO_RESTIC_TIMEOUT="$_value" ;; velero-server-flags) 
VELERO_SERVER_FLAGS="$_value" ;; *) echo >&2 "Error: unknown parameter \"$_param\"" exit 1 ;; esac shift done } function merge_yaml_specs() { if [ -z "$INSTALLER_SPEC_FILE" ] && [ -z "$INSTALLER_YAML" ]; then echo "no yaml spec found" bail fi if [ -z "$INSTALLER_YAML" ]; then cp -f $INSTALLER_SPEC_FILE $MERGED_YAML_SPEC ONLY_APPLY_MERGED=1 return fi if [ -z "$INSTALLER_SPEC_FILE" ]; then cat > $MERGED_YAML_SPEC < /tmp/vendor_kurl_installer_spec_docker.yaml </dev/null 2>&1 ; then if [ -n "$(kubeadm_cluster_configuration | grep 'controlPlaneEndpoint:' | sed 's/controlPlaneEndpoint: \|"//g')" ]; then HA_CLUSTER=1 fi fi } function get_addon_config() { local addon_name=$1 addon_name=$(kebab_to_camel "$addon_name") $BIN_YAMLUTIL -j -fp $MERGED_YAML_SPEC -jf "spec.$addon_name" } #!/bin/bash function render_yaml() { eval "echo \"$(cat $DIR/yaml/$1)\"" } function render_yaml_file() { eval "echo \"$(cat $1)\"" } function render_yaml_file_2() { local file="$1" if [ ! -f "$file" ]; then logFail "File $file does not exist" return 1 fi local data= data=$(< "$file") local delimiter="__apply_shell_expansion_delimiter__" local command="cat <<$delimiter"$'\n'"$data"$'\n'"$delimiter" eval "$command" } function render_file() { eval "echo \"$(cat $1)\"" } function insert_patches_strategic_merge() { local kustomization_file="$1" local patch_file="$2" # we care about the current kubernetes version here, not the target version - this function can be called from pre-init addons local kubeletVersion= kubeletVersion="$(kubelet_version)" semverParse "$kubeletVersion" local kubeletMinor="$minor" # # Kubernetes 1.27 uses kustomize v5 which dropped support for old, legacy style patches # # See: https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.27.md#changelog-since-v1270 # if [ "$kubeletMinor" -ge "27" ]; then # if [[ $kustomization_file =~ "prometheus" ]] || [[ $kustomization_file =~ "rook" ]]; then # # TODO: multi-doc patches is not currently supported in kustomize v5 # # continue using the deprecated 'patchesStrategicMerge' field until this is fixed # # Ref: https://github.com/kubernetes-sigs/kustomize/issues/5040 # if ! grep -q "patchesStrategicMerge" "$kustomization_file"; then # echo "patchesStrategicMerge:" >> "$kustomization_file" # fi # sed -i "/patchesStrategicMerge.*/a - $patch_file" "$kustomization_file" # else # if ! grep -q "^patches:" "$kustomization_file"; then # echo "patches:" >> "$kustomization_file" # fi # sed -i "/patches:/a - path: $patch_file" "$kustomization_file" # fi # return # fi if ! grep -q "patchesStrategicMerge" "$kustomization_file"; then echo "patchesStrategicMerge:" >> "$kustomization_file" fi sed -i "/patchesStrategicMerge.*/a - $patch_file" "$kustomization_file" } function insert_resources() { local kustomization_file="$1" local resource_file="$2" if ! grep -q "resources[ \"]*:" "$kustomization_file"; then echo "resources:" >> "$kustomization_file" fi sed -i "/resources:.*/a - $resource_file" "$kustomization_file" } function insert_bases() { local kustomization_file="$1" local base_file="$2" local kubectl_client_minor_version= if commandExists "kubectl" ; then kubectl_client_minor_version="$(kubectl_client_version | cut -d '.' -f2)" else kubectl_client_minor_version="$(echo "$KUBERNETES_VERSION" | cut -d '.' 
-f2)" fi # bases was deprecated in kustomize v2.1.0 in favor of resources # https://github.com/kubernetes-sigs/kustomize/blob/661743c7e5bd8c3d9d6866b6bc0a6f0e0b0512eb/site/content/en/blog/releases/v2.1.0.md # https://github.com/kubernetes-sigs/kustomize#kubectl-integration # Kubectl version: v1.14-v1.20, Kustomize version: v2.0.3 if [ -n "$kubectl_client_minor_version" ] && [ "$kubectl_client_minor_version" -gt "20" ]; then insert_resources "$kustomization_file" "$base_file" return fi if ! grep -q "bases[ \"]*:" "$kustomization_file"; then echo "bases:" >> "$kustomization_file" fi sed -i "/bases:.*/a - $base_file" "$kustomization_file" } function insert_patches_json_6902() { local kustomization_file="$1" local patch_file="$2" local group="$3" local version="$4" local kind="$5" local name="$6" local namespace="$7" if ! grep -q "patchesJson6902" "$kustomization_file"; then echo "patchesJson6902:" >> "$kustomization_file" fi # 'fourspace_' and 'twospace_' are used because spaces at the beginning of each line are stripped sed -i "/patchesJson6902.*/a- target:\n\ fourspace_ group: $group\n\ fourspace_ version: $version\n\ fourspace_ kind: $kind\n\ fourspace_ name: $name\n\ fourspace_ namespace: $namespace\n\ twospace_ path: $patch_file" "$kustomization_file" sed -i "s/fourspace_ / /" "$kustomization_file" sed -i "s/twospace_ / /" "$kustomization_file" } function setup_kubeadm_kustomize() { local kubeadm_exclude= local kubeadm_conf_api= local kubeadm_cluster_config_v1beta2_file="kubeadm-cluster-config-v1beta2.yml" local kubeadm_cluster_config_v1beta3_file="kubeadm-cluster-config-v1beta3.yml" local kubeadm_init_config_v1beta2_file="kubeadm-init-config-v1beta2.yml" local kubeadm_init_config_v1beta3_file="kubeadm-init-config-v1beta3.yml" local kubeadm_join_config_v1beta2_file="kubeadm-join-config-v1beta2.yaml" local kubeadm_join_config_v1beta3_file="kubeadm-join-config-v1beta3.yaml" local kubeadm_init_src="$DIR/kustomize/kubeadm/init-orig" local kubeadm_join_src="$DIR/kustomize/kubeadm/join-orig" local kubeadm_init_dst="$DIR/kustomize/kubeadm/init" local kubeadm_join_dst="$DIR/kustomize/kubeadm/join" kubeadm_conf_api=$(kubeadm_conf_api_version) # Clean up the source directories for the kubeadm kustomize resources and # patches. 
rm -rf "$DIR/kustomize/kubeadm/init" rm -rf "$DIR/kustomize/kubeadm/join" rm -rf "$DIR/kustomize/kubeadm/init-patches" rm -rf "$DIR/kustomize/kubeadm/join-patches" # Kubernete 1.26+ will use kubeadm/v1beta3 API if [ "$KUBERNETES_TARGET_VERSION_MINOR" -ge "26" ]; then # only include kubeadm/v1beta3 resources kubeadm_exclude=("$kubeadm_cluster_config_v1beta2_file" "$kubeadm_init_config_v1beta2_file" "$kubeadm_join_config_v1beta2_file") else # only include kubeadm/v1beta2 resources kubeadm_exclude=("$kubeadm_cluster_config_v1beta3_file" "$kubeadm_init_config_v1beta3_file" "$kubeadm_join_config_v1beta3_file") fi # copy kubeadm kustomize resources copy_kustomize_kubeadm_resources "$kubeadm_init_src" "$kubeadm_init_dst" "${kubeadm_exclude[@]}" copy_kustomize_kubeadm_resources "$kubeadm_join_src" "$kubeadm_join_dst" "${kubeadm_exclude[@]}" # tell kustomize which resources to generate # NOTE: 'eval' is used so that variables embedded within variables can be rendered correctly in the shell eval insert_resources "$kubeadm_init_dst/kustomization.yaml" "\$kubeadm_cluster_config_${kubeadm_conf_api}_file" eval insert_resources "$kubeadm_init_dst/kustomization.yaml" "\$kubeadm_init_config_${kubeadm_conf_api}_file" eval insert_resources "$kubeadm_join_dst/kustomization.yaml" "\$kubeadm_join_config_${kubeadm_conf_api}_file" # create kubeadm kustomize patches directories mkdir -p "$DIR/kustomize/kubeadm/init-patches" mkdir -p "$DIR/kustomize/kubeadm/join-patches" if [ -n "$USE_STANDARD_PORT_RANGE" ]; then sed -i 's/80-60000/30000-32767/g' "$DIR/kustomize/kubeadm/init/kubeadm-cluster-config-$kubeadm_conf_api.yml" fi } # copy_kustomize_kubeadm_resources copies kubeadm kustomize resources # from source ($1) to destination ($2) and excludes files specified as # variable number of arguments. # E.g. copy_kustomize_kubeadm_resources \ # "/var/lib/kurl/kustomize/kubeadm/init-orig" \ # "/var/lib/kurl/kustomize/kubeadm/init" \ # "kubeadm-cluster-config-v1beta2.yml" \ # "kubeadm-init-config-v1beta2.yml" \ # "kubeadm-join-config-v1beta2.yml" function copy_kustomize_kubeadm_resources() { local kustomize_kubeadm_src_dir=$1 local kustomize_kubeadm_dst_dir=$2 local excluded_files=("${@:3}") # ensure destination exist mkdir -p "$kustomize_kubeadm_dst_dir" # copy kustomize resources from source to destination directory # but exclude files in $excluded_files. for file in "$kustomize_kubeadm_src_dir"/*; do filename=$(basename "$file") excluded=false for excluded_file in "${excluded_files[@]}"; do if [ "$filename" = "$excluded_file" ]; then excluded=true break fi done if ! $excluded; then cp "$file" "$kustomize_kubeadm_dst_dir" fi done } function apply_installer_crd() { INSTALLER_CRD_DEFINITION="$DIR/kurlkinds/cluster.kurl.sh_installers.yaml" kubectl apply -f "$INSTALLER_CRD_DEFINITION" if [ -z "$ONLY_APPLY_MERGED" ] && [ -n "$INSTALLER_YAML" ]; then ORIGINAL_INSTALLER_SPEC=/tmp/kurl-bin-utils/specs/original.yaml cat > $ORIGINAL_INSTALLER_SPEC </dev/null 2>&1 || true done if [ "$DOCKER_BRIDGE" != "$BRIDGE" ] ; then kubeadm_run_iptables -t filter -D FORWARD -i $DOCKER_BRIDGE -o $BRIDGE -j DROP 2>/dev/null || true fi kubeadm_run_iptables -t filter -D INPUT -d 127.0.0.1/32 -p tcp --dport 6784 -m addrtype ! --src-type LOCAL -m conntrack ! 
--ctstate RELATED,ESTABLISHED -m comment --comment "Block non-local access to Weave Net control port" -j DROP >/dev/null 2>&1 || true kubeadm_run_iptables -t filter -D INPUT -i $DOCKER_BRIDGE -p udp --dport 53 -j ACCEPT >/dev/null 2>&1 || true kubeadm_run_iptables -t filter -D INPUT -i $DOCKER_BRIDGE -p tcp --dport 53 -j ACCEPT >/dev/null 2>&1 || true if [ -n "$DOCKER_VERSION" ]; then DOCKER_BRIDGE_IP=$(docker run --rm --pid host --net host --privileged -v /var/run/docker.sock:/var/run/docker.sock --entrypoint=/usr/bin/weaveutil $WEAVEEXEC_IMAGE:$WEAVE_TAG bridge-ip $DOCKER_BRIDGE) kubeadm_run_iptables -t filter -D INPUT -i $DOCKER_BRIDGE -p tcp --dst $DOCKER_BRIDGE_IP --dport $PORT -j DROP >/dev/null 2>&1 || true kubeadm_run_iptables -t filter -D INPUT -i $DOCKER_BRIDGE -p udp --dst $DOCKER_BRIDGE_IP --dport $PORT -j DROP >/dev/null 2>&1 || true kubeadm_run_iptables -t filter -D INPUT -i $DOCKER_BRIDGE -p udp --dst $DOCKER_BRIDGE_IP --dport $(($PORT + 1)) -j DROP >/dev/null 2>&1 || true fi kubeadm_run_iptables -t filter -D FORWARD -i $BRIDGE ! -o $BRIDGE -j ACCEPT 2>/dev/null || true kubeadm_run_iptables -t filter -D FORWARD -o $BRIDGE -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT 2>/dev/null || true kubeadm_run_iptables -t filter -D FORWARD -i $BRIDGE -o $BRIDGE -j ACCEPT 2>/dev/null || true kubeadm_run_iptables -F WEAVE-NPC >/dev/null 2>&1 || true kubeadm_run_iptables -t filter -D FORWARD -o $BRIDGE -j WEAVE-NPC 2>/dev/null || true kubeadm_run_iptables -t filter -D FORWARD -o $BRIDGE -m state --state NEW -j NFLOG --nflog-group 86 2>/dev/null || true kubeadm_run_iptables -t filter -D FORWARD -o $BRIDGE -j DROP 2>/dev/null || true kubeadm_run_iptables -X WEAVE-NPC >/dev/null 2>&1 || true kubeadm_run_iptables -F WEAVE-EXPOSE >/dev/null 2>&1 || true kubeadm_run_iptables -t filter -D FORWARD -o $BRIDGE -j WEAVE-EXPOSE 2>/dev/null || true kubeadm_run_iptables -X WEAVE-EXPOSE >/dev/null 2>&1 || true kubeadm_run_iptables -t nat -F WEAVE >/dev/null 2>&1 || true kubeadm_run_iptables -t nat -D POSTROUTING -j WEAVE >/dev/null 2>&1 || true kubeadm_run_iptables -t nat -D POSTROUTING -o $BRIDGE -j ACCEPT >/dev/null 2>&1 || true kubeadm_run_iptables -t nat -X WEAVE >/dev/null 2>&1 || true for LOCAL_IFNAME in $(ip link show | grep v${CONTAINER_IFNAME}pl | cut -d ' ' -f 2 | tr -d ':') ; do ip link del ${LOCAL_IFNAME%@*} >/dev/null 2>&1 || true done } function kubeadm_run_iptables() { # -w is recent addition to iptables if [ -z "$CHECKED_IPTABLES_W" ] ; then iptables -S -w >/dev/null 2>&1 && IPTABLES_W=-w CHECKED_IPTABLES_W=1 fi iptables $IPTABLES_W "$@" } function kubeadm_containerd_restart() { systemctl restart containerd } function kubeadm_registry_containerd_configure() { local registry_ip="$1" local server="$registry_ip" if [ "$IPV6_ONLY" = "1" ]; then server="registry.kurl.svc.cluster.local" sed -i '/registry\.kurl\.svc\.cluster\.local/d' /etc/hosts echo "$registry_ip $server" >> /etc/hosts fi if grep -Fq "plugins.\"io.containerd.grpc.v1.cri\".registry.configs.\"${server}\".tls" /etc/containerd/config.toml; then echo "Registry ${server} TLS already configured for containerd" return 0 fi cat >> /etc/containerd/config.toml < /tmp/k8s-healthz.out || true if grep -q "ok" /tmp/k8s-healthz.out; then rm /tmp/k8s-healthz.out return 0 fi rm /tmp/k8s-healthz.out return 1 } function kubeadm_conf_api_version() { # Get kubeadm api version from the runtime # Enforce the use of kubeadm.k8s.io/v1beta3 api version beginning with Kubernetes 1.26+ local kubeadm_v1beta3_min_version= 
kubeadm_v1beta3_min_version="26" if [ -n "$KUBERNETES_TARGET_VERSION_MINOR" ]; then if [ "$KUBERNETES_TARGET_VERSION_MINOR" -ge "$kubeadm_v1beta3_min_version" ]; then echo "v1beta3" else echo "v1beta2" fi else # ################################ NOTE ########################################## # # get the version from an existing cluster when the installer is not run # # i.e. this is meant to handle cases where kubeadm config is patched from tasks.sh # semverParse "$(kubeadm version --output=short | sed 's/v//')" # shellcheck disable=SC2154 local kube_current_version_minor="$minor" if [ "$kube_current_version_minor" -ge "$kubeadm_v1beta3_min_version" ]; then echo "v1beta3" else echo "v1beta2" fi fi } # kubeadm_customize_config mutates a kubeadm configuration file for Kubernetes compatibility purposes function kubeadm_customize_config() { local kubeadm_patch_config=$1 # Templatize the api version for kubeadm patches # shellcheck disable=SC2016 sed -i 's|kubeadm.k8s.io/v1beta.*|kubeadm.k8s.io/$(kubeadm_conf_api_version)|' "$kubeadm_patch_config" # Kubernetes 1.24 deprecated the '--container-runtime' kubelet argument in 1.24 and removed it in 1.27 # See: https://kubernetes.io/blog/2023/03/17/upcoming-changes-in-kubernetes-v1-27/#removal-of-container-runtime-command-line-argument if [ "$KUBERNETES_TARGET_VERSION_MINOR" -ge "24" ]; then # remove kubeletExtraArgs.container-runtime from the containerd kubeadm addon patch sed -i '/container-runtime:/d' "$kubeadm_patch_config" fi } # containerd_patch_for_minor_version returns the maximum patch version for the given minor version. uses # $CONTAINERD_STEP_VERSIONS to determine the max patch. if the minor version is not found, returns an # empty string. function containerd_patch_for_minor_version() { local for_major=$1 local for_minor=$2 for i in "${CONTAINERD_STEP_VERSIONS[@]}"; do semverParse "$i" if [ "$major" == "$for_major" ] && [ "$minor" == "$for_minor" ]; then echo "$patch" return 0 fi done echo "" } # containerd_migration_steps returns an array with all steps necessary to migrate from the current containerd # version to the desired version. function containerd_migration_steps() { local from_version=$1 local to_version=$2 local current_minor local current_major semverParse "$from_version" current_major="$major" current_minor=$((minor + 1)) local install_minor semverParse "$to_version" install_minor="$minor" install_major="$major" local steps=() while [ "$current_minor" -lt "$install_minor" ]; do max_patch=$(containerd_patch_for_minor_version "$current_major" "$current_minor") if [ -z "$max_patch" ]; then bail "error: could not find patch for containerd minor version v$current_major.$current_minor" fi steps+=("$install_major.$current_minor.$max_patch") current_minor=$((current_minor + 1)) done steps+=("$to_version") echo "${steps[@]}" } # containerd_upgrade_between_majors returns true if the upgrade is between major versions. function containerd_upgrade_between_majors() { local from_version=$1 local to_version=$2 local from_major semverParse "$from_version" from_major="$major" local to_major semverParse "$to_version" to_major="$major" test "$from_major" -ne "$to_major" } # containerd_upgrade_is_possible verifies if an upgrade between the provided containerd # versions is possible. we verify if the installed containerd is known to us, if there # is no major versions upgrades and if the minor version upgrade is not too big. 
function containerd_upgrade_is_possible() {
    local from_version=$1
    local to_version=$2

    # so far we don't have containerd version 2, and when it arrives we don't know exactly which
    # versions we will be able to upgrade to it from. so, for now, we block the attempt so that
    # the testgrid will fail when that version arrives.
    if containerd_upgrade_between_majors "$from_version" "$to_version" ; then
        bail "Upgrade between containerd major versions is not supported by this installer."
    fi

    semverCompare "$from_version" "$to_version"
    if [ "$SEMVER_COMPARE_RESULT" = "1" ]; then
        bail "Downgrading containerd (from v$from_version to v$to_version) is not supported."
    fi

    semverParse "$from_version"
    local current_minor
    current_minor="$minor"

    semverParse "$to_version"
    local installing_minor
    installing_minor="$minor"

    if [ "$installing_minor" -gt "$((current_minor + 2))" ]; then
        logFail "Cannot upgrade containerd from v$from_version to v$to_version"
        logFail "This installer supports only containerd upgrades spanning two minor versions."
        bail "Please consider upgrading to an older containerd version first."
    fi
}

# containerd_evaluate_upgrade verifies if containerd upgrade between the two provided versions
# is possible and, in case it is, returns the list of steps necessary to perform the upgrade.
# each step is a version of containerd that we need to install.
export CONTAINERD_INSTALL_VERSIONS=()
function containerd_evaluate_upgrade() {
    local from_version=$1
    local to_version=$2

    if use_os_containerd ; then
        return 0
    fi

    echo "Evaluating if an upgrade from containerd v$from_version to v$to_version is possible."
    containerd_upgrade_is_possible "$from_version" "$to_version"
    echo "Containerd upgrade from v$from_version to v$to_version is possible."

    for version in $(containerd_migration_steps "$from_version" "$to_version"); do
        CONTAINERD_INSTALL_VERSIONS+=("$version")
    done
}

function use_os_containerd() {
    if ! host_packages_shipped && !
is_rhel_9_variant ; then # we ship containerd packages for RHEL9, but not for the later no-shipped-packages distros return 0 fi return 1 } KUBERNETES_INIT_IGNORE_PREFLIGHT_ERRORS="${KUBERNETES_INIT_IGNORE_PREFLIGHT_ERRORS:-}" function init() { logStep "Initialize Kubernetes" kubernetes_maybe_generate_bootstrap_token local addr="$PRIVATE_ADDRESS" local port="6443" API_SERVICE_ADDRESS="$PRIVATE_ADDRESS:6443" if [ "$HA_CLUSTER" = "1" ]; then addr="$LOAD_BALANCER_ADDRESS" port="$LOAD_BALANCER_PORT" fi addr=$($DIR/bin/kurl netutil format-ip-address "$addr") API_SERVICE_ADDRESS="$addr:$port" local oldLoadBalancerAddress=$(kubernetes_load_balancer_address) if commandExists ekco_handle_load_balancer_address_change_pre_init; then ekco_handle_load_balancer_address_change_pre_init $oldLoadBalancerAddress $LOAD_BALANCER_ADDRESS fi if [ "$EKCO_ENABLE_INTERNAL_LOAD_BALANCER" = "1" ] && commandExists ekco_bootstrap_internal_lb; then ekco_bootstrap_internal_lb fi local kustomize_kubeadm_init="$DIR/kustomize/kubeadm/init" local NODE_HOSTNAME= NODE_HOSTNAME=$(get_local_node_name) # if the hostname is overridden, patch the kubeadm config to use the overridden hostname if [ "$NODE_HOSTNAME" != "$(hostname | tr '[:upper:]' '[:lower:]')" ]; then render_yaml_file_2 "$kustomize_kubeadm_init/kubeadm-init-hostname.patch.tmpl.yaml" \ > "$kustomize_kubeadm_init/kubeadm-init-hostname.patch.yaml" insert_patches_strategic_merge \ $kustomize_kubeadm_init/kustomization.yaml \ kubeadm-init-hostname.patch.yaml fi CERT_KEY= CERT_KEY_EXPIRY= if [ "$HA_CLUSTER" = "1" ]; then CERT_KEY=$(< /dev/urandom tr -dc a-f0-9 | head -c64) CERT_KEY_EXPIRY=$(TZ="UTC" date -d "+2 hour" --rfc-3339=second | sed 's/ /T/') insert_patches_strategic_merge \ $kustomize_kubeadm_init/kustomization.yaml \ patch-certificate-key.yaml fi # kustomize can merge multiple list patches in some cases but it is not working for me on the # ClusterConfiguration.apiServer.certSANs list if [ -n "$PUBLIC_ADDRESS" ] && [ -n "$LOAD_BALANCER_ADDRESS" ]; then insert_patches_strategic_merge \ $kustomize_kubeadm_init/kustomization.yaml \ patch-public-and-load-balancer-address.yaml elif [ -n "$PUBLIC_ADDRESS" ]; then insert_patches_strategic_merge \ $kustomize_kubeadm_init/kustomization.yaml \ patch-public-address.yaml elif [ -n "$LOAD_BALANCER_ADDRESS" ]; then insert_patches_strategic_merge \ $kustomize_kubeadm_init/kustomization.yaml \ patch-load-balancer-address.yaml fi # conditional kubelet configuration fields if [ "$KUBERNETES_TARGET_VERSION_MINOR" -ge "21" ]; then insert_patches_strategic_merge \ $kustomize_kubeadm_init/kustomization.yaml \ patch-kubelet-21.yaml else insert_patches_strategic_merge \ $kustomize_kubeadm_init/kustomization.yaml \ patch-kubelet-pre21.yaml fi if [ "$KUBERNETES_CIS_COMPLIANCE" == "1" ]; then insert_patches_strategic_merge \ $kustomize_kubeadm_init/kustomization.yaml \ patch-kubelet-cis-compliance.yaml if [ "$KUBERNETES_TARGET_VERSION_MINOR" -ge "20" ]; then insert_patches_strategic_merge \ $kustomize_kubeadm_init/kustomization.yaml \ patch-cluster-config-cis-compliance.yaml else insert_patches_strategic_merge \ $kustomize_kubeadm_init/kustomization.yaml \ patch-cluster-config-cis-compliance-insecure-port.yaml fi fi if [ "$KUBE_RESERVED" == "1" ]; then # gets the memory and CPU capacity of the worker node MEMORY_MI=$(free -m | grep Mem | awk '{print $2}') CPU_MILLICORES=$(($(nproc) * 1000)) # calculates the amount of each resource to reserve mebibytes_to_reserve=$(get_memory_mebibytes_to_reserve $MEMORY_MI) 
cpu_millicores_to_reserve=$(get_cpu_millicores_to_reserve $CPU_MILLICORES) insert_patches_strategic_merge \ $kustomize_kubeadm_init/kustomization.yaml \ patch-kubelet-reserve-compute-resources.yaml render_yaml_file $kustomize_kubeadm_init/patch-kubelet-reserve-compute-resources.tpl > $kustomize_kubeadm_init/patch-kubelet-reserve-compute-resources.yaml fi if [ -n "$EVICTION_THRESHOLD" ]; then insert_patches_strategic_merge \ $kustomize_kubeadm_init/kustomization.yaml \ patch-kubelet-eviction-threshold.yaml render_yaml_file $kustomize_kubeadm_init/patch-kubelet-eviction-threshold.tpl > $kustomize_kubeadm_init/patch-kubelet-eviction-threshold.yaml fi if [ -n "$SYSTEM_RESERVED" ]; then insert_patches_strategic_merge \ $kustomize_kubeadm_init/kustomization.yaml \ patch-kubelet-system-reserved.yaml render_yaml_file $kustomize_kubeadm_init/patch-kubelet-system-reserved.tpl > $kustomize_kubeadm_init/patch-kubelet-system-reserved.yaml fi if [ -n "$CONTAINER_LOG_MAX_SIZE" ]; then insert_patches_strategic_merge \ $kustomize_kubeadm_init/kustomization.yaml \ patch-kubelet-container-log-max-size.yaml render_yaml_file $kustomize_kubeadm_init/patch-kubelet-container-log-max-size.tpl > $kustomize_kubeadm_init/patch-kubelet-container-log-max-size.yaml fi if [ -n "$CONTAINER_LOG_MAX_FILES" ]; then insert_patches_strategic_merge \ $kustomize_kubeadm_init/kustomization.yaml \ patch-kubelet-container-log-max-files.yaml render_yaml_file $kustomize_kubeadm_init/patch-kubelet-container-log-max-files.tpl > $kustomize_kubeadm_init/patch-kubelet-container-log-max-files.yaml fi if [ -n "$KUBERNETES_MAX_PODS_PER_NODE" ]; then insert_patches_strategic_merge \ $kustomize_kubeadm_init/kustomization.yaml \ patch-kubelet-max-pods.yaml render_yaml_file_2 $kustomize_kubeadm_init/patch-kubelet-max-pods.tmpl.yaml > $kustomize_kubeadm_init/patch-kubelet-max-pods.yaml fi kubernetes_configure_pause_image "$kustomize_kubeadm_init" # Add kubeadm init patches from addons. for patch in $(ls -1 ${kustomize_kubeadm_init}-patches/* 2>/dev/null || echo); do patch_basename="$(basename $patch)" cp "$patch" "$kustomize_kubeadm_init/$patch_basename" kubeadm_customize_config "$kustomize_kubeadm_init/$patch_basename" insert_patches_strategic_merge \ $kustomize_kubeadm_init/kustomization.yaml \ "$patch_basename" done mkdir -p "$KUBEADM_CONF_DIR" # Generate kubeadm config kubectl kustomize $kustomize_kubeadm_init > "$KUBEADM_CONF_DIR/kubeadm-init-raw.yaml" render_yaml_file "$KUBEADM_CONF_DIR/kubeadm-init-raw.yaml" > "$KUBEADM_CONF_FILE" # kustomize requires assests have a metadata field while kubeadm config will reject yaml containing it # this uses a go binary found in kurl/cmd/yamlutil to strip the metadata field from the yaml # cp $KUBEADM_CONF_FILE $KUBEADM_CONF_DIR/kubeadm_conf_copy_in $DIR/bin/yamlutil -r -fp $KUBEADM_CONF_DIR/kubeadm_conf_copy_in -yp metadata mv $KUBEADM_CONF_DIR/kubeadm_conf_copy_in $KUBEADM_CONF_FILE # When no_proxy changes kubeadm init rewrites the static manifests and fails because the api is # restarting. Trigger the restart ahead of time and wait for it to be healthy. if [ -f "/etc/kubernetes/manifests/kube-apiserver.yaml" ] && [ -n "$no_proxy" ] && ! grep -Fq "$no_proxy" /etc/kubernetes/manifests/kube-apiserver.yaml ; then kubeadm init phase control-plane apiserver --config $KUBEADM_CONF_FILE sleep 2 if ! 
spinner_until 60 kubernetes_api_is_healthy; then echo "Failed to wait for kubernetes API restart after no_proxy change" # continue fi fi if [ "$HA_CLUSTER" = "1" ]; then UPLOAD_CERTS="--upload-certs" fi # kubeadm init temporarily taints this node which causes rook to move any mons on it and may # lead to a loss of quorum disable_rook_ceph_operator # since K8s 1.19.1 kubeconfigs point to local API server even in HA setup. When upgrading from # earlier versions and using a load balancer, kubeadm init will bail because the kubeconfigs # already exist pointing to the load balancer rm -rf /etc/kubernetes/*.conf # Regenerate api server cert in case load balancer address changed if [ -f /etc/kubernetes/pki/apiserver.crt ]; then mv -f /etc/kubernetes/pki/apiserver.crt /tmp/ fi if [ -f /etc/kubernetes/pki/apiserver.key ]; then mv -f /etc/kubernetes/pki/apiserver.key /tmp/ fi # ensure that /etc/kubernetes/audit.yaml exists cp $kustomize_kubeadm_init/audit.yaml /etc/kubernetes/audit.yaml mkdir -p /var/log/apiserver if [ -z "$KUBERNETES_INIT_IGNORE_PREFLIGHT_ERRORS" ]; then KUBERNETES_INIT_IGNORE_PREFLIGHT_ERRORS=all fi set -o pipefail cmd_retry 3 kubeadm init \ --ignore-preflight-errors="$KUBERNETES_INIT_IGNORE_PREFLIGHT_ERRORS" \ --config $KUBEADM_CONF_FILE \ $UPLOAD_CERTS \ | tee /tmp/kubeadm-init set +o pipefail # Node would be cordoned if migrated from docker to containerd kubectl uncordon "$(get_local_node_name)" if [ -n "$LOAD_BALANCER_ADDRESS" ]; then addr=$($DIR/bin/kurl netutil format-ip-address "$PRIVATE_ADDRESS") spinner_until 120 cert_has_san "$addr:6443" "$LOAD_BALANCER_ADDRESS" fi if commandExists ekco_cleanup_bootstrap_internal_lb; then ekco_cleanup_bootstrap_internal_lb fi spinner_kubernetes_api_stable exportKubeconfig KUBEADM_TOKEN_CA_HASH=$(cat /tmp/kubeadm-init | grep 'discovery-token-ca-cert-hash' | awk '{ print $2 }' | head -1) if [ "$KUBERNETES_CIS_COMPLIANCE" == "1" ]; then if [ "$KUBERNETES_TARGET_VERSION_MINOR" -le "24" ]; then kubectl apply -f $kustomize_kubeadm_init/pod-security-policy-privileged.yaml # patch 'PodSecurityPolicy' to kube-apiserver and wait for kube-apiserver to reconcile old_admission_plugins='--enable-admission-plugins=NodeRestriction' new_admission_plugins='--enable-admission-plugins=NodeRestriction,PodSecurityPolicy' sed -i "s%$old_admission_plugins%$new_admission_plugins%g" /etc/kubernetes/manifests/kube-apiserver.yaml spinner_kubernetes_api_stable fi # create an 'etcd' user and group and ensure that it owns the etcd data directory (we don't care what userid these have, as etcd will still run as root) useradd etcd || true groupadd etcd || true chown -R etcd:etcd /var/lib/etcd fi wait_for_nodes # workaround as some code relies on this legacy label kubectl label --overwrite node "$(get_local_node_name)" node-role.kubernetes.io/master= enable_rook_ceph_operator DID_INIT_KUBERNETES=1 logSuccess "Kubernetes Master Initialized" local currentLoadBalancerAddress=$(kubernetes_load_balancer_address) if [ "$currentLoadBalancerAddress" != "$oldLoadBalancerAddress" ]; then # restart scheduler and controller-manager on this node so they use the new address mv /etc/kubernetes/manifests/kube-scheduler.yaml /tmp/ && sleep 1 && mv /tmp/kube-scheduler.yaml /etc/kubernetes/manifests/ mv /etc/kubernetes/manifests/kube-controller-manager.yaml /tmp/ && sleep 1 && mv /tmp/kube-controller-manager.yaml /etc/kubernetes/manifests/ if kubernetes_has_remotes; then if commandExists ekco_handle_load_balancer_address_change_kubeconfigs; then 
    local currentLoadBalancerAddress=$(kubernetes_load_balancer_address)
    if [ "$currentLoadBalancerAddress" != "$oldLoadBalancerAddress" ]; then
        # restart scheduler and controller-manager on this node so they use the new address
        mv /etc/kubernetes/manifests/kube-scheduler.yaml /tmp/ && sleep 1 && mv /tmp/kube-scheduler.yaml /etc/kubernetes/manifests/
        mv /etc/kubernetes/manifests/kube-controller-manager.yaml /tmp/ && sleep 1 && mv /tmp/kube-controller-manager.yaml /etc/kubernetes/manifests/

        if kubernetes_has_remotes; then
            if commandExists ekco_handle_load_balancer_address_change_kubeconfigs; then
                ekco_handle_load_balancer_address_change_kubeconfigs
            fi
            if commandExists ekco_handle_load_balancer_address_change_post_init; then
                ekco_handle_load_balancer_address_change_post_init $oldLoadBalancerAddress $LOAD_BALANCER_ADDRESS
            fi
        fi

        # restart kube-proxies so they use the new address
        kubectl -n kube-system delete pods --selector=k8s-app=kube-proxy
    fi

    labelNodes
    kubectl cluster-info

    # approve csrs on the masters if cis compliance is enabled
    if [ "$KUBERNETES_CIS_COMPLIANCE" == "1" ]; then
        kubectl get csr | grep 'Pending' | grep 'kubelet-serving' | awk '{ print $1 }' | xargs -I {} kubectl certificate approve {}
    fi

    # create kurl namespace if it doesn't exist
    kubectl get ns kurl >/dev/null 2>&1 || kubectl create ns kurl --save-config

    spinner_until 120 kubernetes_default_service_account_exists
    spinner_until 120 kubernetes_service_exists

    logSuccess "Cluster Initialized"

    kubernetes_configure_coredns

    if commandExists registry_init; then
        registry_init

        if [ -n "$CONTAINERD_VERSION" ]; then
            ${K8S_DISTRO}_registry_containerd_configure "${DOCKER_REGISTRY_IP}"
            ${K8S_DISTRO}_containerd_restart
            spinner_kubernetes_api_healthy
        fi
    fi

    # install the kurl in-cluster troubleshoot supportbundle spec
    kubectl -n kurl apply -f "$DIR/manifests/troubleshoot.yaml"

    kubernetes_cis_chmod_kubelet_config_file
    kubernetes_cis_super_admin_credentials_file_permissions
}

function kubeadm_post_init() {
    BOOTSTRAP_TOKEN_EXPIRY=$(kubeadm token list | grep $BOOTSTRAP_TOKEN | awk '{print $3}')
    kurl_config
}

function kubernetes_maybe_generate_bootstrap_token() {
    if [ -z "$BOOTSTRAP_TOKEN" ]; then
        logStep "generate kubernetes bootstrap token"
        BOOTSTRAP_TOKEN=$(kubeadm token generate)
    fi
    echo "Kubernetes bootstrap token: ${BOOTSTRAP_TOKEN}"
    echo "This token will expire in 24 hours"
}

function kurl_config() {
    logStep "Persisting the kurl installer spec"
    if kubernetes_resource_exists kube-system configmap kurl-config; then
        kubectl -n kube-system delete configmap kurl-config
    fi
    kubectl -n kube-system create configmap kurl-config \
        --from-literal=kurl_url="$KURL_URL" \
        --from-literal=installer_id="$INSTALLER_ID" \
        --from-literal=ha="$HA_CLUSTER" \
        --from-literal=airgap="$AIRGAP" \
        --from-literal=ca_hash="$KUBEADM_TOKEN_CA_HASH" \
        --from-literal=docker_registry_ip="$DOCKER_REGISTRY_IP" \
        --from-literal=kubernetes_api_address="$API_SERVICE_ADDRESS" \
        --from-literal=bootstrap_token="$BOOTSTRAP_TOKEN" \
        --from-literal=bootstrap_token_expiration="$BOOTSTRAP_TOKEN_EXPIRY" \
        --from-literal=cert_key="$CERT_KEY" \
        --from-literal=upload_certs_expiration="$CERT_KEY_EXPIRY" \
        --from-literal=service_cidr="$SERVICE_CIDR" \
        --from-literal=pod_cidr="$POD_CIDR" \
        --from-literal=kurl_install_directory="$KURL_INSTALL_DIRECTORY_FLAG" \
        --from-literal=additional_no_proxy_addresses="$ADDITIONAL_NO_PROXY_ADDRESSES" \
        --from-literal=kubernetes_cis_compliance="$KUBERNETES_CIS_COMPLIANCE"
    logSuccess "Kurl installer spec was successfully persisted in the kurl configmap"
}

function outro() {
    echo
    if [ -z "$PUBLIC_ADDRESS" ]; then
        if [ -z "$PRIVATE_ADDRESS" ]; then
            PUBLIC_ADDRESS=""
            PRIVATE_ADDRESS=""
        else
            PUBLIC_ADDRESS="$PRIVATE_ADDRESS"
        fi
    fi

    local common_flags
    common_flags="${common_flags}$(get_docker_registry_ip_flag "${DOCKER_REGISTRY_IP}")"

    service_cidr=$(kubectl -n kube-system get cm kurl-config -ojsonpath='{ .data.service_cidr }')
    pod_cidr=$(kubectl -n kube-system get cm kurl-config -ojsonpath='{ .data.pod_cidr }')

    local no_proxy_addresses=""
    [ -n "$ADDITIONAL_NO_PROXY_ADDRESSES" ] && no_proxy_addresses="$ADDITIONAL_NO_PROXY_ADDRESSES"
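    # The ${no_proxy_addresses:+$no_proxy_addresses,} expansions below are a comma-join
    # idiom: the "<current value>," prefix is emitted only when the variable is already
    # non-empty, so the first entry never gains a leading comma. With illustrative CIDRs:
    #   ""             + 10.96.0.0/12 -> "10.96.0.0/12"
    #   "10.32.0.0/22" + 10.96.0.0/12 -> "10.32.0.0/22,10.96.0.0/12"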
    [ -n "$service_cidr" ] && no_proxy_addresses="${no_proxy_addresses:+$no_proxy_addresses,}$service_cidr"
    [ -n "$pod_cidr" ] && no_proxy_addresses="${no_proxy_addresses:+$no_proxy_addresses,}$pod_cidr"
    [ -n "$no_proxy_addresses" ] && common_flags="${common_flags}$(get_additional_no_proxy_addresses_flag 1 "$no_proxy_addresses")"
    common_flags="${common_flags}$(get_kurl_install_directory_flag "${KURL_INSTALL_DIRECTORY_FLAG}")"
    common_flags="${common_flags}$(get_remotes_flags)"
    common_flags="${common_flags}$(get_ipv6_flag)"

    KUBEADM_TOKEN_CA_HASH=$(cat /tmp/kubeadm-init | grep 'discovery-token-ca-cert-hash' | awk '{ print $2 }' | head -1)

    printf "\n"
    printf "\t\t${GREEN}Installation${NC}\n"
    printf "\t\t${GREEN} Complete ✔${NC}\n"
    addon_outro
    printf "\n"
    kubeconfig_setup_outro
    printf "\n"

    if [ "$OUTRO_NOTIFIY_TO_RESTART_DOCKER" = "1" ]; then
        printf "\n"
        printf "\n"
        printf "The local /etc/docker/daemon.json has been merged with the spec from the installer, but has not been applied. To apply, restart docker."
        printf "\n"
        printf "\n"
        printf "${GREEN} systemctl daemon-reload${NC}\n"
        printf "${GREEN} systemctl restart docker${NC}\n"
        printf "\n"
        printf "These settings will automatically be applied on the next restart."
        printf "\n"
    fi
    printf "\n"
    printf "\n"

    local prefix=
    prefix="$(build_installer_prefix "${INSTALLER_ID}" "${KURL_VERSION}" "${KURL_URL}" "${PROXY_ADDRESS}" "${PROXY_HTTPS_ADDRESS}")"

    if [ "$HA_CLUSTER" = "1" ]; then
        printf "Master node join commands expire after two hours, and worker node join commands expire after 24 hours.\n"
        printf "\n"
        if [ "$AIRGAP" = "1" ]; then
            printf "To generate new node join commands, run ${GREEN}cat ./tasks.sh | sudo bash -s join_token ha airgap${NC} on an existing master node.\n"
        else
            printf "To generate new node join commands, run ${GREEN}${prefix}tasks.sh | sudo bash -s join_token ha${NC} on an existing master node.\n"
        fi
    else
        printf "Node join commands expire after 24 hours.\n"
        printf "\n"
        if [ "$AIRGAP" = "1" ]; then
            printf "To generate new node join commands, run ${GREEN}cat ./tasks.sh | sudo bash -s join_token airgap${NC} on this node.\n"
        else
            printf "To generate new node join commands, run ${GREEN}${prefix}tasks.sh | sudo bash -s join_token${NC} on this node.\n"
        fi
    fi

    if [ "$AIRGAP" = "1" ]; then
        printf "\n"
        printf "To add worker nodes to this installation, copy and unpack this bundle on your other nodes, and run the following:"
        printf "\n"
        printf "\n"
        printf "${GREEN} cat ./join.sh | sudo bash -s airgap kubernetes-master-address=${API_SERVICE_ADDRESS} kubeadm-token=${BOOTSTRAP_TOKEN} kubeadm-token-ca-hash=${KUBEADM_TOKEN_CA_HASH} kubernetes-version=${KUBERNETES_VERSION} ekco-address=${EKCO_ADDRESS} ekco-auth-token=${EKCO_AUTH_TOKEN}${common_flags}\n"
        printf "${NC}"
        printf "\n"
        printf "\n"
        if [ "$HA_CLUSTER" = "1" ]; then
            printf "\n"
            printf "To add ${GREEN}MASTER${NC} nodes to this installation, copy and unpack this bundle on your other nodes, and run the following:"
            printf "\n"
            printf "\n"
            printf "${GREEN} cat ./join.sh | sudo bash -s airgap kubernetes-master-address=${API_SERVICE_ADDRESS} kubeadm-token=${BOOTSTRAP_TOKEN} kubeadm-token-ca-hash=${KUBEADM_TOKEN_CA_HASH} kubernetes-version=${KUBERNETES_VERSION} cert-key=${CERT_KEY} control-plane ekco-address=${EKCO_ADDRESS} ekco-auth-token=${EKCO_AUTH_TOKEN}${common_flags}\n"
            printf "${NC}"
            printf "\n"
            printf "\n"
        fi
    else
        printf "\n"
        printf "To add worker nodes to this installation, run the following script on your other nodes:"
        printf "\n"
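        # The rendered command takes roughly this shape (placeholder values shown here
        # for illustration; the printf below substitutes this cluster's real values):
        #   <prefix>join.sh | sudo bash -s kubernetes-master-address=<host>:6443 \
        #     kubeadm-token=<token> kubeadm-token-ca-hash=sha256:<hash> \
        #     kubernetes-version=1.30.11 ekco-address=<address> ekco-auth-token=<token>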
        printf "${GREEN} ${prefix}join.sh | sudo bash -s kubernetes-master-address=${API_SERVICE_ADDRESS} kubeadm-token=${BOOTSTRAP_TOKEN} kubeadm-token-ca-hash=${KUBEADM_TOKEN_CA_HASH} kubernetes-version=${KUBERNETES_VERSION} ekco-address=${EKCO_ADDRESS} ekco-auth-token=${EKCO_AUTH_TOKEN}${common_flags}\n"
        printf "${NC}"
        printf "\n"
        printf "\n"
        if [ "$HA_CLUSTER" = "1" ]; then
            printf "\n"
            printf "To add ${GREEN}MASTER${NC} nodes to this installation, run the following script on your other nodes:"
            printf "\n"
            printf "${GREEN} ${prefix}join.sh | sudo bash -s kubernetes-master-address=${API_SERVICE_ADDRESS} kubeadm-token=${BOOTSTRAP_TOKEN} kubeadm-token-ca-hash=$KUBEADM_TOKEN_CA_HASH kubernetes-version=${KUBERNETES_VERSION} cert-key=${CERT_KEY} control-plane ekco-address=${EKCO_ADDRESS} ekco-auth-token=${EKCO_AUTH_TOKEN}${common_flags}\n"
            printf "${NC}"
            printf "\n"
            printf "\n"
        fi
    fi
}

function all_kubernetes_install() {
    kubernetes_host
    install_helm
    ${K8S_DISTRO}_addon_for_each addon_load
    helm_load
    init
    apply_installer_crd
}

function report_kubernetes_install() {
    report_addon_start "kubernetes" "$KUBERNETES_VERSION"
    export REPORTING_CONTEXT_INFO="kubernetes $KUBERNETES_VERSION"
    all_kubernetes_install
    export REPORTING_CONTEXT_INFO=""
    report_addon_success "kubernetes" "$KUBERNETES_VERSION"
}

K8S_DISTRO=kubeadm

function main() {
    logStep "Running install with the argument(s): $*"
    require_root_user
    # ensure /usr/local/bin/kubectl-plugin is in the path
    path_add "/usr/local/bin"
    kubernetes_init_hostname
    get_patch_yaml "$@"
    maybe_read_kurl_config_from_cluster

    if [ "$AIRGAP" = "1" ]; then
        move_airgap_assets
    fi
    pushd_install_directory

    yaml_airgap
    proxy_bootstrap
    download_util_binaries
    get_machine_id
    merge_yaml_specs
    apply_bash_flag_overrides "$@"
    parse_yaml_into_bash_variables
    MASTER=1 # parse_yaml_into_bash_variables will unset master
    prompt_license

    export KUBECONFIG=/etc/kubernetes/admin.conf

    is_ha
    parse_kubernetes_target_version
    discover full-cluster
    report_install_start
    setup_remote_commands_dirs
    trap ctrl_c SIGINT # trap ctrl+c (SIGINT) and handle it by reporting that the user exited intentionally (along with the line/version/etc)
    trap trap_report_error ERR # trap errors and handle it by reporting the error line and parent function
    preflights
    init_preflights
    kubernetes_upgrade_preflight
    common_prompts
    journald_persistent
    configure_proxy
    configure_no_proxy_preinstall
    ${K8S_DISTRO}_addon_for_each addon_fetch
    kubernetes_get_packages
    preflights_require_host_packages
    install_host_dependencies # this installs fio, which is used by host preflight checks
    if [ -z "$CURRENT_KUBERNETES_VERSION" ]; then
        host_preflights "1" "0" "0"
        cluster_preflights "1" "0" "0"
    else
        host_preflights "1" "0" "1"
        cluster_preflights "1" "0" "1"
    fi
    get_common
    setup_kubeadm_kustomize
    rook_upgrade_maybe_report_upgrade_rook
    kubernetes_pre_init
    ${K8S_DISTRO}_addon_for_each addon_pre_init
    discover_pod_subnet
    discover_service_subnet
    configure_no_proxy
    install_cri
    kubernetes_configure_pause_image_upgrade
    get_shared
    report_upgrade_kubernetes
    report_kubernetes_install
    export SUPPORT_BUNDLE_READY=1 # allow ctrl+c and ERR traps to collect support bundles now that k8s is installed
    kurl_init_config
    maybe_set_kurl_cluster_uuid
    kurl_install_support_bundle_configmap
    ${K8S_DISTRO}_addon_for_each addon_install
    maybe_cleanup_rook
    maybe_cleanup_longhorn
    helmfile_sync
    kubeadm_post_init
    uninstall_docker
    ${K8S_DISTRO}_addon_for_each addon_post_init
    check_proxy_config
    rook_maybe_migrate_from_openebs
    outro
    package_cleanup
    popd_install_directory
    report_install_success
}
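# Note on the pattern below: "main | tee" runs as a pipeline, so "$?" would report tee's
# exit status rather than main's; PIPESTATUS[0] recovers the exit code of the first
# pipeline member. Minimal sketch of the idiom:
#   some_command 2>&1 | tee "$logfile"
#   rc="${PIPESTATUS[0]}"   # exit status of some_command, not of tee
#   exit "$rc"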
# tee logs into /var/log/kurl/install-<datetime>.log and stdout
mkdir -p /var/log/kurl
LOGFILE="/var/log/kurl/install-$(date +"%Y-%m-%dT%H-%M-%S").log"
main "$@" 2>&1 | tee $LOGFILE

# it is required to return the exit status of the script
FINAL_RESULT="${PIPESTATUS[0]}"

# scrub any line containing the word "password" from the persisted log
sed -i "/\b\(password\)\b/d" $LOGFILE > /dev/null 2>&1

exit "$FINAL_RESULT"
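# Typical invocations (illustrative; any flags beyond those parsed by this script are
# assumptions rather than an exhaustive list):
#   curl -fsSL https://kurl.sh/cloning-capability-app | sudo bash
#   # airgap, from an unpacked bundle on the host:
#   cat install.sh | sudo bash -s airgap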