diff --git a/pipelines/concourse-workers/pipeline.yml b/pipelines/concourse-workers/pipeline.yml index aa9a485c3..e5671467e 100644 --- a/pipelines/concourse-workers/pipeline.yml +++ b/pipelines/concourse-workers/pipeline.yml @@ -23,6 +23,7 @@ meta: # GKE account info and which zone the clusters should be created in and deleted from. gke_account_params: &gke_account_params + CLUSTER_REGION: us-west1 CLUSTER_ZONE: us-west1-c GCP_PROJECT: ((gcp-project-name)) GCP_SERVICE_ACCOUNT: ((gke-test-pool-manager-username)) diff --git a/pipelines/main/pipeline.yml b/pipelines/main/pipeline.yml index 580768e4d..b744a1878 100644 --- a/pipelines/main/pipeline.yml +++ b/pipelines/main/pipeline.yml @@ -85,7 +85,8 @@ meta: # GKE account info and which zone the clusters should be created in and deleted from. gke_account_params: &gke_account_params - CLUSTER_ZONE: us-west1-c + # CLUSTER_ZONE: us-west1-c + CLUSTER_REGION: us-west1 GCP_PROJECT: ((gcp-project-name)) GCP_SERVICE_ACCOUNT: ((gke-test-pool-manager-username)) GCP_JSON_KEY: ((gke-test-pool-manager-json-key)) diff --git a/pipelines/pull-requests/pipeline.yml b/pipelines/pull-requests/pipeline.yml index 5c85b171f..4b201e034 100644 --- a/pipelines/pull-requests/pipeline.yml +++ b/pipelines/pull-requests/pipeline.yml @@ -73,7 +73,8 @@ meta: # GKE account info and which zone the clusters should be created in and deleted from. 
gke_account_params: &gke_account_params - CLUSTER_ZONE: us-west1-c + # CLUSTER_ZONE: us-west1-c + CLUSTER_REGION: us-west1 GCP_PROJECT: ((gcp-project-name)) GCP_SERVICE_ACCOUNT: ((gke-test-pool-manager-username)) GCP_JSON_KEY: ((gke-test-pool-manager-json-key)) diff --git a/pipelines/shared-tasks/deploy-gke-cluster/task.sh b/pipelines/shared-tasks/deploy-gke-cluster/task.sh index 84c671ba2..5be721119 100755 --- a/pipelines/shared-tasks/deploy-gke-cluster/task.sh +++ b/pipelines/shared-tasks/deploy-gke-cluster/task.sh @@ -13,13 +13,21 @@ export USE_GKE_GCLOUD_AUTH_PLUGIN=True cd deploy-gke-cluster-output gcloud auth activate-service-account "$GCP_SERVICE_ACCOUNT" --key-file <(echo "$GCP_JSON_KEY") --project "$GCP_PROJECT" +# Decide if we want a regional or zonal cluster. +if [[ -n "$CLUSTER_REGION" ]]; then + region_or_zone_flag="--region=$CLUSTER_REGION" + region_or_zone_suffix="region-$CLUSTER_REGION" +else + region_or_zone_flag="--zone=$CLUSTER_ZONE" + region_or_zone_suffix="zone-$CLUSTER_ZONE" +fi if [ -n "$KUBE_VERSION" ]; then echo echo "Trying to use Kubernetes version $KUBE_VERSION" # Look up the latest GKE version for KUBE_VERSION. - GKE_VERSIONS="$(gcloud container get-server-config --zone "$CLUSTER_ZONE" --format json \ + GKE_VERSIONS="$(gcloud container get-server-config "$region_or_zone_flag" --format json \ | jq -r '.validMasterVersions[]')" echo echo "Found all versions of Kubernetes supported by GKE:" @@ -36,18 +44,18 @@ else export VERSION_FLAG="--release-channel=${GKE_CHANNEL:-"regular"}" fi -# Include the zone of the cluster in its name. This will allow us to change our preferred zone for new -# clusters anytime we want, and the existing clusters can still be deleted because the old zone can +# Include the region or zone of the cluster in its name. 
This will allow us to change our preferred region/zone for new +# clusters anytime we want, and the existing clusters can still be deleted because the old region/zone can # be parsed out from the cluster name at deletion time. -CLUSTER_NAME="gke-$(openssl rand -hex 4)-zone-${CLUSTER_ZONE}" +CLUSTER_NAME="gke-$(openssl rand -hex 4)-${region_or_zone_suffix}" # The cluster name becomes the name of the lock in the pool. -echo "$CLUSTER_NAME" > name +echo "$CLUSTER_NAME" >name # Start the cluster # Note that --enable-network-policy is required to enable NetworkPolicy resources. Otherwise they are ignored. gcloud container clusters create "$CLUSTER_NAME" \ - --zone "$CLUSTER_ZONE" \ + "$region_or_zone_flag" \ "$VERSION_FLAG" \ --num-nodes 1 \ --machine-type e2-standard-8 \ @@ -67,7 +75,7 @@ gcloud container clusters create "$CLUSTER_NAME" \ --services-secondary-range-name "pods" # Get the cluster details back, including the admin certificate: -gcloud container clusters describe "$CLUSTER_NAME" --zone "$CLUSTER_ZONE" --format json \ +gcloud container clusters describe "$CLUSTER_NAME" "$region_or_zone_flag" --format json \ > /tmp/cluster.json # Make a new kubeconfig user "cluster-admin" using the admin cert. 
diff --git a/pipelines/shared-tasks/deploy-gke-cluster/task.yml b/pipelines/shared-tasks/deploy-gke-cluster/task.yml index 63dd39634..5af7cd880 100644 --- a/pipelines/shared-tasks/deploy-gke-cluster/task.yml +++ b/pipelines/shared-tasks/deploy-gke-cluster/task.yml @@ -10,6 +10,7 @@ outputs: params: KUBE_VERSION: CLUSTER_ZONE: + CLUSTER_REGION: GCP_PROJECT: GCP_SERVICE_ACCOUNT: GCP_JSON_KEY: diff --git a/pipelines/shared-tasks/remove-gke-cluster/task.sh b/pipelines/shared-tasks/remove-gke-cluster/task.sh index 8b21b94d7..a62edd102 100755 --- a/pipelines/shared-tasks/remove-gke-cluster/task.sh +++ b/pipelines/shared-tasks/remove-gke-cluster/task.sh @@ -9,28 +9,41 @@ CLUSTER_NAME="$(cat gke-cluster-pool/name)" export CLUSTER_NAME export KUBECONFIG="gke-cluster-pool/metadata" -# Parse the zone name from the cluster name, in case it was created in a different zone -# compared to the zone in which we are currently creating new clusters. +# Parse the region or zone name from the cluster name, in case it was created in a different region/zone +# compared to the region/zone in which we are currently creating new clusters. zone=${CLUSTER_NAME##*-zone-} -# If the zone name was empty, or if there was no zone delimiter in the cluster name to start with... -if [[ -z $zone || "$CLUSTER_NAME" != *"-zone-"* ]]; then - echo "Umm... the cluster name did not contain a zone name." +region=${CLUSTER_NAME##*-region-} + +# If the region/zone name was empty, or if there was no region/zone delimiter in the cluster name to start with... +if [[ (-z $zone || "$CLUSTER_NAME" != *"-zone-"*) && (-z $region || "$CLUSTER_NAME" != *"-region-"*) ]]; then + echo "Umm... the cluster name $CLUSTER_NAME did not contain either region or zone name." exit 1 fi +# Decide if we have a regional or zonal cluster. 
+# Without a "-region-" delimiter the expansion above leaves the whole cluster name in $region, so test for the delimiter itself. +region_or_zone_flag="--zone=$zone" +if [[ -n "$region" && "$CLUSTER_NAME" == *"-region-"* ]]; then + region_or_zone_flag="--region=$region" +fi + gcloud auth activate-service-account "$GCP_SERVICE_ACCOUNT" --key-file <(echo "$GCP_JSON_KEY") --project "$GCP_PROJECT" for i in $(seq 1 10); do echo "Checking $CLUSTER_NAME for ongoing operations (iteration $i)...." - running_ops=$(gcloud container operations list --filter="targetLink:$CLUSTER_NAME AND status != done" --project "$GCP_PROJECT" --zone "$zone" --format yaml) + running_ops=$(gcloud container operations list \ + --filter="targetLink:$CLUSTER_NAME AND status != done" \ + --project "$GCP_PROJECT" "$region_or_zone_flag" --format yaml) if [[ -z "$running_ops" ]]; then + echo break fi echo "Found a running cluster operation:" echo "$running_ops" + echo # Give some time for the operation to finsh before checking again. sleep 30 done echo "Removing $CLUSTER_NAME..." -gcloud container clusters delete "$CLUSTER_NAME" --zone "$zone" --quiet +gcloud container clusters delete "$CLUSTER_NAME" "$region_or_zone_flag" --quiet diff --git a/pipelines/shared-tasks/remove-orphaned-gke-clusters/task.sh b/pipelines/shared-tasks/remove-orphaned-gke-clusters/task.sh index 0cb05ec50..fbd1d84c5 100755 --- a/pipelines/shared-tasks/remove-orphaned-gke-clusters/task.sh +++ b/pipelines/shared-tasks/remove-orphaned-gke-clusters/task.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -# Copyright 2024 the Pinniped contributors. All Rights Reserved. +# Copyright 2024-2025 the Pinniped contributors. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 # Sometimes something goes wrong with a GKE test job's cleanup and a @@ -11,7 +11,8 @@ # 1. Are running in GCP with a name that indicates that it was auto-created for testing, # 2. And are older than some number of hours since their creation time. # -# Params are CLUSTER_ZONE, GCP_PROJECT, GCP_SERVICE_ACCOUNT, and GCP_JSON_KEY. +# Params are CLUSTER_REGION, CLUSTER_ZONE, GCP_PROJECT, GCP_SERVICE_ACCOUNT, and GCP_JSON_KEY. 
+# Search for both zonal and regional orphaned clusters. set -euo pipefail @@ -20,17 +21,23 @@ gcloud auth activate-service-account \ --key-file <(echo "$GCP_JSON_KEY") \ --project "$GCP_PROJECT" -all_cloud=($(gcloud container clusters list \ +all_zonal=($(gcloud container clusters list \ --zone "$CLUSTER_ZONE" --project "$GCP_PROJECT" \ --filter "name~gke-[a-f0-9]+-zone-${CLUSTER_ZONE}" --format 'table[no-heading](name)' | sort)) +all_regional=($(gcloud container clusters list \ + --region "$CLUSTER_REGION" --project "$GCP_PROJECT" \ + --filter "name~gke-[a-f0-9]+-region-${CLUSTER_REGION}" --format 'table[no-heading](name)' | sort)) + now_in_seconds_since_epoch=$(date +"%s") hours_ago_to_delete=2 -clusters_to_remove=() +regional_clusters_to_remove=() +zonal_clusters_to_remove=() echo echo "All auto-created GKE clusters (with creation time in UTC):" -for i in "${all_cloud[@]}"; do + +for i in "${all_zonal[@]}"; do creation_time=$(gcloud container clusters describe "$i" \ --zone "$CLUSTER_ZONE" --project "$GCP_PROJECT" \ --format 'table[no-heading](createTime.date(tz=UTC))') @@ -39,7 +46,7 @@ for i in "${all_cloud[@]}"; do # Note: on MacOS this date command would be: date -ju -f '%Y-%m-%dT%H:%M:%S' "$creation_time" '+%s' creation_time_seconds_since_epoch=$(date -u -d "$creation_time" '+%s') if (($((now_in_seconds_since_epoch - creation_time_seconds_since_epoch)) > $((hours_ago_to_delete * 60 * 60)))); then - clusters_to_remove+=("$i") + zonal_clusters_to_remove+=("$i") echo "$i $creation_time (older than $hours_ago_to_delete hours)" else echo "$i $creation_time (less than $hours_ago_to_delete hours old)" @@ -49,16 +56,45 @@ for i in "${all_cloud[@]}"; do exit 1 fi done -if [[ ${#all_cloud[@]} -eq 0 ]]; then + +for i in "${all_regional[@]}"; do + creation_time=$(gcloud container clusters describe "$i" \ + --region "$CLUSTER_REGION" --project "$GCP_PROJECT" \ + --format 'table[no-heading](createTime.date(tz=UTC))') + # UTC date format example: 2022-04-01T17:01:59 + if 
[[ "$creation_time" =~ ^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}$ ]]; then + # Note: on MacOS this date command would be: date -ju -f '%Y-%m-%dT%H:%M:%S' "$creation_time" '+%s' + creation_time_seconds_since_epoch=$(date -u -d "$creation_time" '+%s') + if (($((now_in_seconds_since_epoch - creation_time_seconds_since_epoch)) > $((hours_ago_to_delete * 60 * 60)))); then + regional_clusters_to_remove+=("$i") + echo "$i $creation_time (older than $hours_ago_to_delete hours)" + else + echo "$i $creation_time (less than $hours_ago_to_delete hours old)" + fi + else + echo "GKE cluster creation time not in expected time format: $creation_time" + exit 1 + fi +done + +if [[ ${#all_zonal[@]} -eq 0 && ${#all_regional[@]} -eq 0 ]]; then echo "none" fi echo -if [[ ${#clusters_to_remove[@]} -eq 0 ]]; then - echo "No old orphaned GKE clusters found to remove." +if [[ ${#zonal_clusters_to_remove[@]} -eq 0 ]]; then + echo "No old orphaned zonal GKE clusters found to remove." else - echo "Removing ${#clusters_to_remove[@]} GKE clusters(s) which are older than $hours_ago_to_delete hours in $CLUSTER_ZONE: ${clusters_to_remove[*]} ..." - gcloud container clusters delete --zone "${CLUSTER_ZONE}" --quiet ${clusters_to_remove[*]} + echo "Removing ${#zonal_clusters_to_remove[@]} GKE clusters(s) which are older than $hours_ago_to_delete hours in $CLUSTER_ZONE: ${zonal_clusters_to_remove[*]} ..." + gcloud container clusters delete --zone "${CLUSTER_ZONE}" --quiet ${zonal_clusters_to_remove[*]} +fi + +echo +if [[ ${#regional_clusters_to_remove[@]} -eq 0 ]]; then + echo "No old orphaned regional GKE clusters found to remove." +else + echo "Removing ${#regional_clusters_to_remove[@]} GKE clusters(s) which are older than $hours_ago_to_delete hours in $CLUSTER_REGION: ${regional_clusters_to_remove[*]} ..." 
+ gcloud container clusters delete --region "${CLUSTER_REGION}" --quiet ${regional_clusters_to_remove[*]} fi echo diff --git a/pipelines/shared-tasks/remove-orphaned-gke-clusters/task.yml b/pipelines/shared-tasks/remove-orphaned-gke-clusters/task.yml index dd28a386f..e7d03968e 100644 --- a/pipelines/shared-tasks/remove-orphaned-gke-clusters/task.yml +++ b/pipelines/shared-tasks/remove-orphaned-gke-clusters/task.yml @@ -1,4 +1,4 @@ -# Copyright 2020-2024 the Pinniped contributors. All Rights Reserved. +# Copyright 2020-2025 the Pinniped contributors. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 --- @@ -7,6 +7,7 @@ inputs: - name: pinniped-ci outputs: params: + CLUSTER_REGION: CLUSTER_ZONE: GCP_PROJECT: GCP_SERVICE_ACCOUNT: