try using regional GKE clusters instead of zonal

This commit is contained in:
Ryan Richard
2025-08-02 08:43:48 -07:00
parent 16dd97aff4
commit 07b4fb8dcc
8 changed files with 90 additions and 28 deletions

View File

@@ -23,6 +23,7 @@ meta:
# GKE account info and which region or zone the clusters should be created in and deleted from.
gke_account_params: &gke_account_params
CLUSTER_REGION: us-west1
CLUSTER_ZONE: us-west1-c
GCP_PROJECT: ((gcp-project-name))
GCP_SERVICE_ACCOUNT: ((gke-test-pool-manager-username))

View File

@@ -85,7 +85,8 @@ meta:
# GKE account info and which region or zone the clusters should be created in and deleted from.
gke_account_params: &gke_account_params
CLUSTER_ZONE: us-west1-c
# CLUSTER_ZONE: us-west1-c
CLUSTER_REGION: us-west1
GCP_PROJECT: ((gcp-project-name))
GCP_SERVICE_ACCOUNT: ((gke-test-pool-manager-username))
GCP_JSON_KEY: ((gke-test-pool-manager-json-key))

View File

@@ -73,7 +73,8 @@ meta:
# GKE account info and which region or zone the clusters should be created in and deleted from.
gke_account_params: &gke_account_params
CLUSTER_ZONE: us-west1-c
# CLUSTER_ZONE: us-west1-c
CLUSTER_REGION: us-west1
GCP_PROJECT: ((gcp-project-name))
GCP_SERVICE_ACCOUNT: ((gke-test-pool-manager-username))
GCP_JSON_KEY: ((gke-test-pool-manager-json-key))

View File

@@ -13,13 +13,21 @@ export USE_GKE_GCLOUD_AUTH_PLUGIN=True
cd deploy-gke-cluster-output
gcloud auth activate-service-account "$GCP_SERVICE_ACCOUNT" --key-file <(echo "$GCP_JSON_KEY") --project "$GCP_PROJECT"
# Choose between a regional and a zonal cluster. A non-empty CLUSTER_REGION
# selects a regional cluster; otherwise the cluster is zonal in CLUSTER_ZONE.
# The flag is passed to gcloud, and the suffix is embedded in the cluster name.
case "$CLUSTER_REGION" in
  "")
    region_or_zone_flag="--zone=${CLUSTER_ZONE}"
    region_or_zone_suffix="zone-${CLUSTER_ZONE}"
    ;;
  *)
    region_or_zone_flag="--region=${CLUSTER_REGION}"
    region_or_zone_suffix="region-${CLUSTER_REGION}"
    ;;
esac
if [ -n "$KUBE_VERSION" ]; then
echo
echo "Trying to use Kubernetes version $KUBE_VERSION"
# Look up the latest GKE version for KUBE_VERSION.
GKE_VERSIONS="$(gcloud container get-server-config --zone "$CLUSTER_ZONE" --format json \
GKE_VERSIONS="$(gcloud container get-server-config "$region_or_zone_flag" --format json \
| jq -r '.validMasterVersions[]')"
echo
echo "Found all versions of Kubernetes supported by GKE:"
@@ -36,18 +44,18 @@ else
export VERSION_FLAG="--release-channel=${GKE_CHANNEL:-"regular"}"
fi
# Include the zone of the cluster in its name. This will allow us to change our preferred zone for new
# clusters anytime we want, and the existing clusters can still be deleted because the old zone can
# Include the region or zone of the cluster in its name. This will allow us to change our preferred region/zone for new
# clusters anytime we want, and the existing clusters can still be deleted because the old region/zone can
# be parsed out from the cluster name at deletion time.
CLUSTER_NAME="gke-$(openssl rand -hex 4)-zone-${CLUSTER_ZONE}"
CLUSTER_NAME="gke-$(openssl rand -hex 4)-${region_or_zone_suffix}"
# The cluster name becomes the name of the lock in the pool.
echo "$CLUSTER_NAME" > name
echo "$CLUSTER_NAME" >name
# Start the cluster
# Note that --enable-network-policy is required to enable NetworkPolicy resources. Otherwise they are ignored.
gcloud container clusters create "$CLUSTER_NAME" \
--zone "$CLUSTER_ZONE" \
"$region_or_zone_flag" \
"$VERSION_FLAG" \
--num-nodes 1 \
--machine-type e2-standard-8 \
@@ -67,7 +75,7 @@ gcloud container clusters create "$CLUSTER_NAME" \
--services-secondary-range-name "pods"
# Get the cluster details back, including the admin certificate:
gcloud container clusters describe "$CLUSTER_NAME" --zone "$CLUSTER_ZONE" --format json \
gcloud container clusters describe "$CLUSTER_NAME" "$region_or_zone_flag" --format json \
> /tmp/cluster.json
# Make a new kubeconfig user "cluster-admin" using the admin cert.

View File

@@ -10,6 +10,7 @@ outputs:
params:
KUBE_VERSION:
CLUSTER_ZONE:
CLUSTER_REGION:
GCP_PROJECT:
GCP_SERVICE_ACCOUNT:
GCP_JSON_KEY:

View File

@@ -9,28 +9,41 @@ CLUSTER_NAME="$(cat gke-cluster-pool/name)"
export CLUSTER_NAME
export KUBECONFIG="gke-cluster-pool/metadata"
# Parse the zone name from the cluster name, in case it was created in a different zone
# compared to the zone in which we are currently creating new clusters.
# Parse the region or zone name from the cluster name, in case it was created in a different region/zone
# compared to the region/zone in which we are currently creating new clusters.
zone=${CLUSTER_NAME##*-zone-}
# If the zone name was empty, or if there was no zone delimiter in the cluster name to start with...
if [[ -z $zone || "$CLUSTER_NAME" != *"-zone-"* ]]; then
echo "Umm... the cluster name did not contain a zone name."
region=${CLUSTER_NAME##*-region-}
# If the region/zone name was empty, or if there was no region/zone delimiter in the cluster name to start with...
if [[ (-z $zone || "$CLUSTER_NAME" != *"-zone-"*) && (-z $region || "$CLUSTER_NAME" != *"-region-"*) ]]; then
echo "Umm... the cluster name $CLUSTER_NAME did not contain either region or zone name."
exit 1
fi
# Decide if we have a regional or zonal cluster.
#
# Prints the gcloud location flag (--region=... or --zone=...) for the cluster
# name given as $1. The decision is made on the presence of the "-region-"
# delimiter, not on whether the parsed value is non-empty: ${name##*-region-}
# leaves the name unchanged when the delimiter is absent, so a zonal cluster
# name would otherwise produce a non-empty "region" and be treated as regional.
location_flag_for_cluster() {
  local name=$1
  if [[ "$name" == *"-region-"* ]]; then
    printf '%s\n' "--region=${name##*-region-}"
  else
    printf '%s\n' "--zone=${name##*-zone-}"
  fi
}
region_or_zone_flag="$(location_flag_for_cluster "$CLUSTER_NAME")"
gcloud auth activate-service-account "$GCP_SERVICE_ACCOUNT" --key-file <(echo "$GCP_JSON_KEY") --project "$GCP_PROJECT"
for i in $(seq 1 10); do
echo "Checking $CLUSTER_NAME for ongoing operations (iteration $i)...."
running_ops=$(gcloud container operations list --filter="targetLink:$CLUSTER_NAME AND status != done" --project "$GCP_PROJECT" --zone "$zone" --format yaml)
running_ops=$(gcloud container operations list \
--filter="targetLink:$CLUSTER_NAME AND status != done" \
--project "$GCP_PROJECT" "$region_or_zone_flag" --format yaml)
if [[ -z "$running_ops" ]]; then
echo
break
fi
echo "Found a running cluster operation:"
echo "$running_ops"
echo
# Give some time for the operation to finish before checking again.
sleep 30
done
echo "Removing $CLUSTER_NAME..."
gcloud container clusters delete "$CLUSTER_NAME" --zone "$zone" --quiet
gcloud container clusters delete "$CLUSTER_NAME" "$region_or_zone_flag" --quiet

View File

@@ -1,6 +1,6 @@
#!/usr/bin/env bash
# Copyright 2024 the Pinniped contributors. All Rights Reserved.
# Copyright 2024-2025 the Pinniped contributors. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
# Sometimes something goes wrong with a GKE test job's cleanup and a
@@ -11,7 +11,8 @@
# 1. Are running in GCP with a name that indicates that it was auto-created for testing,
# 2. And are older than some number of hours since their creation time.
#
# Params are CLUSTER_ZONE, GCP_PROJECT, GCP_SERVICE_ACCOUNT, and GCP_JSON_KEY.
# Params are CLUSTER_REGION, CLUSTER_ZONE, GCP_PROJECT, GCP_SERVICE_ACCOUNT, and GCP_JSON_KEY.
# Search for both zonal and regional orphaned clusters.
set -euo pipefail
@@ -20,17 +21,23 @@ gcloud auth activate-service-account \
--key-file <(echo "$GCP_JSON_KEY") \
--project "$GCP_PROJECT"
all_cloud=($(gcloud container clusters list \
all_zonal=($(gcloud container clusters list \
--zone "$CLUSTER_ZONE" --project "$GCP_PROJECT" \
--filter "name~gke-[a-f0-9]+-zone-${CLUSTER_ZONE}" --format 'table[no-heading](name)' | sort))
all_regional=($(gcloud container clusters list \
--region "$CLUSTER_REGION" --project "$GCP_PROJECT" \
--filter "name~gke-[a-f0-9]+-region-${CLUSTER_REGION}" --format 'table[no-heading](name)' | sort))
now_in_seconds_since_epoch=$(date +"%s")
hours_ago_to_delete=2
clusters_to_remove=()
regional_clusters_to_remove=()
zonal_clusters_to_remove=()
echo
echo "All auto-created GKE clusters (with creation time in UTC):"
for i in "${all_cloud[@]}"; do
for i in "${all_zonal[@]}"; do
creation_time=$(gcloud container clusters describe "$i" \
--zone "$CLUSTER_ZONE" --project "$GCP_PROJECT" \
--format 'table[no-heading](createTime.date(tz=UTC))')
@@ -39,7 +46,7 @@ for i in "${all_cloud[@]}"; do
# Note: on MacOS this date command would be: date -ju -f '%Y-%m-%dT%H:%M:%S' "$creation_time" '+%s'
creation_time_seconds_since_epoch=$(date -u -d "$creation_time" '+%s')
if (($((now_in_seconds_since_epoch - creation_time_seconds_since_epoch)) > $((hours_ago_to_delete * 60 * 60)))); then
clusters_to_remove+=("$i")
zonal_clusters_to_remove+=("$i")
echo "$i $creation_time (older than $hours_ago_to_delete hours)"
else
echo "$i $creation_time (less than $hours_ago_to_delete hours old)"
@@ -49,16 +56,45 @@ for i in "${all_cloud[@]}"; do
exit 1
fi
done
if [[ ${#all_cloud[@]} -eq 0 ]]; then
# Walk every auto-created regional cluster, print its creation time, and
# collect the ones older than the cutoff into regional_clusters_to_remove.
for regional_cluster in "${all_regional[@]}"; do
  created_at=$(gcloud container clusters describe "$regional_cluster" \
    --region "$CLUSTER_REGION" --project "$GCP_PROJECT" \
    --format 'table[no-heading](createTime.date(tz=UTC))')

  # Bail out early on anything that does not look like a UTC timestamp.
  # UTC date format example: 2022-04-01T17:01:59
  if [[ ! "$created_at" =~ ^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}$ ]]; then
    echo "GKE cluster creation time not in expected time format: $created_at"
    exit 1
  fi

  # Note: on MacOS this date command would be: date -ju -f '%Y-%m-%dT%H:%M:%S' "$created_at" '+%s'
  created_epoch=$(date -u -d "$created_at" '+%s')
  age_in_seconds=$((now_in_seconds_since_epoch - created_epoch))
  max_age_in_seconds=$((hours_ago_to_delete * 60 * 60))

  if ((age_in_seconds > max_age_in_seconds)); then
    regional_clusters_to_remove+=("$regional_cluster")
    echo "$regional_cluster $created_at (older than $hours_ago_to_delete hours)"
  else
    echo "$regional_cluster $created_at (less than $hours_ago_to_delete hours old)"
  fi
done
if [[ ${#all_zonal[@]} -eq 0 && ${#all_regional[@]} -eq 0 ]]; then
echo "none"
fi
echo
if [[ ${#clusters_to_remove[@]} -eq 0 ]]; then
echo "No old orphaned GKE clusters found to remove."
if [[ ${#zonal_clusters_to_remove[@]} -eq 0 ]]; then
echo "No old orphaned zonal GKE clusters found to remove."
else
echo "Removing ${#clusters_to_remove[@]} GKE clusters(s) which are older than $hours_ago_to_delete hours in $CLUSTER_ZONE: ${clusters_to_remove[*]} ..."
gcloud container clusters delete --zone "${CLUSTER_ZONE}" --quiet ${clusters_to_remove[*]}
echo "Removing ${#zonal_clusters_to_remove[@]} GKE clusters(s) which are older than $hours_ago_to_delete hours in $CLUSTER_ZONE: ${zonal_clusters_to_remove[*]} ..."
gcloud container clusters delete --zone "${CLUSTER_ZONE}" --quiet ${zonal_clusters_to_remove[*]}
fi
echo
if [[ ${#regional_clusters_to_remove[@]} -eq 0 ]]; then
echo "No old orphaned regional GKE clusters found to remove."
else
echo "Removing ${#regional_clusters_to_remove[@]} GKE clusters(s) which are older than $hours_ago_to_delete hours in $CLUSTER_REGION: ${regional_clusters_to_remove[*]} ..."
gcloud container clusters delete --region "${CLUSTER_REGION}" --quiet ${regional_clusters_to_remove[*]}
fi
echo

View File

@@ -1,4 +1,4 @@
# Copyright 2020-2024 the Pinniped contributors. All Rights Reserved.
# Copyright 2020-2025 the Pinniped contributors. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
---
@@ -7,6 +7,7 @@ inputs:
- name: pinniped-ci
outputs:
params:
CLUSTER_REGION:
CLUSTER_ZONE:
GCP_PROJECT:
GCP_SERVICE_ACCOUNT: