add CI job to remove orphaned GKE clusters

This commit is contained in:
Ryan Richard
2024-12-23 10:59:06 -08:00
parent b5e67330b1
commit b9fe22f3a5
3 changed files with 103 additions and 0 deletions

View File

@@ -21,6 +21,13 @@ meta:
GCP_USERNAME: ((gcp-instance-admin-username))
GCP_JSON_KEY: ((gcp-instance-admin-json-key))
# GKE account info and which zone the clusters should be created in and deleted from.
# Declared as a YAML anchor so that jobs can merge these keys into a task's
# `params` with `<<: *gke_account_params`.
# The `((...))` values are placeholders that are filled in from outside this
# file — presumably pipeline vars/secrets; confirm against the pipeline's var source.
gke_account_params: &gke_account_params
CLUSTER_ZONE: us-central1-c
GCP_PROJECT: ((gcp-project-name))
GCP_SERVICE_ACCOUNT: ((gke-test-pool-manager-username))
GCP_JSON_KEY: ((gke-test-pool-manager-json-key))
resources:
- name: pinniped-ci
@@ -123,3 +130,19 @@ jobs:
image: gcloud-image
params:
<<: *gcp_account_params
# Job which runs the remove-orphaned-gke-clusters shared task to look for
# auto-created GKE test clusters that were left behind by other jobs.
- name: remove-orphaned-gke-clusters
public: true # all logs are publicly visible
plan:
# Fetch the task definition repo, the task image, and the trigger resource
# concurrently.
- in_parallel:
- get: pinniped-ci
- get: gcloud-image
# `trigger: true` starts this job whenever the `hourly` resource has a new
# version. The `hourly` resource is defined outside of this hunk —
# presumably a time resource; confirm in the `resources:` section.
- get: hourly
trigger: true
- task: remove-orphaned-gke-clusters
# Run the task up to 2 times before failing, each attempt limited to 25 minutes.
attempts: 2
timeout: 25m
file: pinniped-ci/pipelines/shared-tasks/remove-orphaned-gke-clusters/task.yml
image: gcloud-image
params:
# Supplies CLUSTER_ZONE, GCP_PROJECT, GCP_SERVICE_ACCOUNT, and GCP_JSON_KEY.
<<: *gke_account_params

View File

@@ -0,0 +1,65 @@
#!/usr/bin/env bash

# Copyright 2024 the Pinniped contributors. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

# Sometimes something goes wrong with a GKE test job's cleanup and a
# GKE cluster gets orphaned, meaning that it is still running but no
# CI job knows that it needs to be cleaned up.
#
# Find all orphaned GKE clusters, which are those that:
# 1. Are running in GCP with a name that indicates that it was auto-created for testing,
# 2. And are older than some number of hours since their creation time.
#
# NOTE: this script currently only prints the `gcloud container clusters delete`
# command that it would run. It does not delete anything yet.
#
# Params are CLUSTER_ZONE, GCP_PROJECT, GCP_SERVICE_ACCOUNT, and GCP_JSON_KEY.
# Exits non-zero if a param is missing or empty, if any gcloud command fails,
# or if a cluster's creation time is not in the expected format.

set -euo pipefail

# Fail fast on missing *or empty* params. `set -u` alone only catches unset
# variables, and an empty CLUSTER_ZONE would silently widen the name filter.
: "${CLUSTER_ZONE:?CLUSTER_ZONE must be set}"
: "${GCP_PROJECT:?GCP_PROJECT must be set}"
: "${GCP_SERVICE_ACCOUNT:?GCP_SERVICE_ACCOUNT must be set}"
: "${GCP_JSON_KEY:?GCP_JSON_KEY must be set}"

# Clusters older than this many hours are considered orphaned.
readonly hours_ago_to_delete=2

# The key is passed via process substitution so it is never written to disk.
gcloud auth activate-service-account \
  "$GCP_SERVICE_ACCOUNT" \
  --key-file <(printf '%s\n' "$GCP_JSON_KEY") \
  --project "$GCP_PROJECT"

# Capture the listing with a plain assignment so that a gcloud (or sort)
# failure aborts the script under `set -e` and `pipefail`. An array compound
# assignment like `arr=($(cmd))` always returns 0, which would make a failed
# listing look like "no clusters found".
cluster_names=$(
  gcloud container clusters list \
    --zones "$CLUSTER_ZONE" --project "$GCP_PROJECT" \
    --filter "name:gke-*-zone-${CLUSTER_ZONE}" --format 'table[no-heading](name)' |
    sort
)

# Build the array line by line. `read` trims surrounding whitespace and,
# unlike an unquoted command substitution, performs no glob expansion.
all_cloud=()
while read -r cluster_name; do
  if [[ -n "$cluster_name" ]]; then
    all_cloud+=("$cluster_name")
  fi
done <<<"$cluster_names"

now_in_seconds_since_epoch=$(date +"%s")
max_age_in_seconds=$((hours_ago_to_delete * 60 * 60))
clusters_to_remove=()

echo
echo "All auto-created GKE clusters (with creation time in UTC):"
if ((${#all_cloud[@]} == 0)); then
  # Handled before the loop so that an empty array is never expanded, which
  # is an "unbound variable" error under `set -u` on bash versions before 4.4.
  echo "none"
else
  for i in "${all_cloud[@]}"; do
    creation_time=$(gcloud container clusters describe "$i" \
      --zone "$CLUSTER_ZONE" --project "$GCP_PROJECT" \
      --format 'table[no-heading](createTime.date(tz=UTC))')
    # UTC date format example: 2022-04-01T17:01:59
    if [[ ! "$creation_time" =~ ^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}$ ]]; then
      echo "GKE cluster creation time not in expected time format: $creation_time" >&2
      exit 1
    fi
    # Note: on MacOS this date command would be: date -ju -f '%Y-%m-%dT%H:%M:%S' "$creation_time" '+%s'
    creation_time_seconds_since_epoch=$(date -u -d "$creation_time" '+%s')
    if ((now_in_seconds_since_epoch - creation_time_seconds_since_epoch > max_age_in_seconds)); then
      clusters_to_remove+=("$i")
      echo "$i $creation_time (older than $hours_ago_to_delete hours)"
    else
      echo "$i $creation_time (less than $hours_ago_to_delete hours old)"
    fi
  done
fi

echo
if ((${#clusters_to_remove[@]} == 0)); then
  echo "No old orphaned GKE clusters found to remove."
else
  echo "Removing ${#clusters_to_remove[@]} GKE cluster(s) which are older than $hours_ago_to_delete hours in $CLUSTER_ZONE: ${clusters_to_remove[*]} ..."
  # Dry run: only print the delete command instead of executing it.
  echo Would run command: gcloud container clusters delete --zone "${CLUSTER_ZONE}" --quiet "${clusters_to_remove[*]}"
fi

echo
echo "Done!"

View File

@@ -0,0 +1,15 @@
# Copyright 2020-2024 the Pinniped contributors. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
---
# Task definition which runs task.sh to look for orphaned GKE clusters.
platform: linux
# The pinniped-ci input provides the task.sh script referenced by `run.path`.
inputs:
- name: pinniped-ci
# No outputs are declared.
outputs:
# These params are left empty here, so they are expected to be supplied by the
# job which runs this task. task.sh documents the same four names as its params.
params:
CLUSTER_ZONE:
GCP_PROJECT:
GCP_SERVICE_ACCOUNT:
GCP_JSON_KEY:
run:
path: pinniped-ci/pipelines/shared-tasks/remove-orphaned-gke-clusters/task.sh