diff --git a/pipelines/concourse-workers/pipeline.yml b/pipelines/concourse-workers/pipeline.yml
index ac0925026..f0f18080f 100644
--- a/pipelines/concourse-workers/pipeline.yml
+++ b/pipelines/concourse-workers/pipeline.yml
@@ -21,6 +21,13 @@ meta:
     GCP_USERNAME: ((gcp-instance-admin-username))
     GCP_JSON_KEY: ((gcp-instance-admin-json-key))
 
+  # GKE account info and which zone the clusters should be created in and deleted from.
+  gke_account_params: &gke_account_params
+    CLUSTER_ZONE: us-central1-c
+    GCP_PROJECT: ((gcp-project-name))
+    GCP_SERVICE_ACCOUNT: ((gke-test-pool-manager-username))
+    GCP_JSON_KEY: ((gke-test-pool-manager-json-key))
+
 resources:
 
   - name: pinniped-ci
@@ -123,3 +130,19 @@ jobs:
         image: gcloud-image
         params:
           <<: *gcp_account_params
+
+  - name: remove-orphaned-gke-clusters
+    public: true # all logs are publicly visible
+    plan:
+      - in_parallel:
+          - get: pinniped-ci
+          - get: gcloud-image
+          - get: hourly
+            trigger: true
+      - task: remove-orphaned-gke-clusters
+        attempts: 2
+        timeout: 25m
+        file: pinniped-ci/pipelines/shared-tasks/remove-orphaned-gke-clusters/task.yml
+        image: gcloud-image
+        params:
+          <<: *gke_account_params
diff --git a/pipelines/shared-tasks/remove-orphaned-gke-clusters/task.sh b/pipelines/shared-tasks/remove-orphaned-gke-clusters/task.sh
new file mode 100755
index 000000000..9923581e4
--- /dev/null
+++ b/pipelines/shared-tasks/remove-orphaned-gke-clusters/task.sh
@@ -0,0 +1,83 @@
+#!/usr/bin/env bash
+
+# Copyright 2024 the Pinniped contributors. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# Sometimes something goes wrong with a GKE test job's cleanup and a
+# GKE cluster gets orphaned, meaning that it is still running but no
+# CI job knows that it needs to be cleaned up.
+#
+# Find all orphaned GKE clusters, meaning those which:
+# 1. Are running in GCP with a name that indicates that it was auto-created for testing,
+# 2. And are older than some number of hours since their creation time.
+#
+# NOTE: For now this script is a dry run. It only prints the delete command
+# that it would run and does not actually delete any clusters.
+#
+# Params are CLUSTER_ZONE, GCP_PROJECT, GCP_SERVICE_ACCOUNT, and GCP_JSON_KEY.
+
+set -euo pipefail
+
+gcloud auth activate-service-account \
+  "$GCP_SERVICE_ACCOUNT" \
+  --key-file <(echo "$GCP_JSON_KEY") \
+  --project "$GCP_PROJECT"
+
+# Capture the listing with a plain assignment so that a failing gcloud
+# command aborts the script (via errexit and pipefail).
+cluster_names=$(gcloud container clusters list \
+  --zones "$CLUSTER_ZONE" --project "$GCP_PROJECT" \
+  --filter "name:gke-*-zone-${CLUSTER_ZONE}" --format 'table[no-heading](name)' | sort)
+
+# Collect the non-empty lines into an array. Reading line by line avoids the
+# pathname expansion that an unquoted $(...) inside an array literal allows.
+all_cloud=()
+while read -r cluster_name; do
+  if [[ -n "$cluster_name" ]]; then
+    all_cloud+=("$cluster_name")
+  fi
+done <<<"$cluster_names"
+
+now_in_seconds_since_epoch=$(date +"%s")
+hours_ago_to_delete=2
+max_age_in_seconds=$((hours_ago_to_delete * 60 * 60))
+clusters_to_remove=()
+
+echo
+echo "All auto-created GKE clusters (with creation time in UTC):"
+if [[ ${#all_cloud[@]} -eq 0 ]]; then
+  echo "none"
+else
+  # Only loop when there is at least one cluster, because older versions of
+  # bash treat expanding an empty array as an error under "set -u".
+  for i in "${all_cloud[@]}"; do
+    creation_time=$(gcloud container clusters describe "$i" \
+      --zone "$CLUSTER_ZONE" --project "$GCP_PROJECT" \
+      --format 'table[no-heading](createTime.date(tz=UTC))')
+    # UTC date format example: 2022-04-01T17:01:59
+    if [[ ! "$creation_time" =~ ^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}$ ]]; then
+      echo "GKE cluster creation time not in expected time format: $creation_time"
+      exit 1
+    fi
+    # Note: on MacOS this date command would be: date -ju -f '%Y-%m-%dT%H:%M:%S' "$creation_time" '+%s'
+    creation_time_seconds_since_epoch=$(date -u -d "$creation_time" '+%s')
+    if ((now_in_seconds_since_epoch - creation_time_seconds_since_epoch > max_age_in_seconds)); then
+      clusters_to_remove+=("$i")
+      echo "$i $creation_time (older than $hours_ago_to_delete hours)"
+    else
+      echo "$i $creation_time (less than $hours_ago_to_delete hours old)"
+    fi
+  done
+fi
+
+echo
+if [[ ${#clusters_to_remove[@]} -eq 0 ]]; then
+  echo "No old orphaned GKE clusters found to remove."
+else
+  echo "Removing ${#clusters_to_remove[@]} GKE cluster(s) which are older than $hours_ago_to_delete hours in $CLUSTER_ZONE: ${clusters_to_remove[*]} ..."
+  # Dry run: print the delete command rather than running it.
+  echo Would run command: gcloud container clusters delete --zone "${CLUSTER_ZONE}" --quiet "${clusters_to_remove[*]}"
+fi
+
+echo
+echo "Done!"
diff --git a/pipelines/shared-tasks/remove-orphaned-gke-clusters/task.yml b/pipelines/shared-tasks/remove-orphaned-gke-clusters/task.yml
new file mode 100644
index 000000000..dd28a386f
--- /dev/null
+++ b/pipelines/shared-tasks/remove-orphaned-gke-clusters/task.yml
@@ -0,0 +1,18 @@
+# Copyright 2020-2024 the Pinniped contributors. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+---
+# Runs task.sh, which lists the auto-created GKE test clusters and reports the old ones.
+platform: linux
+inputs:
+  - name: pinniped-ci
+# This task produces no outputs.
+outputs: []
+# These are all read by task.sh. The job which runs this task provides their values.
+params:
+  CLUSTER_ZONE:
+  GCP_PROJECT:
+  GCP_SERVICE_ACCOUNT:
+  GCP_JSON_KEY:
+run:
+  path: pinniped-ci/pipelines/shared-tasks/remove-orphaned-gke-clusters/task.sh