From d1fdaecc94359009dc83307f1b6f6e93c565b755 Mon Sep 17 00:00:00 2001 From: Tiger Kaovilai Date: Fri, 25 Mar 2022 05:01:55 -0400 Subject: [PATCH] Add labels to expired backups failing garbage collection. (#4757) * Add bsl related TTL gc errors to labelSelectors * if backup label map is nil, make map * clear label if not BSL error Signed-off-by: Tiger Kaovilai --- changelogs/unreleased/4757-kaovilai | 1 + pkg/controller/gc_controller.go | 26 +++++++++++++++++++++- site/content/docs/main/how-velero-works.md | 9 ++++++++ 3 files changed, 35 insertions(+), 1 deletion(-) create mode 100644 changelogs/unreleased/4757-kaovilai diff --git a/changelogs/unreleased/4757-kaovilai b/changelogs/unreleased/4757-kaovilai new file mode 100644 index 000000000..34229c044 --- /dev/null +++ b/changelogs/unreleased/4757-kaovilai @@ -0,0 +1 @@ +Garbage collector now adds labels to backups that failed to delete for BSLNotFound, BSLCannotGet, BSLReadOnly reasons. \ No newline at end of file diff --git a/pkg/controller/gc_controller.go b/pkg/controller/gc_controller.go index b65f7ede6..d365dcf41 100644 --- a/pkg/controller/gc_controller.go +++ b/pkg/controller/gc_controller.go @@ -39,7 +39,11 @@ import ( ) const ( - GCSyncPeriod = 60 * time.Minute + GCSyncPeriod = 60 * time.Minute + garbageCollectionFailure = "velero.io/gc-failure" + gcFailureBSLNotFound = "BSLNotFound" + gcFailureBSLCannotGet = "BSLCannotGet" + gcFailureBSLReadOnly = "BSLReadOnly" ) // gcController creates DeleteBackupRequests for expired backups. 
@@ -134,6 +138,10 @@ func (c *gcController) processQueueItem(key string) error { log.Info("Backup has expired") + if backup.Labels == nil { + backup.Labels = make(map[string]string) + } + loc := &velerov1api.BackupStorageLocation{} if err := c.kbClient.Get(context.Background(), client.ObjectKey{ Namespace: ns, @@ -141,15 +149,31 @@ func (c *gcController) processQueueItem(key string) error { }, loc); err != nil { if apierrors.IsNotFound(err) { log.Warnf("Backup cannot be garbage-collected because backup storage location %s does not exist", backup.Spec.StorageLocation) + backup.Labels[garbageCollectionFailure] = gcFailureBSLNotFound + } else { + backup.Labels[garbageCollectionFailure] = gcFailureBSLCannotGet + } + if err := c.kbClient.Update(context.Background(), backup); err != nil { + log.WithError(err).Error("error updating backup labels") } return errors.Wrap(err, "error getting backup storage location") } if loc.Spec.AccessMode == velerov1api.BackupStorageLocationAccessModeReadOnly { log.Infof("Backup cannot be garbage-collected because backup storage location %s is currently in read-only mode", loc.Name) + backup.Labels[garbageCollectionFailure] = gcFailureBSLReadOnly + if err := c.kbClient.Update(context.Background(), backup); err != nil { + log.WithError(err).Error("error updating backup labels") + } return nil } + // remove gc fail error label after this point + delete(backup.Labels, garbageCollectionFailure) + if err := c.kbClient.Update(context.Background(), backup); err != nil { + log.WithError(err).Error("error updating backup labels") + } + selector := labels.SelectorFromSet(labels.Set(map[string]string{ velerov1api.BackupNameLabel: label.GetValidName(backup.Name), velerov1api.BackupUIDLabel: string(backup.UID), diff --git a/site/content/docs/main/how-velero-works.md b/site/content/docs/main/how-velero-works.md index 250ab2dab..c6b2f2a71 100644 --- a/site/content/docs/main/how-velero-works.md +++ b/site/content/docs/main/how-velero-works.md @@ -71,6 
+71,15 @@ When you create a backup, you can specify a TTL (time to live) by adding the fla The TTL flag allows the user to specify the backup retention period with the value specified in hours, minutes and seconds in the form `--ttl 24h0m0s`. If not specified, a default TTL value of 30 days will be applied. +If a backup fails to delete, a label `velero.io/gc-failure=<Reason>` will be added to the backup custom resource. + +You can use this label to filter and select backups that failed to delete. + +Implemented reasons are: +- BSLNotFound: Backup storage location not found +- BSLCannotGet: Backup storage location cannot be retrieved from the API server for reasons other than not found +- BSLReadOnly: Backup storage location is read-only + ## Object storage sync Velero treats object storage as the source of truth. It continuously checks to see that the correct backup resources are always present. If there is a properly formatted backup file in the storage bucket, but no corresponding backup resource in the Kubernetes API, Velero synchronizes the information from object storage to Kubernetes.