diff --git a/changelogs/unreleased/4757-kaovilai b/changelogs/unreleased/4757-kaovilai new file mode 100644 index 000000000..34229c044 --- /dev/null +++ b/changelogs/unreleased/4757-kaovilai @@ -0,0 +1 @@ +Garbage collector now adds labels to backups that failed to delete for BSLNotFound, BSLCannotGet, BSLReadOnly reasons. \ No newline at end of file diff --git a/pkg/controller/gc_controller.go b/pkg/controller/gc_controller.go index b65f7ede6..d365dcf41 100644 --- a/pkg/controller/gc_controller.go +++ b/pkg/controller/gc_controller.go @@ -39,7 +39,11 @@ import ( ) const ( - GCSyncPeriod = 60 * time.Minute + GCSyncPeriod = 60 * time.Minute + garbageCollectionFailure = "velero.io/gc-failure" + gcFailureBSLNotFound = "BSLNotFound" + gcFailureBSLCannotGet = "BSLCannotGet" + gcFailureBSLReadOnly = "BSLReadOnly" ) // gcController creates DeleteBackupRequests for expired backups. @@ -134,6 +138,10 @@ func (c *gcController) processQueueItem(key string) error { log.Info("Backup has expired") + if backup.Labels == nil { + backup.Labels = make(map[string]string) + } + loc := &velerov1api.BackupStorageLocation{} if err := c.kbClient.Get(context.Background(), client.ObjectKey{ Namespace: ns, @@ -141,15 +149,31 @@ func (c *gcController) processQueueItem(key string) error { }, loc); err != nil { if apierrors.IsNotFound(err) { log.Warnf("Backup cannot be garbage-collected because backup storage location %s does not exist", backup.Spec.StorageLocation) + backup.Labels[garbageCollectionFailure] = gcFailureBSLNotFound + } else { + backup.Labels[garbageCollectionFailure] = gcFailureBSLCannotGet + } + if err := c.kbClient.Update(context.Background(), backup); err != nil { + log.WithError(err).Error("error updating backup labels") } return errors.Wrap(err, "error getting backup storage location") } if loc.Spec.AccessMode == velerov1api.BackupStorageLocationAccessModeReadOnly { log.Infof("Backup cannot be garbage-collected because backup storage location %s is currently in read-only 
mode", loc.Name) + backup.Labels[garbageCollectionFailure] = gcFailureBSLReadOnly + if err := c.kbClient.Update(context.Background(), backup); err != nil { + log.WithError(err).Error("error updating backup labels") + } return nil } + // remove gc fail error label after this point + delete(backup.Labels, garbageCollectionFailure) + if err := c.kbClient.Update(context.Background(), backup); err != nil { + log.WithError(err).Error("error updating backup labels") + } + selector := labels.SelectorFromSet(labels.Set(map[string]string{ velerov1api.BackupNameLabel: label.GetValidName(backup.Name), velerov1api.BackupUIDLabel: string(backup.UID), diff --git a/site/content/docs/main/how-velero-works.md b/site/content/docs/main/how-velero-works.md index 250ab2dab..c6b2f2a71 100644 --- a/site/content/docs/main/how-velero-works.md +++ b/site/content/docs/main/how-velero-works.md @@ -71,6 +71,15 @@ When you create a backup, you can specify a TTL (time to live) by adding the fla The TTL flag allows the user to specify the backup retention period with the value specified in hours, minutes and seconds in the form `--ttl 24h0m0s`. If not specified, a default TTL value of 30 days will be applied. +If a backup fails to delete, a label `velero.io/gc-failure=<Reason>` will be added to the backup custom resource. + +You can use this label to filter and select backups that failed to delete. + +Implemented reasons are: +- BSLNotFound: Backup storage location not found +- BSLCannotGet: Backup storage location cannot be retrieved from the API server for reasons other than not found +- BSLReadOnly: Backup storage location is read-only + ## Object storage sync Velero treats object storage as the source of truth. It continuously checks to see that the correct backup resources are always present. If there is a properly formatted backup file in the storage bucket, but no corresponding backup resource in the Kubernetes API, Velero synchronizes the information from object storage to Kubernetes.