From a8773547507d408382e423b1b0ce245aebb1ed31 Mon Sep 17 00:00:00 2001 From: Bridget McErlean Date: Mon, 30 Nov 2020 13:58:34 -0500 Subject: [PATCH] Don't fail backup deletion if downloading tarball fails (#2993) * Don't fail backup if downloading tarball fails Previously, we would always attempt to download the tarball for a backup for processing DeleteItemAction plugins, even if there weren't any. This caused an issue for some users in the case where the backup tarball had been deleted from object storage as the backup deletion would fail. Now, we only attempt to download the tarball in the case where there are DeleteItemAction plugins. If downloading that tarball fails, we log the error, skip the processing of the DeleteItemAction plugins and proceed with the rest of the deletion. Signed-off-by: Bridget McErlean * Skip file removal in closeAndRemoveFile if nil Signed-off-by: Bridget McErlean --- changelogs/unreleased/2993-zubron | 1 + pkg/controller/backup_controller.go | 4 + pkg/controller/backup_deletion_controller.go | 43 +-- .../backup_deletion_controller_test.go | 261 ++++++++++++++++++ 4 files changed, 289 insertions(+), 20 deletions(-) create mode 100644 changelogs/unreleased/2993-zubron diff --git a/changelogs/unreleased/2993-zubron b/changelogs/unreleased/2993-zubron new file mode 100644 index 000000000..d67758092 --- /dev/null +++ b/changelogs/unreleased/2993-zubron @@ -0,0 +1 @@ +Fixed an issue where the deletion of a backup would fail if the backup tarball couldn't be downloaded from object storage. Now the tarball is only downloaded if there are associated DeleteItemAction plugins and if downloading the tarball fails, the plugins are skipped. \ No newline at end of file diff --git a/pkg/controller/backup_controller.go b/pkg/controller/backup_controller.go index a7ace7cf5..f77b43ce2 100644 --- a/pkg/controller/backup_controller.go +++ b/pkg/controller/backup_controller.go @@ -734,6 +734,10 @@ func persistBackup(backup *pkgbackup.Request, } func closeAndRemoveFile(file *os.File, log logrus.FieldLogger) { + if file == nil { + log.Debug("Skipping removal of file due to nil file pointer") + return + } if err := file.Close(); err != nil { log.WithError(err).WithField("file", file.Name()).Error("error closing file") } diff --git a/pkg/controller/backup_deletion_controller.go b/pkg/controller/backup_deletion_controller.go index c2897ddbe..ffc0f8b9a 100644 --- a/pkg/controller/backup_deletion_controller.go +++ b/pkg/controller/backup_deletion_controller.go @@ -295,13 +295,6 @@ func (c *backupDeletionController) processRequest(req *velerov1api.DeleteBackupR errs = append(errs, err.Error()) } - // Download the tarball - backupFile, err := downloadToTempFile(backup.Name, backupStore, log) - if err != nil { - return errors.Wrap(err, "error downloading backup") - } - defer closeAndRemoveFile(backupFile, c.logger) - actions, err := pluginManager.GetDeleteItemActions() log.Debugf("%d actions before invoking actions", len(actions)) if err != nil { @@ -309,20 +302,30 @@ func (c *backupDeletionController) processRequest(req *velerov1api.DeleteBackupR } // don't defer CleanupClients here, since it was already called above. - ctx := &delete.Context{ - Backup: backup, - BackupReader: backupFile, - Actions: actions, - Log: c.logger, - DiscoveryHelper: c.helper, - Filesystem: filesystem.NewFileSystem(), - } + if len(actions) > 0 { + // Download the tarball + backupFile, err := downloadToTempFile(backup.Name, backupStore, log) + defer closeAndRemoveFile(backupFile, c.logger) - // Optimization: wrap in a gofunc? Would be useful for large backups with lots of objects. - // but what do we do with the error returned? We can't just swallow it as that may lead to dangling resources. - err = delete.InvokeDeleteActions(ctx) - if err != nil { - return errors.Wrap(err, "error invoking delete item actions") + if err != nil { + log.WithError(err).Errorf("Unable to download tarball for backup %s, skipping associated DeleteItemAction plugins", backup.Name) + } else { + ctx := &delete.Context{ + Backup: backup, + BackupReader: backupFile, + Actions: actions, + Log: c.logger, + DiscoveryHelper: c.helper, + Filesystem: filesystem.NewFileSystem(), + } + + // Optimization: wrap in a gofunc? Would be useful for large backups with lots of objects. + // but what do we do with the error returned? We can't just swallow it as that may lead to dangling resources. + err = delete.InvokeDeleteActions(ctx) + if err != nil { + return errors.Wrap(err, "error invoking delete item actions") + } + } } if backupStore != nil { diff --git a/pkg/controller/backup_deletion_controller_test.go b/pkg/controller/backup_deletion_controller_test.go index 212cc6e28..93741731b 100644 --- a/pkg/controller/backup_deletion_controller_test.go +++ b/pkg/controller/backup_deletion_controller_test.go @@ -49,6 +49,8 @@ import ( persistencemocks "github.com/vmware-tanzu/velero/pkg/persistence/mocks" "github.com/vmware-tanzu/velero/pkg/plugin/clientmgmt" pluginmocks "github.com/vmware-tanzu/velero/pkg/plugin/mocks" + "github.com/vmware-tanzu/velero/pkg/plugin/velero" + "github.com/vmware-tanzu/velero/pkg/plugin/velero/mocks" velerotest "github.com/vmware-tanzu/velero/pkg/test" "github.com/vmware-tanzu/velero/pkg/volume" ) @@ -739,6 +741,265 @@ func TestBackupDeletionControllerProcessRequest(t *testing.T) { // Make sure snapshot was deleted assert.Equal(t, 0, td.volumeSnapshotter.SnapshotsTaken.Len()) }) + + t.Run("backup is not downloaded when there are no DeleteItemAction plugins", func(t *testing.T) { + backup := builder.ForBackup(velerov1api.DefaultNamespace, "foo").Result() + backup.UID = "uid" + backup.Spec.StorageLocation = "primary" + + td := setupBackupDeletionControllerTest(t, backup) + + location := &velerov1api.BackupStorageLocation{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: backup.Namespace, + Name: backup.Spec.StorageLocation, + }, + Spec: velerov1api.BackupStorageLocationSpec{ + Provider: "objStoreProvider", + StorageType: velerov1api.StorageType{ + ObjectStorage: &velerov1api.ObjectStorageLocation{ + Bucket: "bucket", + }, + }, + }, + } + + require.NoError(t, td.fakeClient.Create(context.Background(), location)) + + snapshotLocation := &velerov1api.VolumeSnapshotLocation{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: backup.Namespace, + Name: "vsl-1", + }, + Spec: velerov1api.VolumeSnapshotLocationSpec{ + Provider: "provider-1", + }, + } + require.NoError(t, td.sharedInformers.Velero().V1().VolumeSnapshotLocations().Informer().GetStore().Add(snapshotLocation)) + + // Clear out req labels to make sure the controller adds them and does not + // panic when encountering a nil Labels map + // (https://github.com/vmware-tanzu/velero/issues/1546) + td.req.Labels = nil + + td.client.PrependReactor("get", "backups", func(action core.Action) (bool, runtime.Object, error) { + return true, backup, nil + }) + td.volumeSnapshotter.SnapshotsTaken.Insert("snap-1") + + td.client.PrependReactor("patch", "deletebackuprequests", func(action core.Action) (bool, runtime.Object, error) { + return true, td.req, nil + }) + + td.client.PrependReactor("patch", "backups", func(action core.Action) (bool, runtime.Object, error) { + return true, backup, nil + }) + + snapshots := []*volume.Snapshot{ + { + Spec: volume.SnapshotSpec{ + Location: "vsl-1", + }, + Status: volume.SnapshotStatus{ + ProviderSnapshotID: "snap-1", + }, + }, + } + + pluginManager := &pluginmocks.Manager{} + pluginManager.On("GetVolumeSnapshotter", "provider-1").Return(td.volumeSnapshotter, nil) + pluginManager.On("GetDeleteItemActions").Return([]velero.DeleteItemAction{}, nil) + pluginManager.On("CleanupClients") + td.controller.newPluginManager = func(logrus.FieldLogger) clientmgmt.Manager { return pluginManager } + + td.backupStore.On("GetBackupVolumeSnapshots", td.req.Spec.BackupName).Return(snapshots, nil) + td.backupStore.On("DeleteBackup", td.req.Spec.BackupName).Return(nil) + + err := td.controller.processRequest(td.req) + require.NoError(t, err) + + td.backupStore.AssertNotCalled(t, "GetBackupContents", td.req.Spec.BackupName) + + expectedActions := []core.Action{ + core.NewPatchAction( + velerov1api.SchemeGroupVersion.WithResource("deletebackuprequests"), + td.req.Namespace, + td.req.Name, + types.MergePatchType, + []byte(`{"metadata":{"labels":{"velero.io/backup-name":"foo"}},"status":{"phase":"InProgress"}}`), + ), + core.NewGetAction( + velerov1api.SchemeGroupVersion.WithResource("backups"), + td.req.Namespace, + td.req.Spec.BackupName, + ), + core.NewPatchAction( + velerov1api.SchemeGroupVersion.WithResource("deletebackuprequests"), + td.req.Namespace, + td.req.Name, + types.MergePatchType, + []byte(`{"metadata":{"labels":{"velero.io/backup-uid":"uid"}}}`), + ), + core.NewPatchAction( + velerov1api.SchemeGroupVersion.WithResource("backups"), + td.req.Namespace, + td.req.Spec.BackupName, + types.MergePatchType, + []byte(`{"status":{"phase":"Deleting"}}`), + ), + core.NewDeleteAction( + velerov1api.SchemeGroupVersion.WithResource("backups"), + td.req.Namespace, + td.req.Spec.BackupName, + ), + core.NewPatchAction( + velerov1api.SchemeGroupVersion.WithResource("deletebackuprequests"), + td.req.Namespace, + td.req.Name, + types.MergePatchType, + []byte(`{"status":{"phase":"Processed"}}`), + ), + core.NewDeleteCollectionAction( + velerov1api.SchemeGroupVersion.WithResource("deletebackuprequests"), + td.req.Namespace, + pkgbackup.NewDeleteBackupRequestListOptions(td.req.Spec.BackupName, "uid"), + ), + } + + velerotest.CompareActions(t, expectedActions, td.client.Actions()) + + // Make sure snapshot was deleted + assert.Equal(t, 0, td.volumeSnapshotter.SnapshotsTaken.Len()) + }) + + t.Run("backup is still deleted if downloading tarball fails for DeleteItemAction plugins", func(t *testing.T) { + backup := builder.ForBackup(velerov1api.DefaultNamespace, "foo").Result() + backup.UID = "uid" + backup.Spec.StorageLocation = "primary" + + td := setupBackupDeletionControllerTest(t, backup) + + location := &velerov1api.BackupStorageLocation{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: backup.Namespace, + Name: backup.Spec.StorageLocation, + }, + Spec: velerov1api.BackupStorageLocationSpec{ + Provider: "objStoreProvider", + StorageType: velerov1api.StorageType{ + ObjectStorage: &velerov1api.ObjectStorageLocation{ + Bucket: "bucket", + }, + }, + }, + } + + require.NoError(t, td.fakeClient.Create(context.Background(), location)) + + snapshotLocation := &velerov1api.VolumeSnapshotLocation{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: backup.Namespace, + Name: "vsl-1", + }, + Spec: velerov1api.VolumeSnapshotLocationSpec{ + Provider: "provider-1", + }, + } + require.NoError(t, td.sharedInformers.Velero().V1().VolumeSnapshotLocations().Informer().GetStore().Add(snapshotLocation)) + + // Clear out req labels to make sure the controller adds them and does not + // panic when encountering a nil Labels map + // (https://github.com/vmware-tanzu/velero/issues/1546) + td.req.Labels = nil + + td.client.PrependReactor("get", "backups", func(action core.Action) (bool, runtime.Object, error) { + return true, backup, nil + }) + td.volumeSnapshotter.SnapshotsTaken.Insert("snap-1") + + td.client.PrependReactor("patch", "deletebackuprequests", func(action core.Action) (bool, runtime.Object, error) { + return true, td.req, nil + }) + + td.client.PrependReactor("patch", "backups", func(action core.Action) (bool, runtime.Object, error) { + return true, backup, nil + }) + + snapshots := []*volume.Snapshot{ + { + Spec: volume.SnapshotSpec{ + Location: "vsl-1", + }, + Status: volume.SnapshotStatus{ + ProviderSnapshotID: "snap-1", + }, + }, + } + + pluginManager := &pluginmocks.Manager{} + pluginManager.On("GetVolumeSnapshotter", "provider-1").Return(td.volumeSnapshotter, nil) + pluginManager.On("GetDeleteItemActions").Return([]velero.DeleteItemAction{new(mocks.DeleteItemAction)}, nil) + pluginManager.On("CleanupClients") + td.controller.newPluginManager = func(logrus.FieldLogger) clientmgmt.Manager { return pluginManager } + + td.backupStore.On("GetBackupVolumeSnapshots", td.req.Spec.BackupName).Return(snapshots, nil) + td.backupStore.On("GetBackupContents", td.req.Spec.BackupName).Return(nil, fmt.Errorf("error downloading tarball")) + td.backupStore.On("DeleteBackup", td.req.Spec.BackupName).Return(nil) + + err := td.controller.processRequest(td.req) + require.NoError(t, err) + + expectedActions := []core.Action{ + core.NewPatchAction( + velerov1api.SchemeGroupVersion.WithResource("deletebackuprequests"), + td.req.Namespace, + td.req.Name, + types.MergePatchType, + []byte(`{"metadata":{"labels":{"velero.io/backup-name":"foo"}},"status":{"phase":"InProgress"}}`), + ), + core.NewGetAction( + velerov1api.SchemeGroupVersion.WithResource("backups"), + td.req.Namespace, + td.req.Spec.BackupName, + ), + core.NewPatchAction( + velerov1api.SchemeGroupVersion.WithResource("deletebackuprequests"), + td.req.Namespace, + td.req.Name, + types.MergePatchType, + []byte(`{"metadata":{"labels":{"velero.io/backup-uid":"uid"}}}`), + ), + core.NewPatchAction( + velerov1api.SchemeGroupVersion.WithResource("backups"), + td.req.Namespace, + td.req.Spec.BackupName, + types.MergePatchType, + []byte(`{"status":{"phase":"Deleting"}}`), + ), + core.NewDeleteAction( + velerov1api.SchemeGroupVersion.WithResource("backups"), + td.req.Namespace, + td.req.Spec.BackupName, + ), + core.NewPatchAction( + velerov1api.SchemeGroupVersion.WithResource("deletebackuprequests"), + td.req.Namespace, + td.req.Name, + types.MergePatchType, + []byte(`{"status":{"phase":"Processed"}}`), + ), + core.NewDeleteCollectionAction( + velerov1api.SchemeGroupVersion.WithResource("deletebackuprequests"), + td.req.Namespace, + pkgbackup.NewDeleteBackupRequestListOptions(td.req.Spec.BackupName, "uid"), + ), + } + + velerotest.CompareActions(t, expectedActions, td.client.Actions()) + + // Make sure snapshot was deleted + assert.Equal(t, 0, td.volumeSnapshotter.SnapshotsTaken.Len()) + }) } func TestBackupDeletionControllerDeleteExpiredRequests(t *testing.T) {