Don't fail backup deletion if downloading tarball fails (#2993)

* Don't fail backup if downloading tarball fails

Previously, we would always attempt to download the tarball for a backup
for processing DeleteItemAction plugins, even if there weren't any.
This caused an issue for some users in the case where the backup tarball
had been deleted from object storage as the backup deletion would fail.

Now, we only attempt to download the tarball in the case where there are
DeleteItemAction plugins. If downloading that tarball fails, we log
the error, skip the processing of the DeleteItemAction plugins and
proceed with the rest of the deletion.

Signed-off-by: Bridget McErlean <bmcerlean@vmware.com>

* Skip file removal in closeAndRemoveFile if nil

Signed-off-by: Bridget McErlean <bmcerlean@vmware.com>
This commit is contained in:
Bridget McErlean
2020-11-30 13:58:34 -05:00
committed by GitHub
parent aa47309700
commit a877354750
4 changed files with 289 additions and 20 deletions

View File

@@ -0,0 +1 @@
Fixed an issue where the deletion of a backup would fail if the backup tarball couldn't be downloaded from object storage. Now the tarball is only downloaded if there are associated DeleteItemAction plugins and if downloading the tarball fails, the plugins are skipped.

View File

@@ -734,6 +734,10 @@ func persistBackup(backup *pkgbackup.Request,
}
func closeAndRemoveFile(file *os.File, log logrus.FieldLogger) {
if file == nil {
log.Debug("Skipping removal of file due to nil file pointer")
return
}
if err := file.Close(); err != nil {
log.WithError(err).WithField("file", file.Name()).Error("error closing file")
}

View File

@@ -295,13 +295,6 @@ func (c *backupDeletionController) processRequest(req *velerov1api.DeleteBackupR
errs = append(errs, err.Error())
}
// Download the tarball
backupFile, err := downloadToTempFile(backup.Name, backupStore, log)
if err != nil {
return errors.Wrap(err, "error downloading backup")
}
defer closeAndRemoveFile(backupFile, c.logger)
actions, err := pluginManager.GetDeleteItemActions()
log.Debugf("%d actions before invoking actions", len(actions))
if err != nil {
@@ -309,20 +302,30 @@ func (c *backupDeletionController) processRequest(req *velerov1api.DeleteBackupR
}
// don't defer CleanupClients here, since it was already called above.
ctx := &delete.Context{
Backup: backup,
BackupReader: backupFile,
Actions: actions,
Log: c.logger,
DiscoveryHelper: c.helper,
Filesystem: filesystem.NewFileSystem(),
}
if len(actions) > 0 {
// Download the tarball
backupFile, err := downloadToTempFile(backup.Name, backupStore, log)
defer closeAndRemoveFile(backupFile, c.logger)
// Optimization: wrap in a gofunc? Would be useful for large backups with lots of objects.
// but what do we do with the error returned? We can't just swallow it as that may lead to dangling resources.
err = delete.InvokeDeleteActions(ctx)
if err != nil {
return errors.Wrap(err, "error invoking delete item actions")
if err != nil {
log.WithError(err).Errorf("Unable to download tarball for backup %s, skipping associated DeleteItemAction plugins", backup.Name)
} else {
ctx := &delete.Context{
Backup: backup,
BackupReader: backupFile,
Actions: actions,
Log: c.logger,
DiscoveryHelper: c.helper,
Filesystem: filesystem.NewFileSystem(),
}
// Optimization: wrap in a gofunc? Would be useful for large backups with lots of objects.
// but what do we do with the error returned? We can't just swallow it as that may lead to dangling resources.
err = delete.InvokeDeleteActions(ctx)
if err != nil {
return errors.Wrap(err, "error invoking delete item actions")
}
}
}
if backupStore != nil {

View File

@@ -49,6 +49,8 @@ import (
persistencemocks "github.com/vmware-tanzu/velero/pkg/persistence/mocks"
"github.com/vmware-tanzu/velero/pkg/plugin/clientmgmt"
pluginmocks "github.com/vmware-tanzu/velero/pkg/plugin/mocks"
"github.com/vmware-tanzu/velero/pkg/plugin/velero"
"github.com/vmware-tanzu/velero/pkg/plugin/velero/mocks"
velerotest "github.com/vmware-tanzu/velero/pkg/test"
"github.com/vmware-tanzu/velero/pkg/volume"
)
@@ -739,6 +741,265 @@ func TestBackupDeletionControllerProcessRequest(t *testing.T) {
// Make sure snapshot was deleted
assert.Equal(t, 0, td.volumeSnapshotter.SnapshotsTaken.Len())
})
t.Run("backup is not downloaded when there are no DeleteItemAction plugins", func(t *testing.T) {
backup := builder.ForBackup(velerov1api.DefaultNamespace, "foo").Result()
backup.UID = "uid"
backup.Spec.StorageLocation = "primary"
td := setupBackupDeletionControllerTest(t, backup)
location := &velerov1api.BackupStorageLocation{
ObjectMeta: metav1.ObjectMeta{
Namespace: backup.Namespace,
Name: backup.Spec.StorageLocation,
},
Spec: velerov1api.BackupStorageLocationSpec{
Provider: "objStoreProvider",
StorageType: velerov1api.StorageType{
ObjectStorage: &velerov1api.ObjectStorageLocation{
Bucket: "bucket",
},
},
},
}
require.NoError(t, td.fakeClient.Create(context.Background(), location))
snapshotLocation := &velerov1api.VolumeSnapshotLocation{
ObjectMeta: metav1.ObjectMeta{
Namespace: backup.Namespace,
Name: "vsl-1",
},
Spec: velerov1api.VolumeSnapshotLocationSpec{
Provider: "provider-1",
},
}
require.NoError(t, td.sharedInformers.Velero().V1().VolumeSnapshotLocations().Informer().GetStore().Add(snapshotLocation))
// Clear out req labels to make sure the controller adds them and does not
// panic when encountering a nil Labels map
// (https://github.com/vmware-tanzu/velero/issues/1546)
td.req.Labels = nil
td.client.PrependReactor("get", "backups", func(action core.Action) (bool, runtime.Object, error) {
return true, backup, nil
})
td.volumeSnapshotter.SnapshotsTaken.Insert("snap-1")
td.client.PrependReactor("patch", "deletebackuprequests", func(action core.Action) (bool, runtime.Object, error) {
return true, td.req, nil
})
td.client.PrependReactor("patch", "backups", func(action core.Action) (bool, runtime.Object, error) {
return true, backup, nil
})
snapshots := []*volume.Snapshot{
{
Spec: volume.SnapshotSpec{
Location: "vsl-1",
},
Status: volume.SnapshotStatus{
ProviderSnapshotID: "snap-1",
},
},
}
pluginManager := &pluginmocks.Manager{}
pluginManager.On("GetVolumeSnapshotter", "provider-1").Return(td.volumeSnapshotter, nil)
pluginManager.On("GetDeleteItemActions").Return([]velero.DeleteItemAction{}, nil)
pluginManager.On("CleanupClients")
td.controller.newPluginManager = func(logrus.FieldLogger) clientmgmt.Manager { return pluginManager }
td.backupStore.On("GetBackupVolumeSnapshots", td.req.Spec.BackupName).Return(snapshots, nil)
td.backupStore.On("DeleteBackup", td.req.Spec.BackupName).Return(nil)
err := td.controller.processRequest(td.req)
require.NoError(t, err)
td.backupStore.AssertNotCalled(t, "GetBackupContents", td.req.Spec.BackupName)
expectedActions := []core.Action{
core.NewPatchAction(
velerov1api.SchemeGroupVersion.WithResource("deletebackuprequests"),
td.req.Namespace,
td.req.Name,
types.MergePatchType,
[]byte(`{"metadata":{"labels":{"velero.io/backup-name":"foo"}},"status":{"phase":"InProgress"}}`),
),
core.NewGetAction(
velerov1api.SchemeGroupVersion.WithResource("backups"),
td.req.Namespace,
td.req.Spec.BackupName,
),
core.NewPatchAction(
velerov1api.SchemeGroupVersion.WithResource("deletebackuprequests"),
td.req.Namespace,
td.req.Name,
types.MergePatchType,
[]byte(`{"metadata":{"labels":{"velero.io/backup-uid":"uid"}}}`),
),
core.NewPatchAction(
velerov1api.SchemeGroupVersion.WithResource("backups"),
td.req.Namespace,
td.req.Spec.BackupName,
types.MergePatchType,
[]byte(`{"status":{"phase":"Deleting"}}`),
),
core.NewDeleteAction(
velerov1api.SchemeGroupVersion.WithResource("backups"),
td.req.Namespace,
td.req.Spec.BackupName,
),
core.NewPatchAction(
velerov1api.SchemeGroupVersion.WithResource("deletebackuprequests"),
td.req.Namespace,
td.req.Name,
types.MergePatchType,
[]byte(`{"status":{"phase":"Processed"}}`),
),
core.NewDeleteCollectionAction(
velerov1api.SchemeGroupVersion.WithResource("deletebackuprequests"),
td.req.Namespace,
pkgbackup.NewDeleteBackupRequestListOptions(td.req.Spec.BackupName, "uid"),
),
}
velerotest.CompareActions(t, expectedActions, td.client.Actions())
// Make sure snapshot was deleted
assert.Equal(t, 0, td.volumeSnapshotter.SnapshotsTaken.Len())
})
t.Run("backup is still deleted if downloading tarball fails for DeleteItemAction plugins", func(t *testing.T) {
backup := builder.ForBackup(velerov1api.DefaultNamespace, "foo").Result()
backup.UID = "uid"
backup.Spec.StorageLocation = "primary"
td := setupBackupDeletionControllerTest(t, backup)
location := &velerov1api.BackupStorageLocation{
ObjectMeta: metav1.ObjectMeta{
Namespace: backup.Namespace,
Name: backup.Spec.StorageLocation,
},
Spec: velerov1api.BackupStorageLocationSpec{
Provider: "objStoreProvider",
StorageType: velerov1api.StorageType{
ObjectStorage: &velerov1api.ObjectStorageLocation{
Bucket: "bucket",
},
},
},
}
require.NoError(t, td.fakeClient.Create(context.Background(), location))
snapshotLocation := &velerov1api.VolumeSnapshotLocation{
ObjectMeta: metav1.ObjectMeta{
Namespace: backup.Namespace,
Name: "vsl-1",
},
Spec: velerov1api.VolumeSnapshotLocationSpec{
Provider: "provider-1",
},
}
require.NoError(t, td.sharedInformers.Velero().V1().VolumeSnapshotLocations().Informer().GetStore().Add(snapshotLocation))
// Clear out req labels to make sure the controller adds them and does not
// panic when encountering a nil Labels map
// (https://github.com/vmware-tanzu/velero/issues/1546)
td.req.Labels = nil
td.client.PrependReactor("get", "backups", func(action core.Action) (bool, runtime.Object, error) {
return true, backup, nil
})
td.volumeSnapshotter.SnapshotsTaken.Insert("snap-1")
td.client.PrependReactor("patch", "deletebackuprequests", func(action core.Action) (bool, runtime.Object, error) {
return true, td.req, nil
})
td.client.PrependReactor("patch", "backups", func(action core.Action) (bool, runtime.Object, error) {
return true, backup, nil
})
snapshots := []*volume.Snapshot{
{
Spec: volume.SnapshotSpec{
Location: "vsl-1",
},
Status: volume.SnapshotStatus{
ProviderSnapshotID: "snap-1",
},
},
}
pluginManager := &pluginmocks.Manager{}
pluginManager.On("GetVolumeSnapshotter", "provider-1").Return(td.volumeSnapshotter, nil)
pluginManager.On("GetDeleteItemActions").Return([]velero.DeleteItemAction{new(mocks.DeleteItemAction)}, nil)
pluginManager.On("CleanupClients")
td.controller.newPluginManager = func(logrus.FieldLogger) clientmgmt.Manager { return pluginManager }
td.backupStore.On("GetBackupVolumeSnapshots", td.req.Spec.BackupName).Return(snapshots, nil)
td.backupStore.On("GetBackupContents", td.req.Spec.BackupName).Return(nil, fmt.Errorf("error downloading tarball"))
td.backupStore.On("DeleteBackup", td.req.Spec.BackupName).Return(nil)
err := td.controller.processRequest(td.req)
require.NoError(t, err)
expectedActions := []core.Action{
core.NewPatchAction(
velerov1api.SchemeGroupVersion.WithResource("deletebackuprequests"),
td.req.Namespace,
td.req.Name,
types.MergePatchType,
[]byte(`{"metadata":{"labels":{"velero.io/backup-name":"foo"}},"status":{"phase":"InProgress"}}`),
),
core.NewGetAction(
velerov1api.SchemeGroupVersion.WithResource("backups"),
td.req.Namespace,
td.req.Spec.BackupName,
),
core.NewPatchAction(
velerov1api.SchemeGroupVersion.WithResource("deletebackuprequests"),
td.req.Namespace,
td.req.Name,
types.MergePatchType,
[]byte(`{"metadata":{"labels":{"velero.io/backup-uid":"uid"}}}`),
),
core.NewPatchAction(
velerov1api.SchemeGroupVersion.WithResource("backups"),
td.req.Namespace,
td.req.Spec.BackupName,
types.MergePatchType,
[]byte(`{"status":{"phase":"Deleting"}}`),
),
core.NewDeleteAction(
velerov1api.SchemeGroupVersion.WithResource("backups"),
td.req.Namespace,
td.req.Spec.BackupName,
),
core.NewPatchAction(
velerov1api.SchemeGroupVersion.WithResource("deletebackuprequests"),
td.req.Namespace,
td.req.Name,
types.MergePatchType,
[]byte(`{"status":{"phase":"Processed"}}`),
),
core.NewDeleteCollectionAction(
velerov1api.SchemeGroupVersion.WithResource("deletebackuprequests"),
td.req.Namespace,
pkgbackup.NewDeleteBackupRequestListOptions(td.req.Spec.BackupName, "uid"),
),
}
velerotest.CompareActions(t, expectedActions, td.client.Actions())
// Make sure snapshot was deleted
assert.Equal(t, 0, td.volumeSnapshotter.SnapshotsTaken.Len())
})
}
func TestBackupDeletionControllerDeleteExpiredRequests(t *testing.T) {