diff --git a/changelogs/unreleased/9604-shubham-pampattiwar b/changelogs/unreleased/9604-shubham-pampattiwar new file mode 100644 index 000000000..f369a8af5 --- /dev/null +++ b/changelogs/unreleased/9604-shubham-pampattiwar @@ -0,0 +1 @@ +Fix DBR stuck when CSI snapshot no longer exists in cloud provider diff --git a/internal/delete/actions/csi/volumesnapshotcontent_action.go b/internal/delete/actions/csi/volumesnapshotcontent_action.go index d12c7c43a..7a6724df1 100644 --- a/internal/delete/actions/csi/volumesnapshotcontent_action.go +++ b/internal/delete/actions/csi/volumesnapshotcontent_action.go @@ -137,6 +137,10 @@ func (p *volumeSnapshotContentDeleteItemAction) Execute( return checkVSCReadiness(ctx, &snapCont, p.crClient) }, ); err != nil { + // Clean up the VSC we created since it can't become ready + if deleteErr := p.crClient.Delete(context.TODO(), &snapCont); deleteErr != nil && !apierrors.IsNotFound(deleteErr) { + p.log.WithError(deleteErr).Errorf("Failed to clean up VolumeSnapshotContent %s", snapCont.Name) + } return errors.Wrapf(err, "fail to wait VolumeSnapshotContent %s becomes ready.", snapCont.Name) } @@ -167,6 +171,13 @@ var checkVSCReadiness = func( return true, nil } + // Fail fast on permanent CSI driver errors (e.g., InvalidSnapshot.NotFound) + if tmpVSC.Status != nil && tmpVSC.Status.Error != nil && tmpVSC.Status.Error.Message != nil { + return false, errors.Errorf( + "VolumeSnapshotContent %s has error: %s", vsc.Name, *tmpVSC.Status.Error.Message, + ) + } + return false, nil } diff --git a/internal/delete/actions/csi/volumesnapshotcontent_action_test.go b/internal/delete/actions/csi/volumesnapshotcontent_action_test.go index 24baccdb2..7dbd6d7ff 100644 --- a/internal/delete/actions/csi/volumesnapshotcontent_action_test.go +++ b/internal/delete/actions/csi/volumesnapshotcontent_action_test.go @@ -94,6 +94,19 @@ func TestVSCExecute(t *testing.T) { return false, errors.Errorf("test error case") }, }, + { + name: "Error case with CSI error, dangling VSC should be cleaned up", + vsc: builder.ForVolumeSnapshotContent("bar").ObjectMeta(builder.WithLabelsMap(map[string]string{velerov1api.BackupNameLabel: "backup"})).Status(&snapshotv1api.VolumeSnapshotContentStatus{SnapshotHandle: &snapshotHandleStr}).Result(), + backup: builder.ForBackup("velero", "backup").ObjectMeta(builder.WithAnnotationsMap(map[string]string{velerov1api.ResourceTimeoutAnnotation: "5s"})).Result(), + expectErr: true, + function: func( + ctx context.Context, + vsc *snapshotv1api.VolumeSnapshotContent, + client crclient.Client, + ) (bool, error) { + return false, errors.Errorf("VolumeSnapshotContent %s has error: InvalidSnapshot.NotFound", vsc.Name) + }, + }, } for _, test := range tests { @@ -190,6 +203,24 @@ func TestCheckVSCReadiness(t *testing.T) { expectErr: false, ready: false, }, + { + name: "VSC has error from CSI driver", + vsc: &snapshotv1api.VolumeSnapshotContent{ + ObjectMeta: metav1.ObjectMeta{ + Name: "vsc-1", + Namespace: "velero", + }, + Status: &snapshotv1api.VolumeSnapshotContentStatus{ + ReadyToUse: boolPtr(false), + Error: &snapshotv1api.VolumeSnapshotError{ + Message: stringPtr("InvalidSnapshot.NotFound: The snapshot 'snap-0abc123' does not exist."), + }, + }, + }, + createVSC: true, + expectErr: true, + ready: false, + }, } for _, test := range tests { @@ -207,3 +238,11 @@ func TestCheckVSCReadiness(t *testing.T) { }) } } + +func boolPtr(b bool) *bool { + return &b +} + +func stringPtr(s string) *string { + return &s +}