diff --git a/Makefile b/Makefile index df82b92ef..8dcf6ee50 100644 --- a/Makefile +++ b/Makefile @@ -125,6 +125,7 @@ all-containers: container-builder-env @$(MAKE) --no-print-directory container BIN=velero-restic-restore-helper local: build-dirs +# Add DEBUG=1 to enable debug locally GOOS=$(GOOS) \ GOARCH=$(GOARCH) \ VERSION=$(VERSION) \ diff --git a/changelogs/unreleased/4887-reasonerjt b/changelogs/unreleased/4887-reasonerjt new file mode 100644 index 000000000..069850f9c --- /dev/null +++ b/changelogs/unreleased/4887-reasonerjt @@ -0,0 +1 @@ +Delete orphan CSI snapshots in backup sync controller \ No newline at end of file diff --git a/pkg/cmd/server/server.go b/pkg/cmd/server/server.go index e366ca329..8b8ba1699 100644 --- a/pkg/cmd/server/server.go +++ b/pkg/cmd/server/server.go @@ -302,6 +302,7 @@ func newServer(f client.Factory, config serverConfig, logger *logrus.Logger) (*s scheme := runtime.NewScheme() velerov1api.AddToScheme(scheme) corev1api.AddToScheme(scheme) + snapshotv1api.AddToScheme(scheme) ctrl.SetLogger(logrusr.NewLogger(logger)) @@ -599,6 +600,7 @@ func (s *server) runControllers(defaultVolumeSnapshotLocations map[string]string s.mgr.GetClient(), s.veleroClient.VeleroV1(), s.sharedInformerFactory.Velero().V1().Backups().Lister(), + csiVSLister, s.config.backupSyncPeriod, s.namespace, s.csiSnapshotClient, @@ -863,7 +865,6 @@ func (s *server) runControllers(defaultVolumeSnapshotLocations map[string]string if err := s.mgr.Start(s.ctx); err != nil { s.logger.Fatal("Problem starting manager", err) } - return nil } diff --git a/pkg/controller/backup_controller.go b/pkg/controller/backup_controller.go index 48f35fb82..45559f8b3 100644 --- a/pkg/controller/backup_controller.go +++ b/pkg/controller/backup_controller.go @@ -679,7 +679,7 @@ func (c *backupController) runBackup(backup *pkgbackup.Request) error { backup.Status.CSIVolumeSnapshotsAttempted = len(backup.CSISnapshots) for _, vs := range backup.CSISnapshots { - if *vs.Status.ReadyToUse { + if vs.Status != nil && boolptr.IsSetToTrue(vs.Status.ReadyToUse) { backup.Status.CSIVolumeSnapshotsCompleted++ } } diff --git a/pkg/controller/backup_sync_controller.go b/pkg/controller/backup_sync_controller.go index 6a1f7c562..2a96b139d 100644 --- a/pkg/controller/backup_sync_controller.go +++ b/pkg/controller/backup_sync_controller.go @@ -20,7 +20,9 @@ import ( "context" "time" + snapshotv1api "github.com/kubernetes-csi/external-snapshotter/client/v4/apis/volumesnapshot/v1" snapshotterClientSet "github.com/kubernetes-csi/external-snapshotter/client/v4/clientset/versioned" + snapshotv1listers "github.com/kubernetes-csi/external-snapshotter/client/v4/listers/volumesnapshot/v1" "github.com/pkg/errors" "github.com/sirupsen/logrus" kuberrs "k8s.io/apimachinery/pkg/api/errors" @@ -29,6 +31,8 @@ import ( "k8s.io/apimachinery/pkg/util/sets" "k8s.io/client-go/kubernetes" + "github.com/vmware-tanzu/velero/pkg/util/kube" + "github.com/vmware-tanzu/velero/internal/storage" velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" "github.com/vmware-tanzu/velero/pkg/features" @@ -48,6 +52,7 @@ type backupSyncController struct { kbClient client.Client podVolumeBackupClient velerov1client.PodVolumeBackupsGetter backupLister velerov1listers.BackupLister + csiVSLister snapshotv1listers.VolumeSnapshotLister csiSnapshotClient *snapshotterClientSet.Clientset kubeClient kubernetes.Interface namespace string @@ -62,6 +67,7 @@ func NewBackupSyncController( kbClient client.Client, podVolumeBackupClient velerov1client.PodVolumeBackupsGetter, backupLister velerov1listers.BackupLister, + csiVSLister snapshotv1listers.VolumeSnapshotLister, syncPeriod time.Duration, namespace string, csiSnapshotClient *snapshotterClientSet.Clientset, @@ -85,6 +91,7 @@ func NewBackupSyncController( defaultBackupLocation: defaultBackupLocation, defaultBackupSyncPeriod: syncPeriod, backupLister: backupLister, + csiVSLister: csiVSLister, csiSnapshotClient: csiSnapshotClient, kubeClient: kubeClient, @@ -358,11 +365,34 @@ func (c *backupSyncController) deleteOrphanedBackups(locationName string, backup if backup.Status.Phase != velerov1api.BackupPhaseCompleted || backupStoreBackups.Has(backup.Name) { continue } - if err := c.backupClient.Backups(backup.Namespace).Delete(context.TODO(), backup.Name, metav1.DeleteOptions{}); err != nil { log.WithError(errors.WithStack(err)).Error("Error deleting orphaned backup from cluster") } else { log.Debug("Deleted orphaned backup from cluster") + c.deleteCSISnapshotsByBackup(backup.Name, log) } } } + +func (c *backupSyncController) deleteCSISnapshotsByBackup(backupName string, log logrus.FieldLogger) { + if !features.IsEnabled(velerov1api.CSIFeatureFlag) { + return + } + m := client.MatchingLabels{velerov1api.BackupNameLabel: label.GetValidName(backupName)} + if vsList, err := c.csiVSLister.List(label.NewSelectorForBackup(label.GetValidName(backupName))); err != nil { + log.WithError(err).Warnf("Failed to list volumesnapshots for backup: %s, the deletion will be skipped", backupName) + } else { + for _, vs := range vsList { + name := kube.NamespaceAndName(vs.GetObjectMeta()) + log.Debugf("Deleting volumesnapshot %s", name) + if err := c.kbClient.Delete(context.TODO(), vs); err != nil { + log.WithError(err).Warnf("Failed to delete volumesnapshot %s", name) + } + } + } + vsc := &snapshotv1api.VolumeSnapshotContent{} + log.Debugf("Deleting volumesnapshotcontents for backup: %s", backupName) + if err := c.kbClient.DeleteAllOf(context.TODO(), vsc, m); err != nil { + log.WithError(err).Warnf("Failed to delete volumesnapshotcontents for backup: %s", backupName) + } +} diff --git a/pkg/controller/backup_sync_controller_test.go b/pkg/controller/backup_sync_controller_test.go index 0b615bdb5..6ba825373 100644 --- a/pkg/controller/backup_sync_controller_test.go +++ b/pkg/controller/backup_sync_controller_test.go @@ -344,6 +344,7 @@ func TestBackupSyncControllerRun(t *testing.T) { fakeClient, client.VeleroV1(), sharedInformers.Velero().V1().Backups().Lister(), + nil, // csiVSLister time.Duration(0), test.namespace, nil, // csiSnapshotClient @@ -565,6 +566,7 @@ func TestDeleteOrphanedBackups(t *testing.T) { fakeClient, client.VeleroV1(), sharedInformers.Velero().V1().Backups().Lister(), + nil, // csiVSLister time.Duration(0), test.namespace, nil, // csiSnapshotClient @@ -659,6 +661,7 @@ func TestStorageLabelsInDeleteOrphanedBackups(t *testing.T) { fakeClient, client.VeleroV1(), sharedInformers.Velero().V1().Backups().Lister(), + nil, // csiVSLister time.Duration(0), test.namespace, nil, // csiSnapshotClient diff --git a/pkg/util/csi/reset.go b/pkg/util/csi/reset.go index efe10ad01..5065aae78 100644 --- a/pkg/util/csi/reset.go +++ b/pkg/util/csi/reset.go @@ -37,6 +37,10 @@ func ResetVolumeSnapshotContent(snapCont *snapshotv1api.VolumeSnapshotContent) e return fmt.Errorf("the volumesnapshotcontent '%s' does not have snapshothandle set", snapCont.Name) } - snapCont.Spec.VolumeSnapshotRef = corev1.ObjectReference{} + // set the VolumeSnapshotRef to non-existing one to bypass the validation webhook + snapCont.Spec.VolumeSnapshotRef = corev1.ObjectReference{ + Namespace: fmt.Sprintf("ns-%s", snapCont.UID), + Name: fmt.Sprintf("name-%s", snapCont.UID), + } return nil }