From 70043af85b2a1fd7b03e1a6465db625e1759a743 Mon Sep 17 00:00:00 2001 From: Xun Jiang Date: Fri, 13 Mar 2026 11:24:09 +0800 Subject: [PATCH 01/12] Add more check for file extraction from tarball. Signed-off-by: Xun Jiang --- changelogs/unreleased/9614-blackpiglet | 1 + pkg/archive/extractor.go | 18 +++++++++++++++++- pkg/archive/extractor_test.go | 26 ++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 changelogs/unreleased/9614-blackpiglet diff --git a/changelogs/unreleased/9614-blackpiglet b/changelogs/unreleased/9614-blackpiglet new file mode 100644 index 000000000..6d15e4f2e --- /dev/null +++ b/changelogs/unreleased/9614-blackpiglet @@ -0,0 +1 @@ +Add check for file extraction from tarball. \ No newline at end of file diff --git a/pkg/archive/extractor.go b/pkg/archive/extractor.go index 87d9ab413..aae9a7a85 100644 --- a/pkg/archive/extractor.go +++ b/pkg/archive/extractor.go @@ -19,8 +19,10 @@ package archive import ( "archive/tar" "compress/gzip" + "fmt" "io" "path/filepath" + "strings" "github.com/sirupsen/logrus" @@ -66,6 +68,16 @@ func (e *Extractor) writeFile(target string, tarRdr *tar.Reader) error { return nil } +// sanitizeArchivePath sanitizes archive file path from "G305: Zip Slip vulnerability" +func sanitizeArchivePath(destDir, sourcePath string) (targetPath string, err error) { + targetPath = filepath.Join(destDir, sourcePath) + if strings.HasPrefix(targetPath, filepath.Clean(destDir)) { + return targetPath, nil + } + + return "", fmt.Errorf("invalid archive path %q: escapes target directory", sourcePath) +} + func (e *Extractor) readBackup(tarRdr *tar.Reader) (string, error) { dir, err := e.fs.TempDir("", "") if err != nil { @@ -84,7 +96,11 @@ func (e *Extractor) readBackup(tarRdr *tar.Reader) (string, error) { return "", err } - target := filepath.Join(dir, header.Name) //nolint:gosec // Internal usage. No need to check. + target, err := sanitizeArchivePath(dir, header.Name) + if err != nil { + e.log.Infof("error sanitizing archive path: %s", err.Error()) + return "", err + } switch header.Typeflag { case tar.TypeDir: diff --git a/pkg/archive/extractor_test.go b/pkg/archive/extractor_test.go index 47cea734c..a4daf02ca 100644 --- a/pkg/archive/extractor_test.go +++ b/pkg/archive/extractor_test.go @@ -18,6 +18,7 @@ package archive import ( "archive/tar" + "bytes" "compress/gzip" "io" "os" @@ -87,6 +88,31 @@ func TestUnzipAndExtractBackup(t *testing.T) { } } +func TestUnzipAndExtractBackupRejectsPathTraversal(t *testing.T) { + ext := NewExtractor(test.NewLogger(), test.NewFakeFileSystem()) + + var buf bytes.Buffer + gzw := gzip.NewWriter(&buf) + tw := tar.NewWriter(gzw) + + err := tw.WriteHeader(&tar.Header{ + Name: "../escape.txt", + Mode: 0600, + Typeflag: tar.TypeReg, + Size: int64(len("data")), + }) + require.NoError(t, err) + + _, err = tw.Write([]byte("data")) + require.NoError(t, err) + require.NoError(t, tw.Close()) + require.NoError(t, gzw.Close()) + + _, err = ext.UnzipAndExtractBackup(&buf) + require.Error(t, err) + require.Contains(t, err.Error(), "invalid archive path") +} + func createArchive(files []string, fs filesystem.Interface) (string, error) { outName := "output.tar.gz" out, err := fs.Create(outName) From ade433ecbd081abe458269ac4c8121880a0fb926 Mon Sep 17 00:00:00 2001 From: Priyansh Choudhary Date: Thu, 19 Mar 2026 02:30:28 +0530 Subject: [PATCH 02/12] Implement original VolumeSnapshotContent deletion for legacy backups Signed-off-by: Priyansh Choudhary --- .../csi/volumesnapshotcontent_action.go | 52 ++++++++++++ .../csi/volumesnapshotcontent_action_test.go | 84 +++++++++++++++++++ 2 files changed, 136 insertions(+) diff --git a/internal/delete/actions/csi/volumesnapshotcontent_action.go b/internal/delete/actions/csi/volumesnapshotcontent_action.go index 7a6724df1..69d604e41 100644 --- a/internal/delete/actions/csi/volumesnapshotcontent_action.go +++ b/internal/delete/actions/csi/volumesnapshotcontent_action.go @@ -81,6 +81,17 @@ func (p *volumeSnapshotContentDeleteItemAction) Execute( p.log.Infof("Deleting VolumeSnapshotContent %s", snapCont.Name) + // Try to delete the original VSC from the cluster first. + // This handles legacy (pre-1.15) backups where the original VSC + // with DeletionPolicy=Retain still exists in the cluster. + originalVSCName := snapCont.Name + if cleaned := p.tryDeleteOriginalVSC(context.TODO(), originalVSCName); cleaned { + p.log.Infof("Successfully deleted original VolumeSnapshotContent %s from cluster, skipping temp VSC creation", originalVSCName) + return nil + } + + // create a temp VSC to trigger cloud snapshot deletion + // (for backups where the original VSC no longer exists in cluster) uuid, err := uuid.NewRandom() if err != nil { p.log.WithError(err).Errorf("Fail to generate the UUID to create VSC %s", snapCont.Name) @@ -155,6 +166,47 @@ func (p *volumeSnapshotContentDeleteItemAction) Execute( return nil } +// tryDeleteOriginalVSC attempts to find and delete the original VSC from +// the cluster (legacy pre-1.15 backups). It patches the DeletionPolicy to +// Delete so the CSI driver also removes the cloud snapshot, then deletes +// the VSC object itself. +// Returns true if the original VSC was found and deletion was initiated. +func (p *volumeSnapshotContentDeleteItemAction) tryDeleteOriginalVSC( + ctx context.Context, + vscName string, +) bool { + existing := new(snapshotv1api.VolumeSnapshotContent) + if err := p.crClient.Get(ctx, crclient.ObjectKey{Name: vscName}, existing); err != nil { + if apierrors.IsNotFound(err) { + p.log.Debugf("Original VolumeSnapshotContent %s not found in cluster, will use temp VSC flow", vscName) + } else { + p.log.Debugf("Error looking up original VolumeSnapshotContent %s, will use temp VSC flow", vscName) + } + return false + } + + p.log.Debugf("Found original VolumeSnapshotContent %s in cluster (legacy backup), cleaning up directly", vscName) + + // Patch DeletionPolicy to Delete so the CSI driver removes the cloud snapshot + if existing.Spec.DeletionPolicy != snapshotv1api.VolumeSnapshotContentDelete { + original := existing.DeepCopy() + existing.Spec.DeletionPolicy = snapshotv1api.VolumeSnapshotContentDelete + if err := p.crClient.Patch(ctx, existing, crclient.MergeFrom(original)); err != nil { + p.log.WithError(err).Debugf("Failed to patch DeletionPolicy on original VSC %s, will use temp VSC flow", vscName) + return false + } + p.log.Debugf("Patched DeletionPolicy to Delete on original VolumeSnapshotContent %s", vscName) + } + + // Delete the original VSC — the CSI driver will clean up the cloud snapshot + if err := p.crClient.Delete(ctx, existing); err != nil && !apierrors.IsNotFound(err) { + p.log.WithError(err).Debugf("Failed to delete original VolumeSnapshotContent %s, will use temp VSC flow", vscName) + return false + } + + return true +} + var checkVSCReadiness = func( ctx context.Context, vsc *snapshotv1api.VolumeSnapshotContent, diff --git a/internal/delete/actions/csi/volumesnapshotcontent_action_test.go b/internal/delete/actions/csi/volumesnapshotcontent_action_test.go index 7dbd6d7ff..76ebe4152 100644 --- a/internal/delete/actions/csi/volumesnapshotcontent_action_test.go +++ b/internal/delete/actions/csi/volumesnapshotcontent_action_test.go @@ -25,6 +25,8 @@ import ( "github.com/pkg/errors" "github.com/sirupsen/logrus" "github.com/stretchr/testify/require" + corev1api "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime" @@ -239,6 +241,88 @@ func TestCheckVSCReadiness(t *testing.T) { } } +func TestTryDeleteOriginalVSC(t *testing.T) { + tests := []struct { + name string + vscName string + existing *snapshotv1api.VolumeSnapshotContent + createIt bool + expectRet bool + }{ + { + name: "VSC not found in cluster, returns false", + vscName: "not-found", + expectRet: false, + }, + { + name: "VSC found with Retain policy, patches and deletes", + vscName: "legacy-vsc", + existing: &snapshotv1api.VolumeSnapshotContent{ + ObjectMeta: metav1.ObjectMeta{Name: "legacy-vsc"}, + Spec: snapshotv1api.VolumeSnapshotContentSpec{ + DeletionPolicy: snapshotv1api.VolumeSnapshotContentRetain, + Driver: "disk.csi.azure.com", + Source: snapshotv1api.VolumeSnapshotContentSource{ + SnapshotHandle: stringPtr("snap-123"), + }, + VolumeSnapshotRef: corev1api.ObjectReference{ + Name: "vs-1", + Namespace: "default", + }, + }, + }, + createIt: true, + expectRet: true, + }, + { + name: "VSC found with Delete policy already, just deletes", + vscName: "already-delete-vsc", + existing: &snapshotv1api.VolumeSnapshotContent{ + ObjectMeta: metav1.ObjectMeta{Name: "already-delete-vsc"}, + Spec: snapshotv1api.VolumeSnapshotContentSpec{ + DeletionPolicy: snapshotv1api.VolumeSnapshotContentDelete, + Driver: "disk.csi.azure.com", + Source: snapshotv1api.VolumeSnapshotContentSource{ + SnapshotHandle: stringPtr("snap-456"), + }, + VolumeSnapshotRef: corev1api.ObjectReference{ + Name: "vs-2", + Namespace: "default", + }, + }, + }, + createIt: true, + expectRet: true, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + crClient := velerotest.NewFakeControllerRuntimeClient(t) + logger := logrus.StandardLogger() + p := &volumeSnapshotContentDeleteItemAction{ + log: logger, + crClient: crClient, + } + + if test.createIt && test.existing != nil { + require.NoError(t, crClient.Create(t.Context(), test.existing)) + } + + result := p.tryDeleteOriginalVSC(t.Context(), test.vscName) + require.Equal(t, test.expectRet, result) + + // If cleanup succeeded, verify the VSC is gone + if test.expectRet { + err := crClient.Get(t.Context(), crclient.ObjectKey{Name: test.vscName}, + &snapshotv1api.VolumeSnapshotContent{}) + require.True(t, apierrors.IsNotFound(err), + "VSC should have been deleted from cluster") + } + }) + } +} + func boolPtr(b bool) *bool { return &b } From fce276bca94f8eb95dfeadde49eccb6e3e3e9c3f Mon Sep 17 00:00:00 2001 From: Priyansh Choudhary Date: Thu, 19 Mar 2026 02:38:06 +0530 Subject: [PATCH 03/12] Added changelog Signed-off-by: Priyansh Choudhary --- changelogs/unreleased/9628-priyansh17 | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelogs/unreleased/9628-priyansh17 diff --git a/changelogs/unreleased/9628-priyansh17 b/changelogs/unreleased/9628-priyansh17 new file mode 100644 index 000000000..f65b55e16 --- /dev/null +++ b/changelogs/unreleased/9628-priyansh17 @@ -0,0 +1 @@ +Implement original VolumeSnapshotContent deletion for legacy backups \ No newline at end of file From 68cee893f1f1ee1d4c13e8de9e5d9468915586f2 Mon Sep 17 00:00:00 2001 From: Priyansh Choudhary Date: Thu, 19 Mar 2026 03:16:32 +0530 Subject: [PATCH 04/12] Enhance logging and error handling in VolumeSnapshotContent deletion process Signed-off-by: Priyansh Choudhary --- .../csi/volumesnapshotcontent_action.go | 17 ++- .../csi/volumesnapshotcontent_action_test.go | 120 +++++++++++++++++- 2 files changed, 126 insertions(+), 11 deletions(-) diff --git a/internal/delete/actions/csi/volumesnapshotcontent_action.go b/internal/delete/actions/csi/volumesnapshotcontent_action.go index 69d604e41..a8b43d456 100644 --- a/internal/delete/actions/csi/volumesnapshotcontent_action.go +++ b/internal/delete/actions/csi/volumesnapshotcontent_action.go @@ -71,7 +71,7 @@ func (p *volumeSnapshotContentDeleteItemAction) Execute( // So skip deleting VolumeSnapshotContent not have the backup name // in its labels. if !kubeutil.HasBackupLabel(&snapCont.ObjectMeta, input.Backup.Name) { - p.log.Info( + p.log.Infof( "VolumeSnapshotContent %s was not taken by backup %s, skipping deletion", snapCont.Name, input.Backup.Name, @@ -125,6 +125,7 @@ func (p *volumeSnapshotContentDeleteItemAction) Execute( if err := p.crClient.Create(context.TODO(), &snapCont); err != nil { return errors.Wrapf(err, "fail to create VolumeSnapshotContent %s", snapCont.Name) } + p.log.Infof("Created temp VolumeSnapshotContent %s with DeletionPolicy=Delete to trigger cloud snapshot cleanup", snapCont.Name) // Read resource timeout from backup annotation, if not set, use default value. timeout, err := time.ParseDuration( @@ -149,20 +150,23 @@ func (p *volumeSnapshotContentDeleteItemAction) Execute( }, ); err != nil { // Clean up the VSC we created since it can't become ready + p.log.WithError(err).Warnf("Temp VolumeSnapshotContent %s did not become ready, cleaning up", snapCont.Name) if deleteErr := p.crClient.Delete(context.TODO(), &snapCont); deleteErr != nil && !apierrors.IsNotFound(deleteErr) { - p.log.WithError(deleteErr).Errorf("Failed to clean up VolumeSnapshotContent %s", snapCont.Name) + p.log.WithError(deleteErr).Errorf("Failed to clean up temp VolumeSnapshotContent %s", snapCont.Name) } return errors.Wrapf(err, "fail to wait VolumeSnapshotContent %s becomes ready.", snapCont.Name) } + p.log.Infof("Temp VolumeSnapshotContent %s is ready, deleting to trigger cloud snapshot removal", snapCont.Name) if err := p.crClient.Delete( context.TODO(), &snapCont, ); err != nil && !apierrors.IsNotFound(err) { - p.log.Infof("VolumeSnapshotContent %s not found", snapCont.Name) + p.log.WithError(err).Errorf("Failed to delete temp VolumeSnapshotContent %s", snapCont.Name) return err } + p.log.Infof("Successfully triggered deletion of VolumeSnapshotContent %s and its cloud snapshot", snapCont.Name) return nil } @@ -180,7 +184,7 @@ func (p *volumeSnapshotContentDeleteItemAction) tryDeleteOriginalVSC( if apierrors.IsNotFound(err) { p.log.Debugf("Original VolumeSnapshotContent %s not found in cluster, will use temp VSC flow", vscName) } else { - p.log.Debugf("Error looking up original VolumeSnapshotContent %s, will use temp VSC flow", vscName) + p.log.WithError(err).Warnf("Error looking up original VolumeSnapshotContent %s, will use temp VSC flow", vscName) } return false } @@ -192,7 +196,7 @@ func (p *volumeSnapshotContentDeleteItemAction) tryDeleteOriginalVSC( original := existing.DeepCopy() existing.Spec.DeletionPolicy = snapshotv1api.VolumeSnapshotContentDelete if err := p.crClient.Patch(ctx, existing, crclient.MergeFrom(original)); err != nil { - p.log.WithError(err).Debugf("Failed to patch DeletionPolicy on original VSC %s, will use temp VSC flow", vscName) + p.log.WithError(err).Warnf("Failed to patch DeletionPolicy on original VSC %s, will use temp VSC flow", vscName) return false } p.log.Debugf("Patched DeletionPolicy to Delete on original VolumeSnapshotContent %s", vscName) @@ -200,10 +204,11 @@ func (p *volumeSnapshotContentDeleteItemAction) tryDeleteOriginalVSC( // Delete the original VSC — the CSI driver will clean up the cloud snapshot if err := p.crClient.Delete(ctx, existing); err != nil && !apierrors.IsNotFound(err) { - p.log.WithError(err).Debugf("Failed to delete original VolumeSnapshotContent %s, will use temp VSC flow", vscName) + p.log.WithError(err).Warnf("Failed to delete original VolumeSnapshotContent %s, will use temp VSC flow", vscName) return false } + p.log.Infof("Deleted original VolumeSnapshotContent %s with DeletionPolicy=Delete, CSI driver will remove cloud snapshot", vscName) return true } diff --git a/internal/delete/actions/csi/volumesnapshotcontent_action_test.go b/internal/delete/actions/csi/volumesnapshotcontent_action_test.go index 76ebe4152..ee96018fe 100644 --- a/internal/delete/actions/csi/volumesnapshotcontent_action_test.go +++ b/internal/delete/actions/csi/volumesnapshotcontent_action_test.go @@ -39,14 +39,44 @@ import ( velerotest "github.com/vmware-tanzu/velero/pkg/test" ) +// fakeClientWithErrors wraps a real client and injects errors for specific operations. +type fakeClientWithErrors struct { + crclient.Client + getError error + patchError error + deleteError error +} + +func (c *fakeClientWithErrors) Get(ctx context.Context, key crclient.ObjectKey, obj crclient.Object, opts ...crclient.GetOption) error { + if c.getError != nil { + return c.getError + } + return c.Client.Get(ctx, key, obj, opts...) +} + +func (c *fakeClientWithErrors) Patch(ctx context.Context, obj crclient.Object, patch crclient.Patch, opts ...crclient.PatchOption) error { + if c.patchError != nil { + return c.patchError + } + return c.Client.Patch(ctx, obj, patch, opts...) +} + +func (c *fakeClientWithErrors) Delete(ctx context.Context, obj crclient.Object, opts ...crclient.DeleteOption) error { + if c.deleteError != nil { + return c.deleteError + } + return c.Client.Delete(ctx, obj, opts...) +} + func TestVSCExecute(t *testing.T) { snapshotHandleStr := "test" tests := []struct { - name string - item runtime.Unstructured - vsc *snapshotv1api.VolumeSnapshotContent - backup *velerov1api.Backup - function func( + name string + item runtime.Unstructured + vsc *snapshotv1api.VolumeSnapshotContent + backup *velerov1api.Backup + preExistingVSC *snapshotv1api.VolumeSnapshotContent + function func( ctx context.Context, vsc *snapshotv1api.VolumeSnapshotContent, client crclient.Client, @@ -96,6 +126,21 @@ func TestVSCExecute(t *testing.T) { return false, errors.Errorf("test error case") }, }, + { + name: "Original VSC exists in cluster, cleaned up directly", + vsc: builder.ForVolumeSnapshotContent("bar").ObjectMeta(builder.WithLabelsMap(map[string]string{velerov1api.BackupNameLabel: "backup"})).Status(&snapshotv1api.VolumeSnapshotContentStatus{SnapshotHandle: &snapshotHandleStr}).Result(), + backup: builder.ForBackup("velero", "backup").Result(), + expectErr: false, + preExistingVSC: &snapshotv1api.VolumeSnapshotContent{ + ObjectMeta: metav1.ObjectMeta{Name: "bar"}, + Spec: snapshotv1api.VolumeSnapshotContentSpec{ + DeletionPolicy: snapshotv1api.VolumeSnapshotContentRetain, + Driver: "disk.csi.azure.com", + Source: snapshotv1api.VolumeSnapshotContentSource{SnapshotHandle: stringPtr("snap-123")}, + VolumeSnapshotRef: corev1api.ObjectReference{Name: "vs-1", Namespace: "default"}, + }, + }, + }, { name: "Error case with CSI error, dangling VSC should be cleaned up", vsc: builder.ForVolumeSnapshotContent("bar").ObjectMeta(builder.WithLabelsMap(map[string]string{velerov1api.BackupNameLabel: "backup"})).Status(&snapshotv1api.VolumeSnapshotContentStatus{SnapshotHandle: &snapshotHandleStr}).Result(), @@ -117,6 +162,10 @@ func TestVSCExecute(t *testing.T) { logger := logrus.StandardLogger() checkVSCReadiness = test.function + if test.preExistingVSC != nil { + require.NoError(t, crClient.Create(t.Context(), test.preExistingVSC)) + } + p := volumeSnapshotContentDeleteItemAction{log: logger, crClient: crClient} if test.vsc != nil { @@ -321,6 +370,67 @@ func TestTryDeleteOriginalVSC(t *testing.T) { } }) } + + // Error injection tests for tryDeleteOriginalVSC + t.Run("Get returns non-NotFound error, returns false", func(t *testing.T) { + errClient := &fakeClientWithErrors{ + Client: velerotest.NewFakeControllerRuntimeClient(t), + getError: fmt.Errorf("connection refused"), + } + p := &volumeSnapshotContentDeleteItemAction{ + log: logrus.StandardLogger(), + crClient: errClient, + } + require.False(t, p.tryDeleteOriginalVSC(t.Context(), "some-vsc")) + }) + + t.Run("Patch fails, returns false", func(t *testing.T) { + realClient := velerotest.NewFakeControllerRuntimeClient(t) + vsc := &snapshotv1api.VolumeSnapshotContent{ + ObjectMeta: metav1.ObjectMeta{Name: "patch-fail-vsc"}, + Spec: snapshotv1api.VolumeSnapshotContentSpec{ + DeletionPolicy: snapshotv1api.VolumeSnapshotContentRetain, + Driver: "disk.csi.azure.com", + Source: snapshotv1api.VolumeSnapshotContentSource{SnapshotHandle: stringPtr("snap-789")}, + VolumeSnapshotRef: corev1api.ObjectReference{Name: "vs-3", Namespace: "default"}, + }, + } + require.NoError(t, realClient.Create(t.Context(), vsc)) + + errClient := &fakeClientWithErrors{ + Client: realClient, + patchError: fmt.Errorf("patch forbidden"), + } + p := &volumeSnapshotContentDeleteItemAction{ + log: logrus.StandardLogger(), + crClient: errClient, + } + require.False(t, p.tryDeleteOriginalVSC(t.Context(), "patch-fail-vsc")) + }) + + t.Run("Delete fails, returns false", func(t *testing.T) { + realClient := velerotest.NewFakeControllerRuntimeClient(t) + vsc := &snapshotv1api.VolumeSnapshotContent{ + ObjectMeta: metav1.ObjectMeta{Name: "delete-fail-vsc"}, + Spec: snapshotv1api.VolumeSnapshotContentSpec{ + DeletionPolicy: snapshotv1api.VolumeSnapshotContentDelete, + Driver: "disk.csi.azure.com", + Source: snapshotv1api.VolumeSnapshotContentSource{SnapshotHandle: stringPtr("snap-999")}, + VolumeSnapshotRef: corev1api.ObjectReference{Name: "vs-4", Namespace: "default"}, + }, + } + require.NoError(t, realClient.Create(t.Context(), vsc)) + + errClient := &fakeClientWithErrors{ + Client: realClient, + deleteError: fmt.Errorf("delete forbidden"), + } + p := &volumeSnapshotContentDeleteItemAction{ + log: logrus.StandardLogger(), + crClient: errClient, + } + require.False(t, p.tryDeleteOriginalVSC(t.Context(), "delete-fail-vsc")) + }) } func boolPtr(b bool) *bool { From a5391e13e7e60834b45c39da8e3f2d62d9b2a4a6 Mon Sep 17 00:00:00 2001 From: Adam Zhang Date: Tue, 24 Mar 2026 16:05:20 +0800 Subject: [PATCH 05/12] [main] fix configmap lookup in non-default namespaces (#9638) * fix configmap lookup in non-default namespaces o.Namespace is empty when Validate runs (Complete hasn't been called yet), causing VerifyJSONConfigs to query the default namespace instead of the intended one. Replace o.Namespace with f.Namespace() in all three ConfigMap validation calls so the factory's already-resolved namespace is used. Signed-off-by: Adam Zhang * switch the call order of validate/complete switch the call order of validate/complete which accomplish the same effect. Signed-off-by: Adam Zhang --------- Signed-off-by: Adam Zhang --- changelogs/unreleased/9638-adam-jian-zhang | 1 + pkg/cmd/cli/install/install.go | 2 +- pkg/cmd/cli/install/install_test.go | 172 +++++++++++++++++++++ 3 files changed, 174 insertions(+), 1 deletion(-) create mode 100644 changelogs/unreleased/9638-adam-jian-zhang diff --git a/changelogs/unreleased/9638-adam-jian-zhang b/changelogs/unreleased/9638-adam-jian-zhang new file mode 100644 index 000000000..3fd99e5e7 --- /dev/null +++ b/changelogs/unreleased/9638-adam-jian-zhang @@ -0,0 +1 @@ +Fix issue #9636, fix configmap lookup in non-default namespaces diff --git a/pkg/cmd/cli/install/install.go b/pkg/cmd/cli/install/install.go index 83b84ce2f..8115e1353 100644 --- a/pkg/cmd/cli/install/install.go +++ b/pkg/cmd/cli/install/install.go @@ -381,8 +381,8 @@ This is useful as a starting point for more customized installations. # velero install --provider azure --plugins velero/velero-plugin-for-microsoft-azure:v1.0.0 --bucket $BLOB_CONTAINER --secret-file ./credentials-velero --backup-location-config resourceGroup=$AZURE_BACKUP_RESOURCE_GROUP,storageAccount=$AZURE_STORAGE_ACCOUNT_ID[,subscriptionId=$AZURE_BACKUP_SUBSCRIPTION_ID] --snapshot-location-config apiTimeout=[,resourceGroup=$AZURE_BACKUP_RESOURCE_GROUP,subscriptionId=$AZURE_BACKUP_SUBSCRIPTION_ID]`, Run: func(c *cobra.Command, args []string) { - cmd.CheckError(o.Validate(c, args, f)) cmd.CheckError(o.Complete(args, f)) + cmd.CheckError(o.Validate(c, args, f)) cmd.CheckError(o.Run(c, f)) }, } diff --git a/pkg/cmd/cli/install/install_test.go b/pkg/cmd/cli/install/install_test.go index c5d147646..1e0e7a4cf 100644 --- a/pkg/cmd/cli/install/install_test.go +++ b/pkg/cmd/cli/install/install_test.go @@ -17,11 +17,18 @@ limitations under the License. package install import ( + "context" "testing" + "github.com/spf13/cobra" "github.com/spf13/pflag" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + corev1api "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + factorymocks "github.com/vmware-tanzu/velero/pkg/client/mocks" + velerotest "github.com/vmware-tanzu/velero/pkg/test" ) func TestPriorityClassNameFlag(t *testing.T) { @@ -91,3 +98,168 @@ func TestPriorityClassNameFlag(t *testing.T) { }) } } + +// makeValidateCmd returns a minimal *cobra.Command that satisfies output.ValidateFlags. +func makeValidateCmd() *cobra.Command { + c := &cobra.Command{} + // output.ValidateFlags only inspects the "output" flag; add it so validation passes. + c.Flags().StringP("output", "o", "", "output format") + return c +} + +// configMapInNamespace builds a ConfigMap with a single JSON data entry in the given namespace. +func configMapInNamespace(namespace, name, jsonValue string) *corev1api.ConfigMap { + return &corev1api.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: namespace, + Name: name, + }, + Data: map[string]string{ + "config": jsonValue, + }, + } +} + +// TestValidateConfigMapsUseFactoryNamespace verifies that Validate resolves the target +// namespace correctly for all three ConfigMap flags. +// +// The fix (Option B) calls Complete before Validate in NewCommand so that o.Namespace is +// populated from f.Namespace() before VerifyJSONConfigs runs. Tests mirror that order by +// calling Complete before Validate. +func TestValidateConfigMapsUseFactoryNamespace(t *testing.T) { + const targetNS = "tenant-b" + const defaultNS = "default" + + // Shared options that satisfy every other validation gate: + // - NoDefaultBackupLocation=true + UseVolumeSnapshots=false skips provider/bucket/plugins checks + // - NoSecret=true satisfies the secret-file check + baseOptions := func() *Options { + o := NewInstallOptions() + o.NoDefaultBackupLocation = true + o.UseVolumeSnapshots = false + o.NoSecret = true + return o + } + + tests := []struct { + name string + setupOpts func(o *Options, cmName string) + cmJSON string + wantErrMsg string // substring expected in error; empty means success + }{ + { + name: "NodeAgentConfigMap found in factory namespace", + setupOpts: func(o *Options, cmName string) { + o.NodeAgentConfigMap = cmName + }, + cmJSON: `{}`, + }, + { + name: "NodeAgentConfigMap not found when only in default namespace", + setupOpts: func(o *Options, cmName string) { + o.NodeAgentConfigMap = cmName + }, + cmJSON: `{}`, + wantErrMsg: "--node-agent-configmap specified ConfigMap", + }, + { + name: "RepoMaintenanceJobConfigMap found in factory namespace", + setupOpts: func(o *Options, cmName string) { + o.RepoMaintenanceJobConfigMap = cmName + }, + cmJSON: `{}`, + }, + { + name: "RepoMaintenanceJobConfigMap not found when only in default namespace", + setupOpts: func(o *Options, cmName string) { + o.RepoMaintenanceJobConfigMap = cmName + }, + cmJSON: `{}`, + wantErrMsg: "--repo-maintenance-job-configmap specified ConfigMap", + }, + { + name: "BackupRepoConfigMap found in factory namespace", + setupOpts: func(o *Options, cmName string) { + o.BackupRepoConfigMap = cmName + }, + cmJSON: `{}`, + }, + { + name: "BackupRepoConfigMap not found when only in default namespace", + setupOpts: func(o *Options, cmName string) { + o.BackupRepoConfigMap = cmName + }, + cmJSON: `{}`, + wantErrMsg: "--backup-repository-configmap specified ConfigMap", + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + const cmName = "my-config" + + // Decide where to place the ConfigMap: + // "not found" cases put it in "default", so the factory namespace lookup misses it. + cmNamespace := targetNS + if tc.wantErrMsg != "" { + cmNamespace = defaultNS + } + + cm := configMapInNamespace(cmNamespace, cmName, tc.cmJSON) + kbClient := velerotest.NewFakeControllerRuntimeClient(t, cm) + + f := &factorymocks.Factory{} + f.On("Namespace").Return(targetNS) + f.On("KubebuilderClient").Return(kbClient, nil) + + o := baseOptions() + tc.setupOpts(o, cmName) + + // Mirror the NewCommand call order: Complete populates o.Namespace before Validate runs. + require.NoError(t, o.Complete([]string{}, f)) + + c := makeValidateCmd() + c.SetContext(context.Background()) + + err := o.Validate(c, []string{}, f) + + if tc.wantErrMsg == "" { + require.NoError(t, err) + } else { + require.Error(t, err) + assert.Contains(t, err.Error(), tc.wantErrMsg) + } + }) + } +} + +// TestNewCommandRunClosureOrder covers the Run closure in NewCommand (the lines that were +// reordered by the fix: Complete → Validate → Run). +// +// The closure uses CheckError which calls os.Exit on any error, so the only safe path is one +// where all three steps return nil. DryRun=true causes o.Run to return after PrintWithFormat +// (which is a no-op when no --output flag is set) without touching any cluster clients. +func TestNewCommandRunClosureOrder(t *testing.T) { + const targetNS = "tenant-b" + const cmName = "my-config" + + cm := configMapInNamespace(targetNS, cmName, `{}`) + kbClient := velerotest.NewFakeControllerRuntimeClient(t, cm) + + f := &factorymocks.Factory{} + f.On("Namespace").Return(targetNS) + f.On("KubebuilderClient").Return(kbClient, nil) + + c := NewCommand(f) + c.SetArgs([]string{ + "--no-default-backup-location", + "--use-volume-snapshots=false", + "--no-secret", + "--dry-run", + "--node-agent-configmap", cmName, + }) + + // Execute drives the full Run closure: Complete populates o.Namespace, Validate + // looks up the ConfigMap in targetNS (succeeds), Run returns early via DryRun. + require.NoError(t, c.Execute()) +} From e9d312c27e130262543e1fb9f2547feecdcb2cf0 Mon Sep 17 00:00:00 2001 From: Priyansh Choudhary Date: Tue, 24 Mar 2026 20:34:26 +0530 Subject: [PATCH 06/12] refactor: simplify VolumeSnapshotContent deletion logic and remove unused timeout handling Signed-off-by: Priyansh Choudhary --- .../csi/volumesnapshotcontent_action.go | 47 +++++++------------ .../csi/volumesnapshotcontent_action_test.go | 6 +-- 2 files changed, 20 insertions(+), 33 deletions(-) diff --git a/internal/delete/actions/csi/volumesnapshotcontent_action.go b/internal/delete/actions/csi/volumesnapshotcontent_action.go index a8b43d456..51ab49455 100644 --- a/internal/delete/actions/csi/volumesnapshotcontent_action.go +++ b/internal/delete/actions/csi/volumesnapshotcontent_action.go @@ -18,7 +18,6 @@ package csi import ( "context" - "time" "github.com/google/uuid" snapshotv1api "github.com/kubernetes-csi/external-snapshotter/client/v8/apis/volumesnapshot/v1" @@ -27,14 +26,11 @@ import ( corev1api "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/util/wait" crclient "sigs.k8s.io/controller-runtime/pkg/client" - velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" "github.com/vmware-tanzu/velero/pkg/client" plugincommon "github.com/vmware-tanzu/velero/pkg/plugin/framework/common" "github.com/vmware-tanzu/velero/pkg/plugin/velero" - "github.com/vmware-tanzu/velero/pkg/util/boolptr" kubeutil "github.com/vmware-tanzu/velero/pkg/util/kube" ) @@ -127,34 +123,22 @@ func (p *volumeSnapshotContentDeleteItemAction) Execute( } p.log.Infof("Created temp VolumeSnapshotContent %s with DeletionPolicy=Delete to trigger cloud snapshot cleanup", snapCont.Name) - // Read resource timeout from backup annotation, if not set, use default value. - timeout, err := time.ParseDuration( - input.Backup.Annotations[velerov1api.ResourceTimeoutAnnotation]) - if err != nil { - p.log.Warnf("fail to parse resource timeout annotation %s: %s", - input.Backup.Annotations[velerov1api.ResourceTimeoutAnnotation], err.Error()) - timeout = 10 * time.Minute - } - p.log.Debugf("resource timeout is set to %s", timeout.String()) - - interval := 5 * time.Second - - // Wait until VSC created and ReadyToUse is true. - if err := wait.PollUntilContextTimeout( - context.Background(), - interval, - timeout, - true, - func(ctx context.Context) (bool, error) { - return checkVSCReadiness(ctx, &snapCont, p.crClient) - }, - ); err != nil { - // Clean up the VSC we created since it can't become ready - p.log.WithError(err).Warnf("Temp VolumeSnapshotContent %s did not become ready, cleaning up", snapCont.Name) + // Check if the VSC is ready before proceeding to deletion. + ready, err := checkVSCReadiness(context.TODO(), &snapCont, p.crClient) + if err != nil || !ready { + // Clean up the VSC we created since it isn't ready + if err != nil { + p.log.WithError(err).Warnf("Temp VolumeSnapshotContent %s is not ready, cleaning up", snapCont.Name) + } else { + p.log.Warnf("Temp VolumeSnapshotContent %s is not ready, cleaning up", snapCont.Name) + } if deleteErr := p.crClient.Delete(context.TODO(), &snapCont); deleteErr != nil && !apierrors.IsNotFound(deleteErr) { p.log.WithError(deleteErr).Errorf("Failed to clean up temp VolumeSnapshotContent %s", snapCont.Name) } - return errors.Wrapf(err, "fail to wait VolumeSnapshotContent %s becomes ready.", snapCont.Name) + if err != nil { + return errors.Wrapf(err, "VolumeSnapshotContent %s is not ready", snapCont.Name) + } + return errors.Errorf("VolumeSnapshotContent %s is not ready", snapCont.Name) } p.log.Infof("Temp VolumeSnapshotContent %s is ready, deleting to trigger cloud snapshot removal", snapCont.Name) @@ -212,6 +196,9 @@ func (p *volumeSnapshotContentDeleteItemAction) tryDeleteOriginalVSC( return true } +// checkVSCReadiness checks if the given VolumeSnapshotContent has a SnapshotHandle in its status, +// which indicates that the CSI driver has processed the VSC and it's ready for deletion. +// It also checks for any permanent errors reported by the CSI driver and fails fast if such an error is found. var checkVSCReadiness = func( ctx context.Context, vsc *snapshotv1api.VolumeSnapshotContent, @@ -224,7 +211,7 @@ var checkVSCReadiness = func( ) } - if tmpVSC.Status != nil && boolptr.IsSetToTrue(tmpVSC.Status.ReadyToUse) { + if tmpVSC.Status != nil && tmpVSC.Status.SnapshotHandle != nil { return true, nil } diff --git a/internal/delete/actions/csi/volumesnapshotcontent_action_test.go b/internal/delete/actions/csi/volumesnapshotcontent_action_test.go index ee96018fe..3f0df2120 100644 --- a/internal/delete/actions/csi/volumesnapshotcontent_action_test.go +++ b/internal/delete/actions/csi/volumesnapshotcontent_action_test.go @@ -103,7 +103,7 @@ func TestVSCExecute(t *testing.T) { { name: "Normal case, VolumeSnapshot should be deleted", vsc: builder.ForVolumeSnapshotContent("bar").ObjectMeta(builder.WithLabelsMap(map[string]string{velerov1api.BackupNameLabel: "backup"})).VolumeSnapshotClassName("volumesnapshotclass").Status(&snapshotv1api.VolumeSnapshotContentStatus{SnapshotHandle: &snapshotHandleStr}).Result(), - backup: builder.ForBackup("velero", "backup").ObjectMeta(builder.WithAnnotationsMap(map[string]string{velerov1api.ResourceTimeoutAnnotation: "5s"})).Result(), + backup: builder.ForBackup("velero", "backup").Result(), expectErr: false, function: func( ctx context.Context, @@ -116,7 +116,7 @@ func TestVSCExecute(t *testing.T) { { name: "Error case, deletion fails", vsc: builder.ForVolumeSnapshotContent("bar").ObjectMeta(builder.WithLabelsMap(map[string]string{velerov1api.BackupNameLabel: "backup"})).Status(&snapshotv1api.VolumeSnapshotContentStatus{SnapshotHandle: &snapshotHandleStr}).Result(), - backup: builder.ForBackup("velero", "backup").ObjectMeta(builder.WithAnnotationsMap(map[string]string{velerov1api.ResourceTimeoutAnnotation: "5s"})).Result(), + backup: builder.ForBackup("velero", "backup").Result(), expectErr: true, function: func( ctx context.Context, @@ -144,7 +144,7 @@ func TestVSCExecute(t *testing.T) { { name: "Error case with CSI error, dangling VSC should be cleaned up", vsc: builder.ForVolumeSnapshotContent("bar").ObjectMeta(builder.WithLabelsMap(map[string]string{velerov1api.BackupNameLabel: "backup"})).Status(&snapshotv1api.VolumeSnapshotContentStatus{SnapshotHandle: &snapshotHandleStr}).Result(), - backup: builder.ForBackup("velero", "backup").ObjectMeta(builder.WithAnnotationsMap(map[string]string{velerov1api.ResourceTimeoutAnnotation: "5s"})).Result(), + backup: builder.ForBackup("velero", "backup").Result(), expectErr: true, function: func( ctx context.Context, From 905a561c84b4e390ca03db5ce745a09c6c3ae36f Mon Sep 17 00:00:00 2001 From: Priyansh Choudhary Date: Tue, 24 Mar 2026 20:49:08 +0530 Subject: [PATCH 07/12] added changelog Signed-off-by: Priyansh Choudhary --- changelogs/unreleased/9643-priyansh17 | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelogs/unreleased/9643-priyansh17 diff --git a/changelogs/unreleased/9643-priyansh17 b/changelogs/unreleased/9643-priyansh17 new file mode 100644 index 000000000..10870c871 --- /dev/null +++ b/changelogs/unreleased/9643-priyansh17 @@ -0,0 +1 @@ +Fix issue #9641, Remove redundant ReadyToUse polling in CSI VolumeSnapshotContent delete plugin \ No newline at end of file From c74d5e7aba5c765a37aa96a6379f369e1c847638 Mon Sep 17 00:00:00 2001 From: Priyansh Choudhary Date: Wed, 25 Mar 2026 15:23:11 +0530 Subject: [PATCH 08/12] refactor: streamline VolumeSnapshotContent deletion process and remove readiness checks Signed-off-by: Priyansh Choudhary --- .../csi/volumesnapshotcontent_action.go | 50 +------- .../csi/volumesnapshotcontent_action_test.go | 110 +----------------- 2 files changed, 3 insertions(+), 157 deletions(-) diff --git a/internal/delete/actions/csi/volumesnapshotcontent_action.go b/internal/delete/actions/csi/volumesnapshotcontent_action.go index 51ab49455..98e0fc03b 100644 --- a/internal/delete/actions/csi/volumesnapshotcontent_action.go +++ b/internal/delete/actions/csi/volumesnapshotcontent_action.go @@ -123,25 +123,8 @@ func (p *volumeSnapshotContentDeleteItemAction) Execute( } p.log.Infof("Created temp VolumeSnapshotContent %s with DeletionPolicy=Delete to trigger cloud snapshot cleanup", snapCont.Name) - // Check if the VSC is ready before proceeding to deletion. - ready, err := checkVSCReadiness(context.TODO(), &snapCont, p.crClient) - if err != nil || !ready { - // Clean up the VSC we created since it isn't ready - if err != nil { - p.log.WithError(err).Warnf("Temp VolumeSnapshotContent %s is not ready, cleaning up", snapCont.Name) - } else { - p.log.Warnf("Temp VolumeSnapshotContent %s is not ready, cleaning up", snapCont.Name) - } - if deleteErr := p.crClient.Delete(context.TODO(), &snapCont); deleteErr != nil && !apierrors.IsNotFound(deleteErr) { - p.log.WithError(deleteErr).Errorf("Failed to clean up temp VolumeSnapshotContent %s", snapCont.Name) - } - if err != nil { - return errors.Wrapf(err, "VolumeSnapshotContent %s is not ready", snapCont.Name) - } - return errors.Errorf("VolumeSnapshotContent %s is not ready", snapCont.Name) - } - - p.log.Infof("Temp VolumeSnapshotContent %s is ready, deleting to trigger cloud snapshot removal", snapCont.Name) + // Delete the temp VSC immediately to trigger cloud snapshot removal. + // The CSI driver will handle the actual cloud snapshot deletion. if err := p.crClient.Delete( context.TODO(), &snapCont, @@ -196,35 +179,6 @@ func (p *volumeSnapshotContentDeleteItemAction) tryDeleteOriginalVSC( return true } -// checkVSCReadiness checks if the given VolumeSnapshotContent has a SnapshotHandle in its status, -// which indicates that the CSI driver has processed the VSC and it's ready for deletion. -// It also checks for any permanent errors reported by the CSI driver and fails fast if such an error is found. -var checkVSCReadiness = func( - ctx context.Context, - vsc *snapshotv1api.VolumeSnapshotContent, - client crclient.Client, -) (bool, error) { - tmpVSC := new(snapshotv1api.VolumeSnapshotContent) - if err := client.Get(ctx, crclient.ObjectKeyFromObject(vsc), tmpVSC); err != nil { - return false, errors.Wrapf( - err, "failed to get VolumeSnapshotContent %s", vsc.Name, - ) - } - - if tmpVSC.Status != nil && tmpVSC.Status.SnapshotHandle != nil { - return true, nil - } - - // Fail fast on permanent CSI driver errors (e.g., InvalidSnapshot.NotFound) - if tmpVSC.Status != nil && tmpVSC.Status.Error != nil && tmpVSC.Status.Error.Message != nil { - return false, errors.Errorf( - "VolumeSnapshotContent %s has error: %s", vsc.Name, *tmpVSC.Status.Error.Message, - ) - } - - return false, nil -} - func NewVolumeSnapshotContentDeleteItemAction( f client.Factory, ) plugincommon.HandlerInitializer { diff --git a/internal/delete/actions/csi/volumesnapshotcontent_action_test.go b/internal/delete/actions/csi/volumesnapshotcontent_action_test.go index 3f0df2120..114bd752f 100644 --- a/internal/delete/actions/csi/volumesnapshotcontent_action_test.go +++ b/internal/delete/actions/csi/volumesnapshotcontent_action_test.go @@ -22,7 +22,6 @@ import ( "testing" snapshotv1api "github.com/kubernetes-csi/external-snapshotter/client/v8/apis/volumesnapshot/v1" - "github.com/pkg/errors" "github.com/sirupsen/logrus" "github.com/stretchr/testify/require" corev1api "k8s.io/api/core/v1" @@ -76,12 +75,7 @@ func TestVSCExecute(t *testing.T) { vsc *snapshotv1api.VolumeSnapshotContent backup *velerov1api.Backup preExistingVSC *snapshotv1api.VolumeSnapshotContent - function func( - ctx context.Context, - vsc *snapshotv1api.VolumeSnapshotContent, - client crclient.Client, - ) (bool, error) - expectErr bool + expectErr bool }{ { name: "VolumeSnapshotContent doesn't have backup label", @@ -105,26 +99,6 @@ func TestVSCExecute(t *testing.T) { vsc: builder.ForVolumeSnapshotContent("bar").ObjectMeta(builder.WithLabelsMap(map[string]string{velerov1api.BackupNameLabel: "backup"})).VolumeSnapshotClassName("volumesnapshotclass").Status(&snapshotv1api.VolumeSnapshotContentStatus{SnapshotHandle: &snapshotHandleStr}).Result(), backup: builder.ForBackup("velero", "backup").Result(), expectErr: false, - function: func( - ctx context.Context, - vsc *snapshotv1api.VolumeSnapshotContent, - client crclient.Client, - ) (bool, error) { - return true, nil - }, - }, - { - name: "Error case, deletion fails", - vsc: builder.ForVolumeSnapshotContent("bar").ObjectMeta(builder.WithLabelsMap(map[string]string{velerov1api.BackupNameLabel: "backup"})).Status(&snapshotv1api.VolumeSnapshotContentStatus{SnapshotHandle: &snapshotHandleStr}).Result(), - backup: builder.ForBackup("velero", "backup").Result(), - expectErr: true, - function: func( - ctx context.Context, - vsc *snapshotv1api.VolumeSnapshotContent, - client crclient.Client, - ) (bool, error) { - return false, errors.Errorf("test error case") - }, }, { name: "Original VSC exists in cluster, cleaned up directly", @@ -141,26 +115,12 @@ func TestVSCExecute(t *testing.T) { }, }, }, - { - name: "Error case with CSI error, dangling VSC should be cleaned up", - vsc: builder.ForVolumeSnapshotContent("bar").ObjectMeta(builder.WithLabelsMap(map[string]string{velerov1api.BackupNameLabel: "backup"})).Status(&snapshotv1api.VolumeSnapshotContentStatus{SnapshotHandle: &snapshotHandleStr}).Result(), - backup: builder.ForBackup("velero", "backup").Result(), - expectErr: true, - function: func( - ctx context.Context, - vsc *snapshotv1api.VolumeSnapshotContent, - client crclient.Client, - ) (bool, error) { - return false, errors.Errorf("VolumeSnapshotContent %s has error: InvalidSnapshot.NotFound", vsc.Name) - }, - }, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { crClient := velerotest.NewFakeControllerRuntimeClient(t) logger := logrus.StandardLogger() - checkVSCReadiness = test.function if test.preExistingVSC != nil { require.NoError(t, crClient.Create(t.Context(), test.preExistingVSC)) @@ -222,74 +182,6 @@ func TestNewVolumeSnapshotContentDeleteItemAction(t *testing.T) { require.NoError(t, err1) } -func TestCheckVSCReadiness(t *testing.T) { - tests := []struct { - name string - vsc *snapshotv1api.VolumeSnapshotContent - createVSC bool - expectErr bool - ready bool - }{ - { - name: "VSC not exist", - vsc: &snapshotv1api.VolumeSnapshotContent{ - ObjectMeta: metav1.ObjectMeta{ - Name: "vsc-1", - Namespace: "velero", - }, - }, - createVSC: false, - expectErr: true, - ready: false, - }, - { - name: "VSC not ready", - vsc: &snapshotv1api.VolumeSnapshotContent{ - ObjectMeta: metav1.ObjectMeta{ - Name: "vsc-1", - Namespace: "velero", - }, - }, - createVSC: true, - expectErr: false, - ready: false, - }, - { - name: "VSC has error from CSI driver", - vsc: &snapshotv1api.VolumeSnapshotContent{ - ObjectMeta: metav1.ObjectMeta{ - Name: "vsc-1", - Namespace: "velero", - }, - Status: &snapshotv1api.VolumeSnapshotContentStatus{ - ReadyToUse: boolPtr(false), - Error: &snapshotv1api.VolumeSnapshotError{ - Message: stringPtr("InvalidSnapshot.NotFound: The snapshot 'snap-0abc123' does not exist."), - }, - }, - }, - createVSC: true, - expectErr: true, - ready: false, - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - crClient := velerotest.NewFakeControllerRuntimeClient(t) - if test.createVSC { - require.NoError(t, crClient.Create(t.Context(), test.vsc)) - } - - ready, err := checkVSCReadiness(t.Context(), test.vsc, crClient) - require.Equal(t, test.ready, ready) - if test.expectErr { - require.Error(t, err) - } - }) - } -} - func TestTryDeleteOriginalVSC(t *testing.T) { tests := []struct { name string From 91922103b44f0a71f5a20bd4420dc8fa2fe65023 Mon Sep 17 00:00:00 2001 From: Xun Jiang Date: Fri, 27 Mar 2026 16:09:53 +0800 Subject: [PATCH 09/12] Update the trivy-action version from main to specific tag to fix supply chain attack https://www.aquasec.com/blog/trivy-supply-chain-attack-what-you-need-to-know/ Signed-off-by: Xun Jiang --- .github/workflows/nightly-trivy-scan.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/nightly-trivy-scan.yml b/.github/workflows/nightly-trivy-scan.yml index 0f5fe0a68..85ce3cdc5 100644 --- a/.github/workflows/nightly-trivy-scan.yml +++ b/.github/workflows/nightly-trivy-scan.yml @@ -22,7 +22,7 @@ jobs: uses: actions/checkout@v6 - name: Run Trivy vulnerability scanner - uses: aquasecurity/trivy-action@master + uses: aquasecurity/trivy-action@57a97c7e7821a5776cebc9bb87c984fa69cba8f1 with: image-ref: 'docker.io/velero/${{ matrix.images }}:${{ matrix.versions }}' severity: 'CRITICAL,HIGH,MEDIUM' From f0aa64172eb44b26260215a24b07b6fbea15f635 Mon Sep 17 00:00:00 2001 From: Xun Jiang/Bruce Jiang <59276555+blackpiglet@users.noreply.github.com> Date: Wed, 1 Apr 2026 01:55:07 +0800 Subject: [PATCH 10/12] Fix Repository Maintenance Job Configuration's global part E2E case. (#9633) Signed-off-by: Xun Jiang --- test/e2e/repomaintenance/repo_maintenance_config.go | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/test/e2e/repomaintenance/repo_maintenance_config.go b/test/e2e/repomaintenance/repo_maintenance_config.go index 1c616ea60..c0092c4bf 100644 --- a/test/e2e/repomaintenance/repo_maintenance_config.go +++ b/test/e2e/repomaintenance/repo_maintenance_config.go @@ -55,10 +55,12 @@ var GlobalRepoMaintenanceTest func() = TestFunc(&RepoMaintenanceTestCase{ jobConfigs: velerotypes.JobConfigs{ KeepLatestMaintenanceJobs: &keepJobNum, PodResources: &velerokubeutil.PodResources{ - CPURequest: "100m", - MemoryRequest: "100Mi", - CPULimit: "200m", - MemoryLimit: "200Mi", + CPURequest: "100m", + MemoryRequest: "100Mi", + EphemeralStorageRequest: "5Gi", + CPULimit: "200m", + MemoryLimit: "200Mi", + EphemeralStorageLimit: "10Gi", }, PriorityClassName: test.PriorityClassNameForRepoMaintenance, }, From 5433eb308166d0850efd583dc8fbbcadfc874d9f Mon Sep 17 00:00:00 2001 From: Gabriele Fedi Date: Wed, 1 Apr 2026 20:27:18 +0200 Subject: [PATCH 11/12] feat: support backup hooks on native sidecars (#9403) * feat: support backup hooks on sidecars Add support for configuring Kubernates native Sidecars as target containrs for Backup Hooks commands. This is purely a validation level patch as the actual pods/exec API doesn't make any distinction between standard and sidecar containers. Signed-off-by: Gabriele Fedi * test: extend unit tests Signed-off-by: Gabriele Fedi * chore: changelog Signed-off-by: Gabriele Fedi * style: fix linter issues Signed-off-by: Gabriele Fedi --------- Signed-off-by: Gabriele Fedi --- changelogs/unreleased/9403-GabriFedi97 | 1 + pkg/podexec/pod_command_executor.go | 21 +++++++++++++++++---- pkg/podexec/pod_command_executor_test.go | 18 +++++++++++++++++- 3 files changed, 35 insertions(+), 5 deletions(-) create mode 100644 changelogs/unreleased/9403-GabriFedi97 diff --git a/changelogs/unreleased/9403-GabriFedi97 b/changelogs/unreleased/9403-GabriFedi97 new file mode 100644 index 000000000..515549220 --- /dev/null +++ b/changelogs/unreleased/9403-GabriFedi97 @@ -0,0 +1 @@ +Include InitContainer configured as Sidecars when validating the existence of the target containers configured for the Backup Hooks diff --git a/pkg/podexec/pod_command_executor.go b/pkg/podexec/pod_command_executor.go index b1d7fbf59..0dbc28e95 100644 --- a/pkg/podexec/pod_command_executor.go +++ b/pkg/podexec/pod_command_executor.go @@ -20,6 +20,7 @@ import ( "bytes" "context" "net/url" + "slices" "time" "github.com/pkg/errors" @@ -184,10 +185,22 @@ func (e *defaultPodCommandExecutor) ExecutePodCommand(log logrus.FieldLogger, it } func ensureContainerExists(pod *corev1api.Pod, container string) error { - for _, c := range pod.Spec.Containers { - if c.Name == container { - return nil - } + existsAsMainContainer := slices.ContainsFunc(pod.Spec.Containers, func(c corev1api.Container) bool { + return c.Name == container + }) + + if existsAsMainContainer { + return nil + } + + existsAsSidecar := slices.ContainsFunc(pod.Spec.InitContainers, func(c corev1api.Container) bool { + return c.RestartPolicy != nil && + *c.RestartPolicy == corev1api.ContainerRestartPolicyAlways && + c.Name == container + }) + + if existsAsSidecar { + return nil } return errors.Errorf("no such container: %q", container) diff --git a/pkg/podexec/pod_command_executor_test.go b/pkg/podexec/pod_command_executor_test.go index e28eb0458..3286ba42d 100644 --- a/pkg/podexec/pod_command_executor_test.go +++ b/pkg/podexec/pod_command_executor_test.go @@ -35,6 +35,7 @@ import ( "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/client-go/rest" "k8s.io/client-go/tools/remotecommand" + "k8s.io/utils/ptr" v1 "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" velerotest "github.com/vmware-tanzu/velero/pkg/test" @@ -253,6 +254,15 @@ func TestEnsureContainerExists(t *testing.T) { Name: "foo", }, }, + InitContainers: []corev1api.Container{ + { + Name: "baz", + }, + { + Name: "qux", + RestartPolicy: ptr.To(corev1api.ContainerRestartPolicyAlways), + }, + }, }, } @@ -260,7 +270,13 @@ func TestEnsureContainerExists(t *testing.T) { require.EqualError(t, err, `no such container: "bar"`) err = ensureContainerExists(pod, "foo") - assert.NoError(t, err) + require.NoError(t, err) + + err = ensureContainerExists(pod, "baz") + require.EqualError(t, err, `no such container: "baz"`) + + err = ensureContainerExists(pod, "qux") + require.NoError(t, err) } func TestPodCompeted(t *testing.T) { From 94259e8a5c2770e18752c32518343c87e27f6bbb Mon Sep 17 00:00:00 2001 From: Xun Jiang Date: Thu, 2 Apr 2026 11:09:19 +0800 Subject: [PATCH 12/12] Pin the sigs.k8s.io/controller-runtime to v0.23.2 The tag used to latest. Due to latest tag v0.23.3 already used Golang v1.26, Velero main still uses v1.25. Build failed. To fix this, pin the controller-runtime to v0.23.2 Signed-off-by: Xun Jiang --- hack/build-image/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hack/build-image/Dockerfile b/hack/build-image/Dockerfile index 65378b22e..0a60e6a16 100644 --- a/hack/build-image/Dockerfile +++ b/hack/build-image/Dockerfile @@ -22,7 +22,7 @@ ENV GOPROXY=${GOPROXY} # kubebuilder test bundle is separated from kubebuilder. Need to setup it for CI test. # Using setup-envtest to download envtest binaries -RUN go install sigs.k8s.io/controller-runtime/tools/setup-envtest@latest && \ +RUN go install sigs.k8s.io/controller-runtime/tools/setup-envtest@v0.0.0-20260305094418-8122a6266696 && \ mkdir -p /usr/local/kubebuilder/bin && \ ENVTEST_ASSETS_DIR=$(setup-envtest use 1.33.0 --bin-dir /usr/local/kubebuilder/bin -p path) && \ cp -r ${ENVTEST_ASSETS_DIR}/* /usr/local/kubebuilder/bin/