From ff8a070dd37b37aede18ee185f5a11d99054a01e Mon Sep 17 00:00:00 2001 From: Priyansh Choudhary Date: Mon, 21 Jul 2025 20:22:55 +0530 Subject: [PATCH] Refactor Snapshot Checkpoint in E2E Tests (#9083) * Refactor backup volume info retrieval and snapshot checkpoint building in e2e tests Signed-off-by: Priyansh Choudhary log backup volume info retrieval and snapshot checkpoint building Signed-off-by: Priyansh Choudhary Add error handling for volume info retrieval in backup tests Signed-off-by: Priyansh Choudhary Add error handling for volume info retrieval in backup tests Signed-off-by: Priyansh Choudhary * Update snapshot checkpoint building to use DefaultKibishiiWorkerCounts Signed-off-by: Priyansh Choudhary --------- Signed-off-by: Priyansh Choudhary --- test/e2e/backups/deletion.go | 16 ++++++++++-- test/e2e/backups/ttl.go | 18 ++++++++++--- test/e2e/bsl-mgmt/deletion.go | 38 ++++++++++++++++++++++------ test/e2e/migration/migration.go | 21 ++++++++++++--- test/e2e/upgrade/upgrade.go | 13 ++++++++-- test/util/kibishii/kibishii_utils.go | 20 +++++++++++++-- test/util/velero/velero_utils.go | 28 ++++++++++++++++---- 7 files changed, 128 insertions(+), 26 deletions(-) diff --git a/test/e2e/backups/deletion.go b/test/e2e/backups/deletion.go index b24325b5c..54388b70a 100644 --- a/test/e2e/backups/deletion.go +++ b/test/e2e/backups/deletion.go @@ -166,12 +166,24 @@ func runBackupDeletionTests(client TestClient, veleroCfg VeleroConfig, backupLoc return err } + backupVolumeInfo, err := GetVolumeInfo( + veleroCfg.ObjectStoreProvider, + veleroCfg.CloudCredentialsFile, + veleroCfg.BSLBucket, + veleroCfg.BSLPrefix, + veleroCfg.BSLConfig, + backupName, + BackupObjectsPrefix+"/"+backupName, + ) + if err != nil { + return errors.Wrapf(err, "Failed to get volume info for backup %s", backupName) + } if useVolumeSnapshots { // Check for snapshots existence if veleroCfg.CloudProvider == Vsphere { // For vSphere, checking snapshot should base on namespace and backup name for _, ns 
:= range workloadNamespaceList { - snapshotCheckPoint, err = GetSnapshotCheckPoint(client, veleroCfg, DefaultKibishiiWorkerCounts, ns, backupName, KibishiiPVCNameList) + snapshotCheckPoint, err := BuildSnapshotCheckPointFromVolumeInfo(veleroCfg, backupVolumeInfo, DefaultKibishiiWorkerCounts, ns, backupName, KibishiiPVCNameList) Expect(err).NotTo(HaveOccurred(), "Fail to get Azure CSI snapshot checkpoint") err = CheckSnapshotsInProvider( veleroCfg, @@ -186,7 +198,7 @@ func runBackupDeletionTests(client TestClient, veleroCfg VeleroConfig, backupLoc } else { // For public cloud, When using backup name to index VolumeSnapshotContents, make sure count of VolumeSnapshotContents should including PVs in all namespace // so VolumeSnapshotContents count should be equal to "namespace count" * "Kibishii worker count per namespace". - snapshotCheckPoint, err = GetSnapshotCheckPoint(client, veleroCfg, DefaultKibishiiWorkerCounts*nsCount, "", backupName, KibishiiPVCNameList) + snapshotCheckPoint, err := BuildSnapshotCheckPointFromVolumeInfo(veleroCfg, backupVolumeInfo, DefaultKibishiiWorkerCounts*nsCount, "", backupName, KibishiiPVCNameList) Expect(err).NotTo(HaveOccurred(), "Fail to get Azure CSI snapshot checkpoint") // Get all snapshots base on backup name, regardless of namespaces diff --git a/test/e2e/backups/ttl.go b/test/e2e/backups/ttl.go index e65506fa3..f781c32d4 100644 --- a/test/e2e/backups/ttl.go +++ b/test/e2e/backups/ttl.go @@ -54,7 +54,6 @@ func (b *TTL) Init() { } func TTLTest() { - var err error var veleroCfg VeleroConfig useVolumeSnapshots := true test := new(TTL) @@ -138,10 +137,21 @@ func TTLTest() { }) } - snapshotCheckPoint, err = GetSnapshotCheckPoint( - client, + backupVolumeInfo, err := GetVolumeInfo( + veleroCfg.ObjectStoreProvider, + veleroCfg.CloudCredentialsFile, + veleroCfg.BSLBucket, + veleroCfg.BSLPrefix, + veleroCfg.BSLConfig, + test.backupName, + BackupObjectsPrefix+"/"+test.backupName, + ) + Expect(err).NotTo(HaveOccurred(), "Failed to get 
volume info for backup") + + snapshotCheckPoint, err = BuildSnapshotCheckPointFromVolumeInfo( veleroCfg, - 2, + backupVolumeInfo, + DefaultKibishiiWorkerCounts, test.testNS, test.backupName, KibishiiPVCNameList, diff --git a/test/e2e/bsl-mgmt/deletion.go b/test/e2e/bsl-mgmt/deletion.go index 9b79c6a8e..838aa67b5 100644 --- a/test/e2e/bsl-mgmt/deletion.go +++ b/test/e2e/bsl-mgmt/deletion.go @@ -52,7 +52,6 @@ func BslDeletionWithRestic() { } func BslDeletionTest(useVolumeSnapshots bool) { var ( - err error veleroCfg VeleroConfig ) veleroCfg = VeleroCfg @@ -230,13 +229,25 @@ func BslDeletionTest(useVolumeSnapshots bool) { backupName2, 1)).To(Succeed()) }) } + + backupVolumeInfo, err := GetVolumeInfo( + veleroCfg.ObjectStoreProvider, + veleroCfg.CloudCredentialsFile, + veleroCfg.BSLBucket, + veleroCfg.BSLPrefix, + veleroCfg.BSLConfig, + backupName1, + BackupObjectsPrefix+"/"+backupName1, + ) + Expect(err).NotTo(HaveOccurred(), "Failed to get volume info for backup") + if veleroCfg.CloudProvider != VanillaZFS { var snapshotCheckPoint SnapshotCheckPoint snapshotCheckPoint.NamespaceBackedUp = bslDeletionTestNs By(fmt.Sprintf("Snapshot of bsl %s should be created in cloud object store", backupLocation1), func() { - snapshotCheckPoint, err = GetSnapshotCheckPoint( - *veleroCfg.ClientToInstallVelero, + snapshotCheckPoint, err = BuildSnapshotCheckPointFromVolumeInfo( veleroCfg, + backupVolumeInfo, 1, bslDeletionTestNs, backupName1, @@ -251,9 +262,9 @@ func BslDeletionTest(useVolumeSnapshots bool) { )).To(Succeed()) }) By(fmt.Sprintf("Snapshot of bsl %s should be created in cloud object store", backupLocation2), func() { - snapshotCheckPoint, err = GetSnapshotCheckPoint( - *veleroCfg.ClientToInstallVelero, + snapshotCheckPoint, err = BuildSnapshotCheckPointFromVolumeInfo( veleroCfg, + backupVolumeInfo, 1, bslDeletionTestNs, backupName2, @@ -343,10 +354,21 @@ func BslDeletionTest(useVolumeSnapshots bool) { }) } + backupVolumeInfo, err := GetVolumeInfo( + 
veleroCfg.ObjectStoreProvider, + veleroCfg.CloudCredentialsFile, + veleroCfg.BSLBucket, + veleroCfg.BSLPrefix, + veleroCfg.BSLConfig, + backupName1, + BackupObjectsPrefix+"/"+backupName1, + ) + Expect(err).NotTo(HaveOccurred(), "Failed to get volume info for backup") + var snapshotCheckPoint SnapshotCheckPoint snapshotCheckPoint.NamespaceBackedUp = bslDeletionTestNs By(fmt.Sprintf("Snapshot should not be deleted in cloud object store after deleting bsl %s", backupLocation1), func() { - snapshotCheckPoint, err = GetSnapshotCheckPoint(*veleroCfg.ClientToInstallVelero, veleroCfg, 1, bslDeletionTestNs, backupName1, []string{podName1}) + snapshotCheckPoint, err = BuildSnapshotCheckPointFromVolumeInfo(veleroCfg, backupVolumeInfo, 1, bslDeletionTestNs, backupName1, []string{podName1}) Expect(err).NotTo(HaveOccurred(), "Fail to get Azure CSI snapshot checkpoint") Expect(CheckSnapshotsInProvider( veleroCfg, @@ -356,9 +378,9 @@ func BslDeletionTest(useVolumeSnapshots bool) { )).To(Succeed()) }) By(fmt.Sprintf("Snapshot should not be deleted in cloud object store after deleting bsl %s", backupLocation2), func() { - snapshotCheckPoint, err = GetSnapshotCheckPoint( - *veleroCfg.ClientToInstallVelero, + snapshotCheckPoint, err = BuildSnapshotCheckPointFromVolumeInfo( veleroCfg, + backupVolumeInfo, 1, bslDeletionTestNs, backupName2, diff --git a/test/e2e/migration/migration.go b/test/e2e/migration/migration.go index 32119846d..1c896aa9f 100644 --- a/test/e2e/migration/migration.go +++ b/test/e2e/migration/migration.go @@ -35,6 +35,8 @@ import ( veleroutil "github.com/vmware-tanzu/velero/test/util/velero" ) +const BackupObjectsPrefix = "backups" + type migrationE2E struct { framework.TestCase useVolumeSnapshots bool @@ -263,6 +265,8 @@ func (m *migrationE2E) Backup() error { snapshotCheckPoint.NamespaceBackedUp = m.CaseBaseName if OriginVeleroCfg.SnapshotMoveData { + // todo: Remove this as VSC are not preserved post backup. It's 0 by default. 
+ //VolumeSnapshotContent should be deleted after data movement _, err := util.CheckVolumeSnapshotCR( *m.VeleroCfg.DefaultClient, @@ -284,16 +288,27 @@ func (m *migrationE2E) Backup() error { } By("Snapshot should be created in cloud object store with retain policy", func() { - snapshotCheckPoint, err = veleroutil.GetSnapshotCheckPoint( - *OriginVeleroCfg.DefaultClient, + backupVolumeInfo, err := providers.GetVolumeInfo( + OriginVeleroCfg.ObjectStoreProvider, + OriginVeleroCfg.CloudCredentialsFile, + OriginVeleroCfg.BSLBucket, + OriginVeleroCfg.BSLPrefix, + OriginVeleroCfg.BSLConfig, + m.BackupName, + BackupObjectsPrefix+"/"+m.BackupName, + ) + Expect(err).NotTo(HaveOccurred(), "Failed to get volume info for backup") + + snapshotCheckPoint, err := veleroutil.BuildSnapshotCheckPointFromVolumeInfo( OriginVeleroCfg, + backupVolumeInfo, m.kibishiiData.ExpectedNodes, m.CaseBaseName, m.BackupName, kibishii.GetKibishiiPVCNameList(m.kibishiiData.ExpectedNodes), ) - Expect(err).NotTo(HaveOccurred(), "Fail to get snapshot checkpoint") + Expect(providers.CheckSnapshotsInProvider( OriginVeleroCfg, m.BackupName, diff --git a/test/e2e/upgrade/upgrade.go b/test/e2e/upgrade/upgrade.go index 4b4a9c824..6832c160c 100644 --- a/test/e2e/upgrade/upgrade.go +++ b/test/e2e/upgrade/upgrade.go @@ -199,8 +199,17 @@ func BackupUpgradeRestoreTest(useVolumeSnapshots bool, veleroCLI2Version VeleroC var snapshotCheckPoint SnapshotCheckPoint snapshotCheckPoint.NamespaceBackedUp = upgradeNamespace By("Snapshot should be created in cloud object store", func() { - snapshotCheckPoint, err := GetSnapshotCheckPoint(*veleroCfg.ClientToInstallVelero, veleroCfg, 2, - upgradeNamespace, backupName, KibishiiPVCNameList) + backupVolumeInfo, err := GetVolumeInfo( + veleroCfg.ObjectStoreProvider, + veleroCfg.CloudCredentialsFile, + veleroCfg.BSLBucket, + veleroCfg.BSLPrefix, + veleroCfg.BSLConfig, + backupName, + BackupObjectsPrefix+"/"+backupName, + ) + Expect(err).NotTo(HaveOccurred(), "Failed to get volume 
info for backup") + snapshotCheckPoint, err := BuildSnapshotCheckPointFromVolumeInfo(veleroCfg, backupVolumeInfo, 2, upgradeNamespace, backupName, KibishiiPVCNameList) Expect(err).NotTo(HaveOccurred(), "Fail to get snapshot checkpoint") Expect(CheckSnapshotsInProvider( veleroCfg, diff --git a/test/util/kibishii/kibishii_utils.go b/test/util/kibishii/kibishii_utils.go index 4dc42fcd6..9ed6080b5 100644 --- a/test/util/kibishii/kibishii_utils.go +++ b/test/util/kibishii/kibishii_utils.go @@ -141,6 +141,21 @@ func RunKibishiiTests( fmt.Printf("VeleroBackupNamespace done %s\n", time.Now().Format("2006-01-02 15:04:05")) + fmt.Printf("KibishiiVerifyAfterBackup %s\n", time.Now().Format("2006-01-02 15:04:05")) + backupVolumeInfo, err := GetVolumeInfo( + veleroCfg.ObjectStoreProvider, + veleroCfg.CloudCredentialsFile, + veleroCfg.BSLBucket, + veleroCfg.BSLPrefix, + veleroCfg.BSLConfig, + backupName, + BackupObjectsPrefix+"/"+backupName, + ) + if err != nil { + return errors.Wrapf(err, "Failed to get volume info for backup %s", backupName) + } + fmt.Printf("backupVolumeInfo %v\n", backupVolumeInfo) + // Checkpoint for a successful backup if useVolumeSnapshots { if veleroCfg.HasVspherePlugin { @@ -150,7 +165,8 @@ func RunKibishiiTests( return errors.Wrapf(err, "Error waiting for uploads to complete") } } - snapshotCheckPoint, err := GetSnapshotCheckPoint(client, veleroCfg, 2, kibishiiNamespace, backupName, KibishiiPVCNameList) + + snapshotCheckPoint, err := BuildSnapshotCheckPointFromVolumeInfo(veleroCfg, backupVolumeInfo, DefaultKibishiiWorkerCounts, kibishiiNamespace, backupName, KibishiiPVCNameList) if err != nil { return errors.Wrap(err, "Fail to get snapshot checkpoint") } @@ -186,7 +202,7 @@ func RunKibishiiTests( // wait for a period to confirm no snapshots content exist for the backup time.Sleep(1 * time.Minute) if strings.EqualFold(veleroFeatures, FeatureCSI) { - _, err = GetSnapshotCheckPoint(*veleroCfg.ClientToInstallVelero, veleroCfg, 0, + _, err = 
BuildSnapshotCheckPointFromVolumeInfo(veleroCfg, backupVolumeInfo, 0, + kibishiiNamespace, backupName, KibishiiPVCNameList) if err != nil { return errors.Wrap(err, "failed to get snapshot checkPoint") diff --git a/test/util/velero/velero_utils.go b/test/util/velero/velero_utils.go index abb9cdad6..db417a0a3 100644 --- a/test/util/velero/velero_utils.go +++ b/test/util/velero/velero_utils.go @@ -43,13 +43,13 @@ import ( "k8s.io/apimachinery/pkg/util/wait" kbclient "sigs.k8s.io/controller-runtime/pkg/client" + "github.com/vmware-tanzu/velero/internal/volume" velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" cliinstall "github.com/vmware-tanzu/velero/pkg/cmd/cli/install" "github.com/vmware-tanzu/velero/pkg/cmd/util/flag" veleroexec "github.com/vmware-tanzu/velero/pkg/util/exec" . "github.com/vmware-tanzu/velero/test" common "github.com/vmware-tanzu/velero/test/util/common" - util "github.com/vmware-tanzu/velero/test/util/csi" . "github.com/vmware-tanzu/velero/test/util/k8s" ) @@ -1277,20 +1277,55 @@ func GetRepositories(ctx context.Context, veleroNamespace, targetNamespace strin return common.GetListByCmdPipes(ctx, cmds) } -func GetSnapshotCheckPoint(client TestClient, veleroCfg VeleroConfig, expectCount int, namespaceBackedUp, backupName string, KibishiiPVCNameList []string) (SnapshotCheckPoint, error) { - var err error
+// BuildSnapshotCheckPointFromVolumeInfo builds the SnapshotCheckPoint for a
+// backup from volume info the caller has already retrieved; unlike the
+// removed GetSnapshotCheckPoint it takes no cluster client.
+//
+// ExpectCount, NamespaceBackedUp and PodName are always copied from the
+// arguments. When the cloud provider is Azure or AWS and the CSI feature is
+// enabled, EnableCSI is set and SnapshotIDList receives the SnapshotHandle of
+// every entry in backupVolumeInfo that carries CSI snapshot info. An error is
+// returned when such a snapshot is not marked ready to use, or when the number
+// of collected handles differs from expectCount.
+//
+// backupName is not read by this function; it stays in the signature so the
+// existing call sites are unaffected.
+func BuildSnapshotCheckPointFromVolumeInfo(
+	veleroCfg VeleroConfig,
+	backupVolumeInfo []*volume.BackupVolumeInfo,
+	expectCount int,
+	namespaceBackedUp string,
+	backupName string,
+	KibishiiPVCNameList []string) (SnapshotCheckPoint, error) {
 	var snapshotCheckPoint SnapshotCheckPoint
 
 	snapshotCheckPoint.ExpectCount = expectCount
 	snapshotCheckPoint.NamespaceBackedUp = namespaceBackedUp
 	snapshotCheckPoint.PodName = KibishiiPVCNameList
+
 	if (veleroCfg.CloudProvider == Azure || veleroCfg.CloudProvider == AWS) && strings.EqualFold(veleroCfg.Features, FeatureCSI) {
 		snapshotCheckPoint.EnableCSI = true
 
-		if snapshotCheckPoint.SnapshotIDList, err = util.CheckVolumeSnapshotCR(client, map[string]string{"backupNameLabel": backupName}, expectCount); err != nil {
-			return snapshotCheckPoint, errors.Wrapf(err, "Fail to get Azure CSI snapshot content")
+		for _, volumeInfo := range backupVolumeInfo {
+			// NOTE(review): CSISnapshotInfo is presumably nil for volumes that
+			// were not backed up with a CSI snapshot - confirm. Skip those
+			// entries rather than panic on a nil dereference.
+			if volumeInfo == nil || volumeInfo.CSISnapshotInfo == nil {
+				continue
+			}
+			csiInfo := volumeInfo.CSISnapshotInfo
+			// ReadyToUse is a *bool; treat an unset value as not ready.
+			if csiInfo.ReadyToUse == nil || !*csiInfo.ReadyToUse {
+				return snapshotCheckPoint, errors.New("CSI snapshot is not ready to use")
+			}
+			snapshotCheckPoint.SnapshotIDList = append(snapshotCheckPoint.SnapshotIDList, csiInfo.SnapshotHandle)
+		}
+
+		if vscCount := len(snapshotCheckPoint.SnapshotIDList); vscCount != expectCount {
+			return snapshotCheckPoint, errors.Errorf("CSI snapshot count %d is not as expected %d", vscCount, expectCount)
 		}
 	}
+
 	fmt.Printf("snapshotCheckPoint: %v \n", snapshotCheckPoint)
 	return snapshotCheckPoint, nil
 }