Refactor Snapshot Checkpoint in E2E Tests (#9083)
Some checks failed
Run the E2E test on kind / build (push) Failing after 8m19s
Run the E2E test on kind / setup-test-matrix (push) Successful in 4s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Main CI / Build (push) Failing after 35s
Close stale issues and PRs / stale (push) Successful in 21s
Trivy Nightly Scan / Trivy nightly scan (velero, main) (push) Failing after 1m54s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-aws, main) (push) Failing after 1m25s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-gcp, main) (push) Failing after 1m33s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-microsoft-azure, main) (push) Failing after 1m26s

* Refactor backup volume info retrieval and snapshot checkpoint building in e2e tests
Signed-off-by: Priyansh Choudhary <im1706@gmail.com>

log backup volume info retrieval and snapshot checkpoint building
Signed-off-by: Priyansh Choudhary <im1706@gmail.com>

Add error handling for volume info retrieval in backup tests
Signed-off-by: Priyansh Choudhary <im1706@gmail.com>

Add error handling for volume info retrieval in backup tests
Signed-off-by: Priyansh Choudhary <im1706@gmail.com>

* Update snapshot checkpoint building to use DefaultKibishiiWorkerCounts
Signed-off-by: Priyansh Choudhary <im1706@gmail.com>

---------

Signed-off-by: Priyansh Choudhary <im1706@gmail.com>
This commit is contained in:
Priyansh Choudhary
2025-07-21 20:22:55 +05:30
committed by GitHub
parent 034ce4bde2
commit ff8a070dd3
7 changed files with 128 additions and 26 deletions

View File

@@ -166,12 +166,24 @@ func runBackupDeletionTests(client TestClient, veleroCfg VeleroConfig, backupLoc
return err
}
backupVolumeInfo, err := GetVolumeInfo(
veleroCfg.ObjectStoreProvider,
veleroCfg.CloudCredentialsFile,
veleroCfg.BSLBucket,
veleroCfg.BSLPrefix,
veleroCfg.BSLConfig,
backupName,
BackupObjectsPrefix+"/"+backupName,
)
if err != nil {
return errors.Wrapf(err, "Failed to get volume info for backup %s", backupName)
}
if useVolumeSnapshots {
// Check for snapshots existence
if veleroCfg.CloudProvider == Vsphere {
// For vSphere, the snapshot check should be based on the namespace and backup name
for _, ns := range workloadNamespaceList {
snapshotCheckPoint, err = GetSnapshotCheckPoint(client, veleroCfg, DefaultKibishiiWorkerCounts, ns, backupName, KibishiiPVCNameList)
snapshotCheckPoint, err := BuildSnapshotCheckPointFromVolumeInfo(veleroCfg, backupVolumeInfo, DefaultKibishiiWorkerCounts, ns, backupName, KibishiiPVCNameList)
Expect(err).NotTo(HaveOccurred(), "Fail to get Azure CSI snapshot checkpoint")
err = CheckSnapshotsInProvider(
veleroCfg,
@@ -186,7 +198,7 @@ func runBackupDeletionTests(client TestClient, veleroCfg VeleroConfig, backupLoc
} else {
// For public clouds, when the backup name is used to index VolumeSnapshotContents, the count of VolumeSnapshotContents must include the PVs in all namespaces,
// so the VolumeSnapshotContents count should be equal to "namespace count" * "Kibishii worker count per namespace".
snapshotCheckPoint, err = GetSnapshotCheckPoint(client, veleroCfg, DefaultKibishiiWorkerCounts*nsCount, "", backupName, KibishiiPVCNameList)
snapshotCheckPoint, err := BuildSnapshotCheckPointFromVolumeInfo(veleroCfg, backupVolumeInfo, DefaultKibishiiWorkerCounts*nsCount, "", backupName, KibishiiPVCNameList)
Expect(err).NotTo(HaveOccurred(), "Fail to get Azure CSI snapshot checkpoint")
// Get all snapshots based on the backup name, regardless of namespace

View File

@@ -54,7 +54,6 @@ func (b *TTL) Init() {
}
func TTLTest() {
var err error
var veleroCfg VeleroConfig
useVolumeSnapshots := true
test := new(TTL)
@@ -138,10 +137,21 @@ func TTLTest() {
})
}
snapshotCheckPoint, err = GetSnapshotCheckPoint(
client,
backupVolumeInfo, err := GetVolumeInfo(
veleroCfg.ObjectStoreProvider,
veleroCfg.CloudCredentialsFile,
veleroCfg.BSLBucket,
veleroCfg.BSLPrefix,
veleroCfg.BSLConfig,
test.backupName,
BackupObjectsPrefix+"/"+test.backupName,
)
Expect(err).NotTo(HaveOccurred(), "Failed to get volume info for backup")
snapshotCheckPoint, err = BuildSnapshotCheckPointFromVolumeInfo(
veleroCfg,
2,
backupVolumeInfo,
DefaultKibishiiWorkerCounts,
test.testNS,
test.backupName,
KibishiiPVCNameList,

View File

@@ -52,7 +52,6 @@ func BslDeletionWithRestic() {
}
func BslDeletionTest(useVolumeSnapshots bool) {
var (
err error
veleroCfg VeleroConfig
)
veleroCfg = VeleroCfg
@@ -230,13 +229,25 @@ func BslDeletionTest(useVolumeSnapshots bool) {
backupName2, 1)).To(Succeed())
})
}
backupVolumeInfo, err := GetVolumeInfo(
veleroCfg.ObjectStoreProvider,
veleroCfg.CloudCredentialsFile,
veleroCfg.BSLBucket,
veleroCfg.BSLPrefix,
veleroCfg.BSLConfig,
backupName1,
BackupObjectsPrefix+"/"+backupName1,
)
Expect(err).NotTo(HaveOccurred(), "Failed to get volume info for backup")
if veleroCfg.CloudProvider != VanillaZFS {
var snapshotCheckPoint SnapshotCheckPoint
snapshotCheckPoint.NamespaceBackedUp = bslDeletionTestNs
By(fmt.Sprintf("Snapshot of bsl %s should be created in cloud object store", backupLocation1), func() {
snapshotCheckPoint, err = GetSnapshotCheckPoint(
*veleroCfg.ClientToInstallVelero,
snapshotCheckPoint, err = BuildSnapshotCheckPointFromVolumeInfo(
veleroCfg,
backupVolumeInfo,
1,
bslDeletionTestNs,
backupName1,
@@ -251,9 +262,9 @@ func BslDeletionTest(useVolumeSnapshots bool) {
)).To(Succeed())
})
By(fmt.Sprintf("Snapshot of bsl %s should be created in cloud object store", backupLocation2), func() {
snapshotCheckPoint, err = GetSnapshotCheckPoint(
*veleroCfg.ClientToInstallVelero,
snapshotCheckPoint, err = BuildSnapshotCheckPointFromVolumeInfo(
veleroCfg,
backupVolumeInfo,
1,
bslDeletionTestNs,
backupName2,
@@ -343,10 +354,21 @@ func BslDeletionTest(useVolumeSnapshots bool) {
})
}
backupVolumeInfo, err := GetVolumeInfo(
veleroCfg.ObjectStoreProvider,
veleroCfg.CloudCredentialsFile,
veleroCfg.BSLBucket,
veleroCfg.BSLPrefix,
veleroCfg.BSLConfig,
backupName1,
BackupObjectsPrefix+"/"+backupName1,
)
Expect(err).NotTo(HaveOccurred(), "Failed to get volume info for backup")
var snapshotCheckPoint SnapshotCheckPoint
snapshotCheckPoint.NamespaceBackedUp = bslDeletionTestNs
By(fmt.Sprintf("Snapshot should not be deleted in cloud object store after deleting bsl %s", backupLocation1), func() {
snapshotCheckPoint, err = GetSnapshotCheckPoint(*veleroCfg.ClientToInstallVelero, veleroCfg, 1, bslDeletionTestNs, backupName1, []string{podName1})
snapshotCheckPoint, err = BuildSnapshotCheckPointFromVolumeInfo(veleroCfg, backupVolumeInfo, 1, bslDeletionTestNs, backupName1, []string{podName1})
Expect(err).NotTo(HaveOccurred(), "Fail to get Azure CSI snapshot checkpoint")
Expect(CheckSnapshotsInProvider(
veleroCfg,
@@ -356,9 +378,9 @@ func BslDeletionTest(useVolumeSnapshots bool) {
)).To(Succeed())
})
By(fmt.Sprintf("Snapshot should not be deleted in cloud object store after deleting bsl %s", backupLocation2), func() {
snapshotCheckPoint, err = GetSnapshotCheckPoint(
*veleroCfg.ClientToInstallVelero,
snapshotCheckPoint, err = BuildSnapshotCheckPointFromVolumeInfo(
veleroCfg,
backupVolumeInfo,
1,
bslDeletionTestNs,
backupName2,

View File

@@ -35,6 +35,8 @@ import (
veleroutil "github.com/vmware-tanzu/velero/test/util/velero"
)
const BackupObjectsPrefix = "backups"
type migrationE2E struct {
framework.TestCase
useVolumeSnapshots bool
@@ -263,6 +265,8 @@ func (m *migrationE2E) Backup() error {
snapshotCheckPoint.NamespaceBackedUp = m.CaseBaseName
if OriginVeleroCfg.SnapshotMoveData {
// todo: Remove this as VSC are not preserved post backup. It's 0 by default.
//VolumeSnapshotContent should be deleted after data movement
_, err := util.CheckVolumeSnapshotCR(
*m.VeleroCfg.DefaultClient,
@@ -284,16 +288,27 @@ func (m *migrationE2E) Backup() error {
}
By("Snapshot should be created in cloud object store with retain policy", func() {
snapshotCheckPoint, err = veleroutil.GetSnapshotCheckPoint(
*OriginVeleroCfg.DefaultClient,
backupVolumeInfo, err := providers.GetVolumeInfo(
OriginVeleroCfg.ObjectStoreProvider,
OriginVeleroCfg.CloudCredentialsFile,
OriginVeleroCfg.BSLBucket,
OriginVeleroCfg.BSLPrefix,
OriginVeleroCfg.BSLConfig,
m.BackupName,
BackupObjectsPrefix+"/"+m.BackupName,
)
Expect(err).NotTo(HaveOccurred(), "Failed to get volume info for backup")
snapshotCheckPoint, err := veleroutil.BuildSnapshotCheckPointFromVolumeInfo(
OriginVeleroCfg,
backupVolumeInfo,
m.kibishiiData.ExpectedNodes,
m.CaseBaseName,
m.BackupName,
kibishii.GetKibishiiPVCNameList(m.kibishiiData.ExpectedNodes),
)
Expect(err).NotTo(HaveOccurred(), "Fail to get snapshot checkpoint")
Expect(providers.CheckSnapshotsInProvider(
OriginVeleroCfg,
m.BackupName,

View File

@@ -199,8 +199,17 @@ func BackupUpgradeRestoreTest(useVolumeSnapshots bool, veleroCLI2Version VeleroC
var snapshotCheckPoint SnapshotCheckPoint
snapshotCheckPoint.NamespaceBackedUp = upgradeNamespace
By("Snapshot should be created in cloud object store", func() {
snapshotCheckPoint, err := GetSnapshotCheckPoint(*veleroCfg.ClientToInstallVelero, veleroCfg, 2,
upgradeNamespace, backupName, KibishiiPVCNameList)
backupVolumeInfo, err := GetVolumeInfo(
veleroCfg.ObjectStoreProvider,
veleroCfg.CloudCredentialsFile,
veleroCfg.BSLBucket,
veleroCfg.BSLPrefix,
veleroCfg.BSLConfig,
backupName,
BackupObjectsPrefix+"/"+backupName,
)
Expect(err).NotTo(HaveOccurred(), "Failed to get volume info for backup")
snapshotCheckPoint, err := BuildSnapshotCheckPointFromVolumeInfo(veleroCfg, backupVolumeInfo, 2, upgradeNamespace, backupName, KibishiiPVCNameList)
Expect(err).NotTo(HaveOccurred(), "Fail to get snapshot checkpoint")
Expect(CheckSnapshotsInProvider(
veleroCfg,

View File

@@ -141,6 +141,21 @@ func RunKibishiiTests(
fmt.Printf("VeleroBackupNamespace done %s\n", time.Now().Format("2006-01-02 15:04:05"))
fmt.Printf("KibishiiVerifyAfterBackup %s\n", time.Now().Format("2006-01-02 15:04:05"))
backupVolumeInfo, err := GetVolumeInfo(
veleroCfg.ObjectStoreProvider,
veleroCfg.CloudCredentialsFile,
veleroCfg.BSLBucket,
veleroCfg.BSLPrefix,
veleroCfg.BSLConfig,
backupName,
BackupObjectsPrefix+"/"+backupName,
)
if err != nil {
return errors.Wrapf(err, "Failed to get volume info for backup %s", backupName)
}
fmt.Printf("backupVolumeInfo %v\n", backupVolumeInfo)
// Checkpoint for a successful backup
if useVolumeSnapshots {
if veleroCfg.HasVspherePlugin {
@@ -150,7 +165,8 @@ func RunKibishiiTests(
return errors.Wrapf(err, "Error waiting for uploads to complete")
}
}
snapshotCheckPoint, err := GetSnapshotCheckPoint(client, veleroCfg, 2, kibishiiNamespace, backupName, KibishiiPVCNameList)
snapshotCheckPoint, err := BuildSnapshotCheckPointFromVolumeInfo(veleroCfg, backupVolumeInfo, DefaultKibishiiWorkerCounts, kibishiiNamespace, backupName, KibishiiPVCNameList)
if err != nil {
return errors.Wrap(err, "Fail to get snapshot checkpoint")
}
@@ -186,7 +202,7 @@ func RunKibishiiTests(
// wait for a period to confirm no snapshots content exist for the backup
time.Sleep(1 * time.Minute)
if strings.EqualFold(veleroFeatures, FeatureCSI) {
_, err = GetSnapshotCheckPoint(*veleroCfg.ClientToInstallVelero, veleroCfg, 0,
_, err = BuildSnapshotCheckPointFromVolumeInfo(veleroCfg, backupVolumeInfo, 0,
kibishiiNamespace, backupName, KibishiiPVCNameList)
if err != nil {
return errors.Wrap(err, "failed to get snapshot checkPoint")

View File

@@ -43,13 +43,13 @@ import (
"k8s.io/apimachinery/pkg/util/wait"
kbclient "sigs.k8s.io/controller-runtime/pkg/client"
"github.com/vmware-tanzu/velero/internal/volume"
velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
cliinstall "github.com/vmware-tanzu/velero/pkg/cmd/cli/install"
"github.com/vmware-tanzu/velero/pkg/cmd/util/flag"
veleroexec "github.com/vmware-tanzu/velero/pkg/util/exec"
. "github.com/vmware-tanzu/velero/test"
common "github.com/vmware-tanzu/velero/test/util/common"
util "github.com/vmware-tanzu/velero/test/util/csi"
. "github.com/vmware-tanzu/velero/test/util/k8s"
)
@@ -1277,20 +1277,38 @@ func GetRepositories(ctx context.Context, veleroNamespace, targetNamespace strin
return common.GetListByCmdPipes(ctx, cmds)
}
func GetSnapshotCheckPoint(client TestClient, veleroCfg VeleroConfig, expectCount int, namespaceBackedUp, backupName string, KibishiiPVCNameList []string) (SnapshotCheckPoint, error) {
var err error
// BuildSnapshotCheckPointFromVolumeInfo builds a SnapshotCheckPoint from the
// already-fetched backup volume info, pulling snapshot handles directly from
// each entry's CSISnapshotInfo.
//
// ExpectCount, NamespaceBackedUp and PodName are copied verbatim from
// expectCount, namespaceBackedUp and KibishiiPVCNameList.
//
// When veleroCfg.CloudProvider is Azure or AWS and veleroCfg.Features equals
// FeatureCSI (case-insensitively), EnableCSI is set and SnapshotIDList is
// filled with the SnapshotHandle of every entry in backupVolumeInfo. In that
// mode an error is returned if any entry reports ReadyToUse == false, or if
// the number of collected handles differs from expectCount. For every other
// provider/feature combination SnapshotIDList is left untouched.
//
// On error the partially populated checkpoint is returned alongside the error.
//
// NOTE(review): every entry of backupVolumeInfo is counted; nothing here
// filters by namespaceBackedUp or by PVC name — confirm callers pass volume
// info that is already scoped to what expectCount describes.
// NOTE(review): backupName is referenced only by the CheckVolumeSnapshotCR
// call below, which looks like a leftover of the previous implementation.
func BuildSnapshotCheckPointFromVolumeInfo(
veleroCfg VeleroConfig,
backupVolumeInfo []*volume.BackupVolumeInfo,
expectCount int,
namespaceBackedUp string,
backupName string,
KibishiiPVCNameList []string) (SnapshotCheckPoint, error) {
var snapshotCheckPoint SnapshotCheckPoint
snapshotCheckPoint.ExpectCount = expectCount
snapshotCheckPoint.NamespaceBackedUp = namespaceBackedUp
snapshotCheckPoint.PodName = KibishiiPVCNameList
// Snapshot handles are only collected for CSI-enabled Azure/AWS runs.
if (veleroCfg.CloudProvider == Azure || veleroCfg.CloudProvider == AWS) && strings.EqualFold(veleroCfg.Features, FeatureCSI) {
snapshotCheckPoint.EnableCSI = true
// NOTE(review): the next two lines use `client` and `err`, neither of which
// is declared by this function's signature or body. They appear to be the
// pre-refactor lookup (GetSnapshotCheckPoint) retained by the diff view —
// confirm they are absent from the real file.
if snapshotCheckPoint.SnapshotIDList, err = util.CheckVolumeSnapshotCR(client, map[string]string{"backupNameLabel": backupName}, expectCount); err != nil {
return snapshotCheckPoint, errors.Wrapf(err, "Fail to get Azure CSI snapshot content")
// VscCount tracks how many ready snapshot handles were collected.
var VscCount = 0
for _, volumeInfo := range backupVolumeInfo {
// NOTE(review): ReadyToUse is dereferenced without a nil check, and
// CSISnapshotInfo is accessed unguarded. If either can be nil (for example
// for a volume that was not backed up via a CSI snapshot — TODO confirm
// against volume.BackupVolumeInfo), this line panics instead of returning
// an error.
if *volumeInfo.CSISnapshotInfo.ReadyToUse == true {
snapshotCheckPoint.SnapshotIDList = append(snapshotCheckPoint.SnapshotIDList, volumeInfo.CSISnapshotInfo.SnapshotHandle)
VscCount++
} else {
// A single not-ready snapshot aborts the whole checkpoint.
return snapshotCheckPoint, errors.New("CSI snapshot is not ready to use")
}
}
// The number of ready snapshots must match the caller's expectation exactly.
if VscCount != expectCount {
return snapshotCheckPoint, errors.New(fmt.Sprintf("CSI snapshot count %d is not as expected %d", VscCount, expectCount))
}
}
// Log the resulting checkpoint for test-run diagnostics.
fmt.Printf("snapshotCheckPoint: %v \n", snapshotCheckPoint)
return snapshotCheckPoint, nil
}