Merge pull request #7122 from danfengliu/add-csi-retain-policy-e2e-test

Add E2E test for taking CSI snapshots of PVs with the Retain reclaim policy
danfengliu
2023-11-22 17:35:35 +08:00
committed by GitHub
12 changed files with 216 additions and 31 deletions

View File

@@ -31,15 +31,33 @@ import (
. "github.com/vmware-tanzu/velero/test/util/velero"
)
type BackupRestoreTestConfig struct {
useVolumeSnapshots bool
kibishiiPatchSubDir string
isRetainPVTest bool
}
func BackupRestoreWithSnapshots() {
BackupRestoreTest(true)
config := BackupRestoreTestConfig{true, "", false}
BackupRestoreTest(config)
}
func BackupRestoreWithRestic() {
BackupRestoreTest(false)
config := BackupRestoreTestConfig{false, "", false}
BackupRestoreTest(config)
}
func BackupRestoreTest(useVolumeSnapshots bool) {
func BackupRestoreRetainedPVWithSnapshots() {
config := BackupRestoreTestConfig{true, "overlays/sc-reclaim-policy/", true}
BackupRestoreTest(config)
}
func BackupRestoreRetainedPVWithRestic() {
config := BackupRestoreTestConfig{false, "overlays/sc-reclaim-policy/", true}
BackupRestoreTest(config)
}
func BackupRestoreTest(backupRestoreTestConfig BackupRestoreTestConfig) {
var (
backupName, restoreName, kibishiiNamespace string
@@ -48,25 +66,34 @@ func BackupRestoreTest(useVolumeSnapshots bool) {
veleroCfg VeleroConfig
)
provideSnapshotVolumesParmInBackup = false
useVolumeSnapshots := backupRestoreTestConfig.useVolumeSnapshots
BeforeEach(func() {
veleroCfg = VeleroCfg
veleroCfg.KibishiiDirectory = veleroCfg.KibishiiDirectory + backupRestoreTestConfig.kibishiiPatchSubDir
veleroCfg.UseVolumeSnapshots = useVolumeSnapshots
veleroCfg.UseNodeAgent = !useVolumeSnapshots
if useVolumeSnapshots && veleroCfg.CloudProvider == "kind" {
Skip("Volume snapshots not supported on kind")
}
var err error
flag.Parse()
UUIDgen, err = uuid.NewRandom()
kibishiiNamespace = "k-" + UUIDgen.String()
Expect(err).To(Succeed())
DeleteStorageClass(context.Background(), *veleroCfg.ClientToInstallVelero, KibishiiStorageClassName)
})
AfterEach(func() {
if !veleroCfg.Debug {
By("Clean backups after test", func() {
DeleteAllBackups(context.Background(), *veleroCfg.ClientToInstallVelero)
if backupRestoreTestConfig.isRetainPVTest {
CleanAllRetainedPV(context.Background(), *veleroCfg.ClientToInstallVelero)
}
DeleteStorageClass(context.Background(), *veleroCfg.ClientToInstallVelero, KibishiiStorageClassName)
})
if veleroCfg.InstallVelero {
ctx, ctxCancel := context.WithTimeout(context.Background(), time.Minute*5)
@@ -106,6 +133,9 @@ func BackupRestoreTest(useVolumeSnapshots bool) {
})
It("should successfully back up and restore to an additional BackupStorageLocation with unique credentials", func() {
if backupRestoreTestConfig.isRetainPVTest {
Skip("It's tested by 1st test case")
}
if veleroCfg.AdditionalBSLProvider == "" {
Skip("no additional BSL provider given, not running multiple BackupStorageLocation with unique credentials tests")
}
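Aside: the positional literals above (e.g. BackupRestoreTestConfig{true, "", false}) leave the meaning of each field implicit; below is a minimal sketch of one of the new entry points written with named fields, assuming only the struct defined in this file. This is a readability suggestion, not what was merged.
// Same entry point as above, with named fields so each flag is self-describing.
func BackupRestoreRetainedPVWithSnapshots() {
	config := BackupRestoreTestConfig{
		useVolumeSnapshots:  true,
		kibishiiPatchSubDir: "overlays/sc-reclaim-policy/",
		isRetainPVTest:      true,
	}
	BackupRestoreTest(config)
}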

View File

@@ -102,7 +102,7 @@ func (n *NamespaceMapping) Verify() error {
n.kibishiiData.Levels = len(*n.NSIncluded) + index
By(fmt.Sprintf("Verify workload %s after restore ", ns), func() {
Expect(KibishiiVerifyAfterRestore(n.Client, ns,
n.Ctx, n.kibishiiData)).To(Succeed(), "Fail to verify workload after restore")
n.Ctx, n.kibishiiData, "")).To(Succeed(), "Fail to verify workload after restore")
})
}
for _, ns := range *n.NSIncluded {

View File

@@ -102,6 +102,10 @@ var _ = Describe("[Basic][Restic] Velero tests on cluster using the plugin provi
var _ = Describe("[Basic][Snapshot] Velero tests on cluster using the plugin provider for object storage and snapshots for volume backups", BackupRestoreWithSnapshots)
var _ = Describe("[Basic][Snapshot][RetainPV] Velero tests on cluster using the plugin provider for object storage and snapshots for volume backups", BackupRestoreRetainedPVWithSnapshots)
var _ = Describe("[Basic][Restic][RetainPV] Velero tests on cluster using the plugin provider for object storage and snapshots for volume backups", BackupRestoreRetainedPVWithRestic)
var _ = Describe("[Basic][ClusterResource] Backup/restore of cluster resources", ResourcesCheckTest)
var _ = Describe("[Scale][LongTime] Backup/restore of 2500 namespaces", MultiNSBackupRestore)

View File

@@ -273,15 +273,16 @@ func MigrationTest(useVolumeSnapshots bool, veleroCLI2Version VeleroCLI2Version)
}
By(fmt.Sprintf("Install Velero in cluster-B (%s) to restore workload", veleroCfg.StandbyCluster), func() {
//Ensure workload of "migrationNamespace" existed in cluster-A
ns, err := GetNamespace(context.Background(), *veleroCfg.DefaultClient, migrationNamespace)
Expect(ns.Name).To(Equal(migrationNamespace))
Expect(err).NotTo(HaveOccurred())
Expect(err).NotTo(HaveOccurred(), fmt.Sprintf("get namespace in cluster-A err: %v", err))
//Ensure cluster-B is the target cluster
Expect(KubectlConfigUseContext(context.Background(), veleroCfg.StandbyCluster)).To(Succeed())
_, err = GetNamespace(context.Background(), *veleroCfg.StandbyClient, migrationNamespace)
Expect(err).To(HaveOccurred())
Expect(strings.Contains(fmt.Sprint(err), "namespaces \""+migrationNamespace+"\" not found")).To(BeTrue())
fmt.Println(err)
veleroCfg.ClientToInstallVelero = veleroCfg.StandbyClient
@@ -335,7 +336,7 @@ func MigrationTest(useVolumeSnapshots bool, veleroCLI2Version VeleroCLI2Version)
By(fmt.Sprintf("Verify workload %s after restore ", migrationNamespace), func() {
Expect(KibishiiVerifyAfterRestore(*veleroCfg.StandbyClient, migrationNamespace,
oneHourTimeout, &KibishiiData)).To(Succeed(), "Fail to verify workload after restore")
oneHourTimeout, &KibishiiData, "")).To(Succeed(), "Fail to verify workload after restore")
})
// TODO: delete backup created by case self, not all

View File

@@ -180,7 +180,7 @@ func fileContent(namespace, podName, volume string) string {
}
func fileExist(ctx context.Context, namespace, podName, volume string) error {
c, err := ReadFileFromPodVolume(ctx, namespace, podName, podName, volume, FILE_NAME)
c, _, err := ReadFileFromPodVolume(ctx, namespace, podName, podName, volume, FILE_NAME)
if err != nil {
return errors.Wrap(err, fmt.Sprintf("Fail to read file %s from volume %s of pod %s in %s ",
FILE_NAME, volume, podName, namespace))
@@ -195,7 +195,7 @@ func fileExist(ctx context.Context, namespace, podName, volume string) error {
}
}
func fileNotExist(ctx context.Context, namespace, podName, volume string) error {
_, err := ReadFileFromPodVolume(ctx, namespace, podName, podName, volume, FILE_NAME)
_, _, err := ReadFileFromPodVolume(ctx, namespace, podName, podName, volume, FILE_NAME)
if err != nil {
return nil
} else {

View File

@@ -24,7 +24,6 @@ import (
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
"github.com/pkg/errors"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
@@ -164,7 +163,7 @@ func (r *ResourcePoliciesCase) Verify() error {
if vol.Name != volName {
continue
}
content, err := ReadFileFromPodVolume(r.Ctx, ns, pod.Name, "container-busybox", vol.Name, FileName)
content, _, err := ReadFileFromPodVolume(r.Ctx, ns, pod.Name, "container-busybox", vol.Name, FileName)
if i%2 == 0 {
Expect(err).To(HaveOccurred(), "Expected file not found") // File should not exist
} else {

View File

@@ -29,7 +29,6 @@ import (
. "github.com/vmware-tanzu/velero/test"
. "github.com/vmware-tanzu/velero/test/util/k8s"
. "github.com/vmware-tanzu/velero/test/util/kibishii"
. "github.com/vmware-tanzu/velero/test/util/providers"
. "github.com/vmware-tanzu/velero/test/util/velero"
)
@@ -256,7 +255,7 @@ func BackupUpgradeRestoreTest(useVolumeSnapshots bool, veleroCLI2Version VeleroC
By(fmt.Sprintf("Verify workload %s after restore ", upgradeNamespace), func() {
Expect(KibishiiVerifyAfterRestore(*veleroCfg.ClientToInstallVelero, upgradeNamespace,
oneHourTimeout, DefaultKibishiiData)).To(Succeed(), "Fail to verify workload after restore")
oneHourTimeout, DefaultKibishiiData, "")).To(Succeed(), "Fail to verify workload after restore")
})
})
})

View File

@@ -21,14 +21,12 @@ import (
"fmt"
"strings"
"github.com/pkg/errors"
snapshotterClientSet "github.com/kubernetes-csi/external-snapshotter/client/v4/clientset/versioned"
"github.com/pkg/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/clientcmd"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
. "github.com/vmware-tanzu/velero/test/util/k8s"
)
@@ -128,6 +126,7 @@ func GetCsiSnapshotHandleV1(client TestClient, backupName string) ([]string, err
}
return snapshotHandleList, nil
}
func GetVolumeSnapshotContentNameByPod(client TestClient, podName, namespace, backupName string) (string, error) {
pvcList, err := GetPvcByPVCName(context.Background(), namespace, podName)
if err != nil {

View File

@@ -104,7 +104,6 @@ func GetPvcByPVCName(ctx context.Context, namespace, pvcName string) ([]string,
Args: []string{"{print $1}"},
}
cmds = append(cmds, cmd)
return common.GetListByCmdPipes(ctx, cmds)
}
@@ -279,15 +278,30 @@ func CreateFileToPod(ctx context.Context, namespace, podName, containerName, vol
fmt.Printf("Kubectl exec cmd =%v\n", cmd)
return cmd.Run()
}
func ReadFileFromPodVolume(ctx context.Context, namespace, podName, containerName, volume, filename string) (string, error) {
func FileExistInPV(ctx context.Context, namespace, podName, containerName, volume, filename string) (bool, error) {
stdout, stderr, err := ReadFileFromPodVolume(ctx, namespace, podName, containerName, volume, filename)
output := fmt.Sprintf("%s:%s", stdout, stderr)
if strings.Contains(output, fmt.Sprintf("/%s/%s: No such file or directory", volume, filename)) {
return false, nil
} else {
if err == nil {
return true, nil
} else {
return false, errors.Wrap(err, fmt.Sprintf("Fail to read file %s from volume %s of pod %s in %s",
filename, volume, podName, namespace))
}
}
}
func ReadFileFromPodVolume(ctx context.Context, namespace, podName, containerName, volume, filename string) (string, string, error) {
arg := []string{"exec", "-n", namespace, "-c", containerName, podName,
"--", "cat", fmt.Sprintf("/%s/%s", volume, filename)}
cmd := exec.CommandContext(ctx, "kubectl", arg...)
fmt.Printf("Kubectl exec cmd =%v\n", cmd)
stdout, stderr, err := veleroexec.RunCommand(cmd)
fmt.Print(stdout)
fmt.Print(stderr)
return stdout, err
fmt.Printf("stdout: %s\n", stdout)
fmt.Printf("stderr: %s\n", stderr)
return stdout, stderr, err
}
func RunCommand(cmdName string, arg []string) string {
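Aside: a minimal usage sketch of the new FileExistInPV helper, assuming the same package and imports as the file above; verifyNoLeftoverFile, and the "kibishii" container and "data" volume names, are illustrative and not part of this change. It shows how a caller can now tell "file absent" (false, nil) apart from a genuine read failure instead of parsing ReadFileFromPodVolume output itself.
// verifyNoLeftoverFile is a hypothetical caller of FileExistInPV (defined above).
func verifyNoLeftoverFile(ctx context.Context, namespace, podName, fileName string) error {
	exist, err := FileExistInPV(ctx, namespace, podName, "kibishii", "data", fileName)
	if err != nil {
		// A real read failure, distinct from the file simply not existing.
		return errors.Wrapf(err, "failed to check file %s in pod %s", fileName, podName)
	}
	if exist {
		return errors.Errorf("unexpected file %s found in pod %s after restore", fileName, podName)
	}
	return nil
}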

View File

@@ -22,10 +22,9 @@ import (
"github.com/pkg/errors"
corev1 "k8s.io/api/core/v1"
"k8s.io/client-go/util/retry"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/util/retry"
)
func CreatePersistentVolume(client TestClient, name string) (*corev1.PersistentVolume, error) {
@@ -93,3 +92,16 @@ func ClearClaimRefForFailedPVs(ctx context.Context, client TestClient) error {
return nil
}
func GetAllPVNames(ctx context.Context, client TestClient) ([]string, error) {
var pvNameList []string
pvList, err := client.ClientGo.CoreV1().PersistentVolumes().List(ctx, metav1.ListOptions{})
if err != nil {
return nil, fmt.Errorf("failed to list PVs: %w", err)
}
for _, pv := range pvList.Items {
pvNameList = append(pvNameList, pv.Name)
}
return pvNameList, nil
}

View File

@@ -51,6 +51,7 @@ type KibishiiData struct {
var DefaultKibishiiWorkerCounts = 2
var DefaultKibishiiData = &KibishiiData{2, 10, 10, 1024, 1024, 0, DefaultKibishiiWorkerCounts}
var KibishiiPodNameList = []string{"kibishii-deployment-0", "kibishii-deployment-1"}
var KibishiiPVCNameList = []string{"kibishii-data-kibishii-deployment-0", "kibishii-data-kibishii-deployment-1"}
var KibishiiStorageClassName = "kibishii-storage-class"
@@ -107,6 +108,8 @@ func RunKibishiiTests(veleroCfg VeleroConfig, backupName, restoreName, backupLoc
}
fmt.Printf("VeleroBackupNamespace done %s\n", time.Now().Format("2006-01-02 15:04:05"))
// Checkpoint for a successful backup
if useVolumeSnapshots {
if providerName == "vsphere" {
// Wait for uploads started by the Velero Plugin for vSphere to complete
@@ -165,11 +168,49 @@ func RunKibishiiTests(veleroCfg VeleroConfig, backupName, restoreName, backupLoc
}
}
// Modify the PV data right after the backup. If the PV's reclaim policy is Retain, the PV should be restored with its original (backed-up) content and configuration, not this later modification.
fileName := "file-" + kibishiiNamespace
fileBaseContent := fileName
fmt.Printf("Re-poulate volume %s\n", time.Now().Format("2006-01-02 15:04:05"))
for _, pod := range KibishiiPodNameList {
// To ensure Kibishii verification result is accurate
ClearKibishiiData(oneHourTimeout, kibishiiNamespace, pod, "kibishii", "data")
fileContent := fileBaseContent + pod
err := CreateFileToPod(oneHourTimeout, kibishiiNamespace, pod, "kibishii", "data",
fileName, fileContent)
if err != nil {
return errors.Wrapf(err, "failed to create file %s", fileName)
}
}
fmt.Printf("Re-poulate volume done %s\n", time.Now().Format("2006-01-02 15:04:05"))
pvList := []string{}
if strings.Contains(veleroCfg.KibishiiDirectory, "sc-reclaim-policy") {
// Get leftover PV list for PV cleanup
for _, pvc := range KibishiiPVCNameList {
pv, err := GetPvName(oneHourTimeout, client, pvc, kibishiiNamespace)
if err != nil {
return errors.Wrapf(err, "failed to get PV name for PVC %s in namespace %s", pvc, kibishiiNamespace)
}
pvList = append(pvList, pv)
}
}
fmt.Printf("Simulating a disaster by removing namespace %s %s\n", kibishiiNamespace, time.Now().Format("2006-01-02 15:04:05"))
if err := DeleteNamespace(oneHourTimeout, client, kibishiiNamespace, true); err != nil {
return errors.Wrapf(err, "failed to delete namespace %s", kibishiiNamespace)
}
if strings.Contains(veleroCfg.KibishiiDirectory, "sc-reclaim-policy") {
// In the CSI PV-retain-policy scenario, before restoring the backed-up PVs we must make sure
// no PVs with the same names are left over: because the reclaim policy is Retain, the PVs were
// not deleted in the previous step even though the workload namespace was destroyed.
if err := DeletePVs(oneHourTimeout, *veleroCfg.ClientToInstallVelero, pvList); err != nil {
return errors.Wrapf(err, "failed to delete PVs %v", pvList)
}
}
// The snapshots on AWS may still be in pending status when the restore runs, so wait a while
// to avoid this https://github.com/vmware-tanzu/velero/issues/1799
// TODO remove this after https://github.com/vmware-tanzu/velero/issues/3533 is fixed
@@ -191,10 +232,12 @@ func RunKibishiiTests(veleroCfg VeleroConfig, backupName, restoreName, backupLoc
return errors.New(fmt.Sprintf("PVR count %d is not as expected %d", len(pvrs), pvCount))
}
}
fmt.Printf("KibishiiVerifyAfterRestore %s\n", time.Now().Format("2006-01-02 15:04:05"))
if err := KibishiiVerifyAfterRestore(client, kibishiiNamespace, oneHourTimeout, DefaultKibishiiData); err != nil {
if err := KibishiiVerifyAfterRestore(client, kibishiiNamespace, oneHourTimeout, DefaultKibishiiData, fileName); err != nil {
return errors.Wrapf(err, "Error verifying kibishii after restore")
}
fmt.Printf("kibishii test completed successfully %s\n", time.Now().Format("2006-01-02 15:04:05"))
return nil
}
@@ -309,6 +352,15 @@ func waitForKibishiiPods(ctx context.Context, client TestClient, kibishiiNamespa
return WaitForPods(ctx, client, kibishiiNamespace, []string{"jump-pad", "etcd0", "etcd1", "etcd2", "kibishii-deployment-0", "kibishii-deployment-1"})
}
func KibishiiGenerateData(oneHourTimeout context.Context, kibishiiNamespace string, kibishiiData *KibishiiData) error {
fmt.Printf("generateData %s\n", time.Now().Format("2006-01-02 15:04:05"))
if err := generateData(oneHourTimeout, kibishiiNamespace, kibishiiData); err != nil {
return errors.Wrap(err, "Failed to generate data")
}
fmt.Printf("generateData done %s\n", time.Now().Format("2006-01-02 15:04:05"))
return nil
}
func KibishiiPrepareBeforeBackup(oneHourTimeout context.Context, client TestClient,
providerName, kibishiiNamespace, registryCredentialFile, veleroFeatures,
kibishiiDirectory string, useVolumeSnapshots bool, kibishiiData *KibishiiData) error {
@@ -338,16 +390,12 @@ func KibishiiPrepareBeforeBackup(oneHourTimeout context.Context, client TestClie
if kibishiiData == nil {
kibishiiData = DefaultKibishiiData
}
fmt.Printf("generateData %s\n", time.Now().Format("2006-01-02 15:04:05"))
if err := generateData(oneHourTimeout, kibishiiNamespace, kibishiiData); err != nil {
return errors.Wrap(err, "Failed to generate data")
}
fmt.Printf("generateData done %s\n", time.Now().Format("2006-01-02 15:04:05"))
return KibishiiGenerateData(oneHourTimeout, kibishiiNamespace, kibishiiData)
}
func KibishiiVerifyAfterRestore(client TestClient, kibishiiNamespace string, oneHourTimeout context.Context,
kibishiiData *KibishiiData) error {
kibishiiData *KibishiiData, incrementalFileName string) error {
if kibishiiData == nil {
kibishiiData = DefaultKibishiiData
}
@@ -357,6 +405,18 @@ func KibishiiVerifyAfterRestore(client TestClient, kibishiiNamespace string, one
if err := waitForKibishiiPods(oneHourTimeout, client, kibishiiNamespace); err != nil {
return errors.Wrapf(err, "Failed to wait for ready status of kibishii pods in %s", kibishiiNamespace)
}
if incrementalFileName != "" {
for _, pod := range KibishiiPodNameList {
exist, err := FileExistInPV(oneHourTimeout, kibishiiNamespace, pod, "kibishii", "data", incrementalFileName)
if err != nil {
return errors.Wrapf(err, "failed to check file %s", incrementalFileName)
}
if exist {
return errors.New("unexpected incremental data exists after restore")
}
}
}
// TODO - check that namespace exists
fmt.Printf("running kibishii verify\n")
@@ -365,3 +425,11 @@ func KibishiiVerifyAfterRestore(client TestClient, kibishiiNamespace string, one
}
return nil
}
func ClearKibishiiData(ctx context.Context, namespace, podName, containerName, dir string) error {
arg := []string{"exec", "-n", namespace, "-c", containerName, podName,
"--", "/bin/sh", "-c", "rm -rf /" + dir + "/*"}
cmd := exec.CommandContext(ctx, "kubectl", arg...)
fmt.Printf("Kubectl exec cmd =%v\n", cmd)
return cmd.Run()
}
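Aside: the retained-PV path added to RunKibishiiTests is spread across several hunks above; the sketch below condenses just that path, reusing the helpers introduced in this change. retainPVDisasterAndVerify and its signature are illustrative, the Velero restore itself is omitted between the PV cleanup and the verification, and the imports are assumed to match this file.
// retainPVDisasterAndVerify is a hypothetical condensation of the retained-PV
// steps added to RunKibishiiTests; error handling is abbreviated.
func retainPVDisasterAndVerify(ctx context.Context, client TestClient, ns, fileName string) error {
	// Record the PVs bound to the kibishii PVCs; with reclaimPolicy Retain they
	// will outlive the namespace.
	var pvList []string
	for _, pvc := range KibishiiPVCNameList {
		pv, err := GetPvName(ctx, client, pvc, ns)
		if err != nil {
			return errors.Wrapf(err, "failed to get PV name for PVC %s", pvc)
		}
		pvList = append(pvList, pv)
	}
	// Simulate the disaster, then remove the retained PVs so the restore can
	// recreate PVs with the same names.
	if err := DeleteNamespace(ctx, client, ns, true); err != nil {
		return errors.Wrapf(err, "failed to delete namespace %s", ns)
	}
	if err := DeletePVs(ctx, client, pvList); err != nil {
		return errors.Wrapf(err, "failed to delete PVs %v", pvList)
	}
	// (the Velero restore, omitted here, runs between the PV cleanup above and
	// the verification below)
	// After the restore, the post-backup file must not be present.
	return KibishiiVerifyAfterRestore(client, ns, ctx, DefaultKibishiiData, fileName)
}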

View File

@@ -1561,3 +1561,62 @@ func InstallTestStorageClasses(path string) error {
}
return InstallStorageClass(ctx, tmpFile.Name())
}
func GetPvName(ctx context.Context, client TestClient, pvcName, namespace string) (string, error) {
pvcList, err := GetPvcByPVCName(context.Background(), namespace, pvcName)
if err != nil {
return "", err
}
if len(pvcList) != 1 {
return "", errors.New(fmt.Sprintf("Only 1 PV of PVC %s pod %s should be found under namespace %s", pvcList[0], pvcName, namespace))
}
pvList, err := GetPvByPvc(context.Background(), namespace, pvcList[0])
if err != nil {
return "", err
}
if len(pvList) != 1 {
return "", errors.New(fmt.Sprintf("Only 1 PV of PVC %s pod %s should be found under namespace %s", pvcList[0], pvcName, namespace))
}
return pvList[0], nil
}
func DeletePVs(ctx context.Context, client TestClient, pvList []string) error {
for _, pv := range pvList {
args := []string{"delete", "pv", pv, "--timeout=0s"}
fmt.Println(args)
err := exec.CommandContext(ctx, "kubectl", args...).Run()
if err != nil {
return errors.New(fmt.Sprintf("Deleted PV %s ", pv))
}
}
return nil
}
func CleanAllRetainedPV(ctx context.Context, client TestClient) {
pvNameList, err := GetAllPVNames(ctx, client)
if err != nil {
fmt.Println("fail to list PV")
}
for _, pv := range pvNameList {
args := []string{"patch", "pv", pv, "-p", "{\"spec\":{\"persistentVolumeReclaimPolicy\":\"Delete\"}}"}
fmt.Println(args)
cmd := exec.CommandContext(ctx, "kubectl", args...)
stdout, errMsg, err := veleroexec.RunCommand(cmd)
if err != nil {
fmt.Printf("fail to patch PV %s reclaim policy to delete: stdout: %s, stderr: %s", pv, stdout, errMsg)
}
args = []string{"delete", "pv", pv, "--timeout=60s"}
fmt.Println(args)
cmd = exec.CommandContext(ctx, "kubectl", args...)
stdout, errMsg, err = veleroexec.RunCommand(cmd)
if err != nil {
fmt.Printf("fail to delete PV %s reclaim policy to delete: stdout: %s, stderr: %s", pv, stdout, errMsg)
}
}
}
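Aside: CleanAllRetainedPV shells out to kubectl; since TestClient already exposes a client-go clientset (ClientGo, as used in GetAllPVNames above), the same best-effort cleanup could be done in-process. Below is a sketch of that alternative, assuming the usual client-go imports (metav1, k8s.io/apimachinery/pkg/types, context, fmt); cleanAllRetainedPVWithClientGo is a hypothetical name, not part of this change.
// cleanAllRetainedPVWithClientGo is a hypothetical client-go variant of
// CleanAllRetainedPV: flip each PV's reclaim policy to Delete, then delete it.
func cleanAllRetainedPVWithClientGo(ctx context.Context, client TestClient) {
	pvNameList, err := GetAllPVNames(ctx, client)
	if err != nil {
		fmt.Printf("failed to list PVs: %v\n", err)
		return
	}
	patch := []byte(`{"spec":{"persistentVolumeReclaimPolicy":"Delete"}}`)
	for _, pv := range pvNameList {
		// Best-effort, mirroring the original: log failures and keep going.
		if _, err := client.ClientGo.CoreV1().PersistentVolumes().Patch(
			ctx, pv, types.MergePatchType, patch, metav1.PatchOptions{}); err != nil {
			fmt.Printf("failed to patch reclaim policy of PV %s: %v\n", pv, err)
		}
		if err := client.ClientGo.CoreV1().PersistentVolumes().Delete(
			ctx, pv, metav1.DeleteOptions{}); err != nil {
			fmt.Printf("failed to delete PV %s: %v\n", pv, err)
		}
	}
}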