From e9c997839ebbb04688a88ffb981eeb74de51b380 Mon Sep 17 00:00:00 2001 From: "David L. Smith-Uchida" Date: Wed, 17 Mar 2021 11:38:47 -0700 Subject: [PATCH] Added volume snapshot test for backup/restore. (#3592) Snapshot tests can be run with Ginkgo focus "Snapshot" and restic tests with Ginkgo focus "Restic". Restic and volume snapshot tests can now be run simultaneously. Added check for kibishii app start after restore. Consolidated kibishii pod checks into waitForKibishiiPods. Added WaitForPods function to e2e/tests/common.goSnapshot tests are skipped automatically on kind clusters. Fixed issue where velero_utils InstallVeleroServer was looking for the Restic daemon set in the "velero" namespace only (was ignoring io.Namespace) Signed-off-by: Dave Smith-Uchida --- go.mod | 2 +- go.sum | 4 +- test/e2e/backup_test.go | 23 ++++++++++-- test/e2e/common.go | 29 +++++++++++++++ test/e2e/e2e_suite_test.go | 3 +- test/e2e/enable_api_group_versions_test.go | 10 +++-- test/e2e/kibishii_tests.go | 43 ++++++++++------------ test/e2e/velero_utils.go | 11 ++++-- 8 files changed, 85 insertions(+), 40 deletions(-) diff --git a/go.mod b/go.mod index a0125034f..8e27497ef 100644 --- a/go.mod +++ b/go.mod @@ -20,7 +20,7 @@ require ( github.com/hashicorp/go-plugin v0.0.0-20190610192547-a1bc61569a26 github.com/joho/godotenv v1.3.0 github.com/kubernetes-csi/external-snapshotter/client/v4 v4.0.0 - github.com/onsi/ginkgo v1.15.1 + github.com/onsi/ginkgo v1.15.2 github.com/onsi/gomega v1.10.2 github.com/pkg/errors v0.9.1 github.com/prometheus/client_golang v1.7.1 diff --git a/go.sum b/go.sum index b99400f65..55a53c12d 100644 --- a/go.sum +++ b/go.sum @@ -431,8 +431,8 @@ github.com/onsi/ginkgo v1.8.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+W github.com/onsi/ginkgo v1.11.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk= github.com/onsi/ginkgo v1.14.1/go.mod h1:iSB4RoI2tjJc9BBv4NKIKWKya62Rps+oPG/Lv9klQyY= -github.com/onsi/ginkgo v1.15.1 h1:DsXNrKujDlkMS9Rsxmd+Fg7S6Kc5lhE+qX8tY6laOxc= -github.com/onsi/ginkgo v1.15.1/go.mod h1:Dd6YFfwBW84ETqqtL0CPyPXillHgY6XhQH3uuCCTr/o= +github.com/onsi/ginkgo v1.15.2 h1:l77YT15o814C2qVL47NOyjV/6RbaP7kKdrvZnxQ3Org= +github.com/onsi/ginkgo v1.15.2/go.mod h1:Dd6YFfwBW84ETqqtL0CPyPXillHgY6XhQH3uuCCTr/o= github.com/onsi/gomega v0.0.0-20170829124025-dcabb60a477c/go.mod h1:C1qb7wdrVGGVU+Z6iS04AVkA3Q65CEZX59MT0QO5uiA= github.com/onsi/gomega v1.5.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= github.com/onsi/gomega v1.7.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= diff --git a/test/e2e/backup_test.go b/test/e2e/backup_test.go index 1b058d977..20dc96ecb 100644 --- a/test/e2e/backup_test.go +++ b/test/e2e/backup_test.go @@ -20,7 +20,19 @@ var ( ) // Test backup and restore of Kibishi using restic -var _ = Describe("[Restic] Velero tests on cluster using the plugin provider for object storage and Restic for volume backups", func() { +var _ = Describe("[Restic] Velero tests on cluster using the plugin provider for object storage and Restic for volume backups", backup_restore_with_restic) + +var _ = Describe("[Snapshot] Velero tests on cluster using the plugin provider for object storage and snapshots for volume backups", backup_restore_with_snapshots) + +func backup_restore_with_snapshots() { + backup_restore_test(true) +} + +func backup_restore_with_restic() { + backup_restore_test(false) +} + +func backup_restore_test(useVolumeSnapshots bool) { var ( client 
*kubernetes.Clientset extensionsClient *apiextensionsclientset.Clientset @@ -29,6 +41,9 @@ var _ = Describe("[Restic] Velero tests on cluster using the plugin provider for ) BeforeEach(func() { + if useVolumeSnapshots && cloudProvider == "kind" { + Skip("Volume snapshots not supported on kind") + } var err error flag.Parse() uuidgen, err = uuid.NewRandom() @@ -57,7 +72,7 @@ var _ = Describe("[Restic] Velero tests on cluster using the plugin provider for restoreName = "restore-" + uuidgen.String() // Even though we are using Velero's CloudProvider plugin for object storage, the kubernetes cluster is running on // KinD. So use the kind installation for Kibishii. - Expect(RunKibishiiTests(client, cloudProvider, veleroCLI, veleroNamespace, backupName, restoreName, "")).To(Succeed(), + Expect(RunKibishiiTests(client, cloudProvider, veleroCLI, veleroNamespace, backupName, restoreName, "", useVolumeSnapshots)).To(Succeed(), "Failed to successfully backup and restore Kibishii namespace") }) @@ -105,9 +120,9 @@ var _ = Describe("[Restic] Velero tests on cluster using the plugin provider for backupName = fmt.Sprintf("backup-%s-%s", bsl, uuidgen) restoreName = fmt.Sprintf("restore-%s-%s", bsl, uuidgen) - Expect(RunKibishiiTests(client, cloudProvider, veleroCLI, veleroNamespace, backupName, restoreName, bsl)).To(Succeed(), + Expect(RunKibishiiTests(client, cloudProvider, veleroCLI, veleroNamespace, backupName, restoreName, bsl, useVolumeSnapshots)).To(Succeed(), "Failed to successfully backup and restore Kibishii namespace using BSL %s", bsl) } }) }) -}) +} diff --git a/test/e2e/common.go b/test/e2e/common.go index 961eea24b..6dbfc0291 100644 --- a/test/e2e/common.go +++ b/test/e2e/common.go @@ -6,6 +6,8 @@ import ( "os/exec" "time" + corev1api "k8s.io/api/core/v1" + "github.com/pkg/errors" "golang.org/x/net/context" apierrors "k8s.io/apimachinery/pkg/api/errors" @@ -63,3 +65,30 @@ func CreateSecretFromFiles(ctx context.Context, client *kubernetes.Clientset, na _, err := client.CoreV1().Secrets(namespace).Create(ctx, secret, metav1.CreateOptions{}) return err } + +/* + Waits until all of the pods have gone to PodRunning state +*/ +func WaitForPods(ctx context.Context, client *kubernetes.Clientset, namespace string, pods []string) error { + timeout := 10 * time.Minute + interval := 5 * time.Second + err := wait.PollImmediate(interval, timeout, func() (bool, error) { + for _, podName := range pods { + checkPod, err := client.CoreV1().Pods(namespace).Get(ctx, podName, metav1.GetOptions{}) + if err != nil { + return false, errors.WithMessage(err, fmt.Sprintf("Failed to verify pod %s/%s is %s", namespace, podName, corev1api.PodRunning)) + } + // If any pod is still waiting we don't need to check any more so return and wait for next poll interval + if checkPod.Status.Phase != corev1api.PodRunning { + fmt.Printf("Pod %s is in state %s waiting for it to be %s\n", podName, checkPod.Status.Phase, corev1api.PodRunning) + return false, nil + } + } + // All pods were in PodRunning state, we're successful + return true, nil + }) + if err != nil { + return errors.Wrapf(err, fmt.Sprintf("Failed to wait for pods in namespace %s to start running", namespace)) + } + return nil +} diff --git a/test/e2e/e2e_suite_test.go b/test/e2e/e2e_suite_test.go index 6ab7ca91b..ecac96bd4 100644 --- a/test/e2e/e2e_suite_test.go +++ b/test/e2e/e2e_suite_test.go @@ -11,7 +11,7 @@ import ( var ( veleroCLI, veleroImage, cloudCredentialsFile, bslConfig, bslBucket, bslPrefix, vslConfig, cloudProvider, objectStoreProvider, veleroNamespace 
string additionalBSLProvider, additionalBSLBucket, additionalBSLPrefix, additionalBSLConfig, additionalBSLCredentials string - installVelero, useVolumeSnapshots bool + installVelero bool ) func init() { @@ -26,7 +26,6 @@ func init() { flag.StringVar(&vslConfig, "vsl-config", "", "configuration to use for the volume snapshot location. Format is key1=value1,key2=value2") flag.StringVar(&veleroNamespace, "velero-namespace", "velero", "Namespace to install Velero into") flag.BoolVar(&installVelero, "install-velero", true, "Install/uninstall velero during the test. Optional.") - flag.BoolVar(&useVolumeSnapshots, "use-volume-snapshots", false, "Use volume-snapshotter plugin for volume backup. Optional") // Flags to create an additional BSL for multiple credentials test flag.StringVar(&additionalBSLProvider, "additional-bsl-object-store-provider", "", "Provider of object store plugin for additional backup storage location. Required if testing multiple credentials support.") diff --git a/test/e2e/enable_api_group_versions_test.go b/test/e2e/enable_api_group_versions_test.go index dc5b69712..20be75bf6 100644 --- a/test/e2e/enable_api_group_versions_test.go +++ b/test/e2e/enable_api_group_versions_test.go @@ -10,9 +10,12 @@ import ( "strings" "time" + "github.com/google/uuid" + + "github.com/vmware-tanzu/velero/pkg/util/kube" + apiextensionsclient "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset" - "github.com/google/uuid" . "github.com/onsi/ginkgo" . "github.com/onsi/gomega" "github.com/pkg/errors" @@ -23,7 +26,6 @@ import ( "github.com/vmware-tanzu/velero/pkg/builder" veleroexec "github.com/vmware-tanzu/velero/pkg/util/exec" - "github.com/vmware-tanzu/velero/pkg/util/kube" ) var _ = Describe("[APIGroup] Velero tests with various CRD API group versions", func() { @@ -220,7 +222,7 @@ func RunEnableAPIGroupVersionsTests(ctx context.Context, resource, group string, // TODO - Velero needs to be installed AFTER CRDs are installed because of https://github.com/vmware-tanzu/velero/issues/3471 // Once that issue is fixed, we should install Velero once for the test suite if installVelero { - VeleroInstall(context.Background(), veleroImage, veleroNamespace, cloudProvider, objectStoreProvider, useVolumeSnapshots, + VeleroInstall(context.Background(), veleroImage, veleroNamespace, cloudProvider, objectStoreProvider, false, cloudCredentialsFile, bslBucket, bslPrefix, bslConfig, vslConfig, "EnableAPIGroupVersions" /* TODO - remove this when the feature flag is removed */) fmt.Println("Sleep 20s to wait for Velero to stabilize after install.") @@ -230,7 +232,7 @@ func RunEnableAPIGroupVersionsTests(ctx context.Context, resource, group string, backup := "backup-rockbands-" + uuidgen.String() + "-" + strconv.Itoa(i) namespacesStr := strings.Join(tc.namespaces, ",") - err = VeleroBackupNamespace(ctx, veleroCLI, veleroNamespace, backup, namespacesStr, "") + err = VeleroBackupNamespace(ctx, veleroCLI, veleroNamespace, backup, namespacesStr, "", false) if err != nil { VeleroBackupLogs(ctx, veleroCLI, veleroNamespace, backup) return errors.Wrapf(err, "backing up %s namespaces on source cluster", namespacesStr) diff --git a/test/e2e/kibishii_tests.go b/test/e2e/kibishii_tests.go index 416cb8b83..624be7ad0 100644 --- a/test/e2e/kibishii_tests.go +++ b/test/e2e/kibishii_tests.go @@ -10,9 +10,7 @@ import ( "golang.org/x/net/context" "k8s.io/client-go/kubernetes" - corev1api "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/util/wait" veleroexec 
"github.com/vmware-tanzu/velero/pkg/util/exec" ) @@ -47,11 +45,6 @@ func installKibishii(ctx context.Context, namespace string, cloudPlatform string return errors.Wrapf(err, "Failed to wait for ready status of pod %s/%s", namespace, jumpPadPod) } - // TODO - Fix kibishii so we can check that it is ready to go - // Wait for etcd run as kibishii workload to be ready - fmt.Printf("Waiting for etcd workload pods to be ready\n") - exec.CommandContext(ctx, "kubectl", "wait", "--for=condition=ready", "pod/etcd0", "pod/etcd1", "pod/etcd2") - return err } @@ -85,7 +78,8 @@ func verifyData(ctx context.Context, namespace string, levels int, filesPerLevel } // RunKibishiiTests runs kibishii tests on the provider. -func RunKibishiiTests(client *kubernetes.Clientset, providerName, veleroCLI, veleroNamespace, backupName, restoreName, backupLocation string) error { +func RunKibishiiTests(client *kubernetes.Clientset, providerName, veleroCLI, veleroNamespace, backupName, restoreName, backupLocation string, + useVolumeSnapshots bool) error { fiveMinTimeout, _ := context.WithTimeout(context.Background(), 5*time.Minute) oneHourTimeout, _ := context.WithTimeout(context.Background(), time.Minute*60) timeout := 10 * time.Minute @@ -99,11 +93,18 @@ func RunKibishiiTests(client *kubernetes.Clientset, providerName, veleroCLI, vel return errors.Wrap(err, "Failed to install Kibishii workload") } + // wait for kibishii pod startup + // TODO - Fix kibishii so we can check that it is ready to go + fmt.Printf("Waiting for kibishii pods to be ready\n") + if err := waitForKibishiiPods(oneHourTimeout, client, kibishiiNamespace); err != nil { + return errors.Wrapf(err, "Failed to wait for ready status of kibishii pods in %s", kibishiiNamespace) + } + if err := generateData(oneHourTimeout, kibishiiNamespace, 2, 10, 10, 1024, 1024, 0, 2); err != nil { return errors.Wrap(err, "Failed to generate data") } - if err := VeleroBackupNamespace(oneHourTimeout, veleroCLI, veleroNamespace, backupName, kibishiiNamespace, backupLocation); err != nil { + if err := VeleroBackupNamespace(oneHourTimeout, veleroCLI, veleroNamespace, backupName, kibishiiNamespace, backupLocation, useVolumeSnapshots); err != nil { VeleroBackupLogs(fiveMinTimeout, veleroCLI, veleroNamespace, backupName) return errors.Wrapf(err, "Failed to backup kibishii namespace %s", kibishiiNamespace) } @@ -124,19 +125,10 @@ func RunKibishiiTests(client *kubernetes.Clientset, providerName, veleroCLI, vel } // wait for kibishii pod startup - err = wait.PollImmediate(interval, timeout, func() (bool, error) { - kp, err := client.CoreV1().Pods(kibishiiNamespace).Get(context.TODO(), jumpPadPod, metav1.GetOptions{}) - if err != nil { - return false, errors.Wrapf(err, fmt.Sprintf("Failed to verify pod %s/%s is %s", kibishiiNamespace, jumpPadPod, corev1api.PodRunning)) - } - if kp.Status.Phase != corev1api.PodRunning { - fmt.Printf("Pod %s is in state %s waiting for it to be %s\n", jumpPadPod, kp.Status.Phase, corev1api.PodRunning) - return false, nil - } - return true, nil - }) - if err != nil { - return errors.Wrapf(err, fmt.Sprintf("Failed to wait for pod %s/%s to start running", kibishiiNamespace, jumpPadPod)) + // TODO - Fix kibishii so we can check that it is ready to go + fmt.Printf("Waiting for kibishii pods to be ready\n") + if err := waitForKibishiiPods(oneHourTimeout, client, kibishiiNamespace); err != nil { + return errors.Wrapf(err, "Failed to wait for ready status of kibishii pods in %s", kibishiiNamespace) } // TODO - check that namespace exists @@ -149,10 +141,13 @@ 
func RunKibishiiTests(client *kubernetes.Clientset, providerName, veleroCLI, vel return errors.Wrapf(err, "Failed to cleanup %s wrokload namespace", kibishiiNamespace) } // wait for ns delete - err = WaitForNamespaceDeletion(interval, timeout, client, kibishiiNamespace) - if err != nil { + if err = WaitForNamespaceDeletion(interval, timeout, client, kibishiiNamespace); err != nil { return errors.Wrapf(err, fmt.Sprintf("Failed to wait for deletion of namespace %s", kibishiiNamespace)) } fmt.Printf("kibishii test completed successfully\n") return nil } + +func waitForKibishiiPods(ctx context.Context, client *kubernetes.Clientset, kibishiiNamespace string) error { + return WaitForPods(ctx, client, kibishiiNamespace, []string{"jump-pad", "etcd0", "etcd1", "etcd2", "kibishii-deployment-0", "kibishii-deployment-1"}) +} diff --git a/test/e2e/velero_utils.go b/test/e2e/velero_utils.go index a6eb678b5..d23d969e2 100644 --- a/test/e2e/velero_utils.go +++ b/test/e2e/velero_utils.go @@ -115,7 +115,7 @@ func InstallVeleroServer(io *cliinstall.InstallOptions) error { if io.UseRestic { fmt.Println("Waiting for Velero restic daemonset to be ready.") - if _, err = install.DaemonSetIsReady(factory, "velero"); err != nil { + if _, err = install.DaemonSetIsReady(factory, io.Namespace); err != nil { return errors.Wrap(err, errorMsg) } } @@ -212,15 +212,20 @@ func CheckRestorePhase(ctx context.Context, veleroCLI string, veleroNamespace st } // VeleroBackupNamespace uses the veleroCLI to backup a namespace. -func VeleroBackupNamespace(ctx context.Context, veleroCLI string, veleroNamespace string, backupName string, namespace string, backupLocation string) error { +func VeleroBackupNamespace(ctx context.Context, veleroCLI string, veleroNamespace string, backupName string, namespace string, backupLocation string, + useVolumeSnapshots bool) error { args := []string{ "--namespace", veleroNamespace, "create", "backup", backupName, "--include-namespaces", namespace, - "--default-volumes-to-restic", "--wait", } + if useVolumeSnapshots { + args = append(args, "--snapshot-volumes") + } else { + args = append(args, "--default-volumes-to-restic") + } if backupLocation != "" { args = append(args, "--storage-location", backupLocation) }
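Note on running the suites (not part of the patch itself): because the two Describe blocks are tagged [Restic] and [Snapshot], either variant can be run on its own by passing the matching Ginkgo focus (for example, -focus=Snapshot to the ginkgo CLI), and the snapshot variant skips itself in BeforeEach when cloudProvider is "kind", since volume snapshots are not supported there.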
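The WaitForPods helper added to test/e2e/common.go is general-purpose, so other e2e workloads can reuse it the same way waitForKibishiiPods does. A minimal sketch, assuming it sits alongside common.go in the same test/e2e package; the namespace and pod names below are hypothetical and not part of this patch:

package e2e

import (
	"context"

	"k8s.io/client-go/kubernetes"
)

// waitForExampleWorkloadPods is an illustrative wrapper around the WaitForPods
// helper from test/e2e/common.go. WaitForPods polls every 5 seconds, for up to
// 10 minutes, until every listed pod in the namespace reports PodRunning, and
// returns early if a pod cannot be fetched.
func waitForExampleWorkloadPods(ctx context.Context, client *kubernetes.Clientset) error {
	// Namespace and pod names here are made up for illustration only.
	return WaitForPods(ctx, client, "example-workload", []string{"db-0", "db-1", "frontend-0"})
}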
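Similarly, callers of the updated VeleroBackupNamespace choose the volume backup mechanism through the new trailing useVolumeSnapshots argument, which the helper translates into --snapshot-volumes (true) or --default-volumes-to-restic (false) on the velero backup create invocation. A minimal sketch of a caller, again assuming the test/e2e package; the backup and namespace names are hypothetical:

package e2e

import (
	"context"
	"time"
)

// backupExampleNamespaceWithSnapshots is an illustrative call to the updated
// VeleroBackupNamespace helper. The final argument selects volume snapshots
// (--snapshot-volumes) when true and restic (--default-volumes-to-restic) when
// false; an empty backupLocation omits --storage-location, so the default
// backup storage location is used.
func backupExampleNamespaceWithSnapshots(veleroCLI, veleroNamespace string) error {
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Minute)
	defer cancel()
	// "example-backup" and "example-workload" are placeholders for illustration only.
	return VeleroBackupNamespace(ctx, veleroCLI, veleroNamespace, "example-backup", "example-workload", "", true)
}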