diff --git a/changelogs/unreleased/7077-Lyndon-Li b/changelogs/unreleased/7077-Lyndon-Li new file mode 100644 index 000000000..802609edf --- /dev/null +++ b/changelogs/unreleased/7077-Lyndon-Li @@ -0,0 +1 @@ +Fix issue #6693, partially fail restore if CSI snapshot is involved but CSI feature is not ready, i.e., CSI feature gate is not enabled or CSI plugin is not installed. \ No newline at end of file diff --git a/pkg/cmd/server/server.go b/pkg/cmd/server/server.go index f8b03f8e3..6507456b2 100644 --- a/pkg/cmd/server/server.go +++ b/pkg/cmd/server/server.go @@ -72,7 +72,6 @@ import ( "github.com/vmware-tanzu/velero/pkg/persistence" "github.com/vmware-tanzu/velero/pkg/plugin/clientmgmt" "github.com/vmware-tanzu/velero/pkg/plugin/clientmgmt/process" - "github.com/vmware-tanzu/velero/pkg/plugin/framework/common" "github.com/vmware-tanzu/velero/pkg/podexec" "github.com/vmware-tanzu/velero/pkg/podvolume" "github.com/vmware-tanzu/velero/pkg/repository" @@ -268,6 +267,7 @@ type server struct { mgr manager.Manager credentialFileStore credentials.FileStore credentialSecretStore credentials.SecretStore + featureVerifier features.Verifier } func newServer(f client.Factory, config serverConfig, logger *logrus.Logger) (*server, error) { @@ -309,11 +309,10 @@ func newServer(f client.Factory, config serverConfig, logger *logrus.Logger) (*s return nil, err } - if !features.IsEnabled(velerov1api.CSIFeatureFlag) { - _, err = pluginRegistry.Get(common.PluginKindBackupItemActionV2, "velero.io/csi-pvc-backupper") - if err == nil { - logger.Warn("CSI plugins are registered, but the EnableCSI feature is not enabled.") - } + featureVerifier := features.NewVerifier(pluginRegistry) + + if _, err := featureVerifier.Verify(velerov1api.CSIFeatureFlag); err != nil { + logger.WithError(err).Warn("CSI feature verification failed, the feature may not be ready.") } // cancelFunc is not deferred here because if it was, then ctx would immediately @@ -397,6 +396,7 @@ func newServer(f client.Factory, config serverConfig, logger *logrus.Logger) (*s mgr: mgr, credentialFileStore: credentialFileStore, credentialSecretStore: credentialSecretStore, + featureVerifier: featureVerifier, } // Setup CSI snapshot client and lister @@ -942,6 +942,7 @@ func (s *server) runControllers(defaultVolumeSnapshotLocations map[string]string s.kubeClient.CoreV1().RESTClient(), s.credentialFileStore, s.mgr.GetClient(), + s.featureVerifier, ) cmd.CheckError(err) diff --git a/pkg/features/mocks/PluginFinder.go b/pkg/features/mocks/PluginFinder.go new file mode 100644 index 000000000..7659c1772 --- /dev/null +++ b/pkg/features/mocks/PluginFinder.go @@ -0,0 +1,43 @@ +// Code generated by mockery v2.22.1. DO NOT EDIT. + +package mocks + +import ( + common "github.com/vmware-tanzu/velero/pkg/plugin/framework/common" + + mock "github.com/stretchr/testify/mock" +) + +// PluginFinder is an autogenerated mock type for the PluginFinder type +type PluginFinder struct { + mock.Mock +} + +// Find provides a mock function with given fields: kind, name +func (_m *PluginFinder) Find(kind common.PluginKind, name string) bool { + ret := _m.Called(kind, name) + + var r0 bool + if rf, ok := ret.Get(0).(func(common.PluginKind, string) bool); ok { + r0 = rf(kind, name) + } else { + r0 = ret.Get(0).(bool) + } + + return r0 +} + +type mockConstructorTestingTNewPluginFinder interface { + mock.TestingT + Cleanup(func()) +} + +// NewPluginFinder creates a new instance of PluginFinder. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. +func NewPluginFinder(t mockConstructorTestingTNewPluginFinder) *PluginFinder { + mock := &PluginFinder{} + mock.Mock.Test(t) + + t.Cleanup(func() { mock.AssertExpectations(t) }) + + return mock +} diff --git a/pkg/features/mocks/Verifier.go b/pkg/features/mocks/Verifier.go new file mode 100644 index 000000000..e391345a2 --- /dev/null +++ b/pkg/features/mocks/Verifier.go @@ -0,0 +1,49 @@ +// Code generated by mockery v2.22.1. DO NOT EDIT. + +package mocks + +import mock "github.com/stretchr/testify/mock" + +// Verifier is an autogenerated mock type for the Verifier type +type Verifier struct { + mock.Mock +} + +// Verify provides a mock function with given fields: name +func (_m *Verifier) Verify(name string) (bool, error) { + ret := _m.Called(name) + + var r0 bool + var r1 error + if rf, ok := ret.Get(0).(func(string) (bool, error)); ok { + return rf(name) + } + if rf, ok := ret.Get(0).(func(string) bool); ok { + r0 = rf(name) + } else { + r0 = ret.Get(0).(bool) + } + + if rf, ok := ret.Get(1).(func(string) error); ok { + r1 = rf(name) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +type mockConstructorTestingTNewVerifier interface { + mock.TestingT + Cleanup(func()) +} + +// NewVerifier creates a new instance of Verifier. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. +func NewVerifier(t mockConstructorTestingTNewVerifier) *Verifier { + mock := &Verifier{} + mock.Mock.Test(t) + + t.Cleanup(func() { mock.AssertExpectations(t) }) + + return mock +} diff --git a/pkg/features/verify.go b/pkg/features/verify.go new file mode 100644 index 000000000..0474d5076 --- /dev/null +++ b/pkg/features/verify.go @@ -0,0 +1,71 @@ +/* +Copyright the Velero contributors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package features + +import ( + "errors" + + velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" + "github.com/vmware-tanzu/velero/pkg/plugin/framework/common" +) + +type PluginFinder interface { + Find(kind common.PluginKind, name string) bool +} + +type Verifier interface { + Verify(name string) (bool, error) +} + +type verifier struct { + finder PluginFinder +} + +func NewVerifier(finder PluginFinder) Verifier { + return &verifier{ + finder: finder, + } +} + +func (v *verifier) Verify(name string) (bool, error) { + enabled := IsEnabled(name) + + switch name { + case velerov1api.CSIFeatureFlag: + return verifyCSIFeature(v.finder, enabled) + default: + return false, nil + } +} + +func verifyCSIFeature(finder PluginFinder, enabled bool) (bool, error) { + installed := false + installed = finder.Find(common.PluginKindBackupItemActionV2, "velero.io/csi-pvc-backupper") + if installed { + installed = finder.Find(common.PluginKindRestoreItemActionV2, "velero.io/csi-pvc-restorer") + } + + if !enabled && installed { + return false, errors.New("CSI plugins are registered, but the EnableCSI feature is not enabled") + } else if enabled && !installed { + return false, errors.New("CSI feature is enabled, but CSI plugins are not registered") + } else if !enabled && !installed { + return false, nil + } else { + return true, nil + } +} diff --git a/pkg/features/verify_test.go b/pkg/features/verify_test.go new file mode 100644 index 000000000..fee6b2b2d --- /dev/null +++ b/pkg/features/verify_test.go @@ -0,0 +1,61 @@ +/* +Copyright the Velero contributors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package features + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" + + findermocks "github.com/vmware-tanzu/velero/pkg/features/mocks" +) + +func TestVerify(t *testing.T) { + NewFeatureFlagSet() + verifier := verifier{} + + finder := new(findermocks.PluginFinder) + finder.On("Find", mock.Anything, mock.Anything).Return(false) + verifier.finder = finder + ready, err := verifier.Verify("EnableCSI") + assert.Equal(t, false, ready) + assert.Nil(t, err) + + finder = new(findermocks.PluginFinder) + finder.On("Find", mock.Anything, mock.Anything).Return(true) + verifier.finder = finder + ready, err = verifier.Verify("EnableCSI") + assert.Equal(t, false, ready) + assert.EqualError(t, err, "CSI plugins are registered, but the EnableCSI feature is not enabled") + + Enable("EnableCSI") + finder = new(findermocks.PluginFinder) + finder.On("Find", mock.Anything, mock.Anything).Return(false) + verifier.finder = finder + ready, err = verifier.Verify("EnableCSI") + assert.Equal(t, false, ready) + assert.EqualError(t, err, "CSI feature is enabled, but CSI plugins are not registered") + + Enable("EnableCSI") + finder = new(findermocks.PluginFinder) + finder.On("Find", mock.Anything, mock.Anything).Return(true) + verifier.finder = finder + ready, err = verifier.Verify("EnableCSI") + assert.Equal(t, true, ready) + assert.Nil(t, err) +} diff --git a/pkg/plugin/clientmgmt/manager_test.go b/pkg/plugin/clientmgmt/manager_test.go index 1e8c14926..3a1c39529 100644 --- a/pkg/plugin/clientmgmt/manager_test.go +++ b/pkg/plugin/clientmgmt/manager_test.go @@ -61,6 +61,10 @@ func (r *mockRegistry) Get(kind common.PluginKind, name string) (framework.Plugi return id, args.Error(1) } +func (r *mockRegistry) Find(kind common.PluginKind, name string) bool { + return false +} + func TestNewManager(t *testing.T) { logger := test.NewLogger() logLevel := logrus.InfoLevel diff --git a/pkg/plugin/clientmgmt/process/registry.go b/pkg/plugin/clientmgmt/process/registry.go index 6238c45fb..7845f79ef 100644 --- a/pkg/plugin/clientmgmt/process/registry.go +++ b/pkg/plugin/clientmgmt/process/registry.go @@ -37,6 +37,9 @@ type Registry interface { List(kind common.PluginKind) []framework.PluginIdentifier // Get returns the PluginIdentifier for kind and name. Get(kind common.PluginKind, name string) (framework.PluginIdentifier, error) + + // Find checks if the specified plugin exists in the registry + Find(kind common.PluginKind, name string) bool } // KindAndName is a convenience struct that combines a PluginKind and a name. @@ -125,6 +128,12 @@ func (r *registry) Get(kind common.PluginKind, name string) (framework.PluginIde return p, nil } +// Contain if the specified plugin exists in the registry +func (r *registry) Find(kind common.PluginKind, name string) bool { + _, found := r.pluginsByID[KindAndName{Kind: kind, Name: name}] + return found +} + // readPluginsDir recursively reads dir looking for plugins. func (r *registry) readPluginsDir(dir string) ([]string, error) { if _, err := r.fs.Stat(dir); err != nil { diff --git a/pkg/restore/restore.go b/pkg/restore/restore.go index 56ac96cfd..ea0af47c9 100644 --- a/pkg/restore/restore.go +++ b/pkg/restore/restore.go @@ -115,6 +115,7 @@ type kubernetesRestorer struct { podGetter cache.Getter credentialFileStore credentials.FileStore kbClient crclient.Client + featureVerifier features.Verifier } // NewKubernetesRestorer creates a new kubernetesRestorer. @@ -132,6 +133,7 @@ func NewKubernetesRestorer( podGetter cache.Getter, credentialStore credentials.FileStore, kbClient crclient.Client, + featureVerifier features.Verifier, ) (Restorer, error) { return &kubernetesRestorer{ discoveryHelper: discoveryHelper, @@ -156,6 +158,7 @@ func NewKubernetesRestorer( podGetter: podGetter, credentialFileStore: credentialStore, kbClient: kbClient, + featureVerifier: featureVerifier, }, nil } @@ -321,6 +324,7 @@ func (kr *kubernetesRestorer) RestoreWithResolvers( itemOperationsList: req.GetItemOperationsList(), resourceModifiers: req.ResourceModifiers, disableInformerCache: req.DisableInformerCache, + featureVerifier: kr.featureVerifier, } return restoreCtx.execute() @@ -372,6 +376,7 @@ type restoreContext struct { itemOperationsList *[]*itemoperation.RestoreOperation resourceModifiers *resourcemodifiers.ResourceModifiers disableInformerCache bool + featureVerifier features.Verifier } type resourceClientKey struct { @@ -1310,6 +1315,11 @@ func (ctx *restoreContext) restoreItem(obj *unstructured.Unstructured, groupReso }).Infof("Dynamically re-provisioning persistent volume because it has a related CSI VolumeSnapshot.") ctx.pvsToProvision.Insert(name) + if ready, err := ctx.featureVerifier.Verify(velerov1api.CSIFeatureFlag); !ready { + ctx.log.Errorf("Failed to verify CSI modules, ready %v, err %v", ready, err) + errs.Add(namespace, fmt.Errorf("CSI modules are not ready for restore. Check CSI feature is enabled and CSI plugin is installed")) + } + // Return early because we don't want to restore the PV itself, we // want to dynamically re-provision it. return warnings, errs, itemExists @@ -1322,6 +1332,11 @@ func (ctx *restoreContext) restoreItem(obj *unstructured.Unstructured, groupReso }).Infof("Dynamically re-provisioning persistent volume because it has a related snapshot DataUpload.") ctx.pvsToProvision.Insert(name) + if ready, err := ctx.featureVerifier.Verify(velerov1api.CSIFeatureFlag); !ready { + ctx.log.Errorf("Failed to verify CSI modules, ready %v, err %v", ready, err) + errs.Add(namespace, fmt.Errorf("CSI modules are not ready for restore. Check CSI feature is enabled and CSI plugin is installed")) + } + // Return early because we don't want to restore the PV itself, we // want to dynamically re-provision it. return warnings, errs, itemExists diff --git a/pkg/restore/restore_test.go b/pkg/restore/restore_test.go index 643d001af..d2f86e3c7 100644 --- a/pkg/restore/restore_test.go +++ b/pkg/restore/restore_test.go @@ -29,6 +29,7 @@ import ( "github.com/pkg/errors" "github.com/sirupsen/logrus" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" corev1api "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" @@ -46,6 +47,7 @@ import ( "github.com/vmware-tanzu/velero/pkg/builder" "github.com/vmware-tanzu/velero/pkg/client" "github.com/vmware-tanzu/velero/pkg/discovery" + verifiermocks "github.com/vmware-tanzu/velero/pkg/features/mocks" "github.com/vmware-tanzu/velero/pkg/itemoperation" "github.com/vmware-tanzu/velero/pkg/kuberesource" "github.com/vmware-tanzu/velero/pkg/plugin/velero" @@ -2272,6 +2274,7 @@ func TestRestorePersistentVolumes(t *testing.T) { want []*test.APIResource wantError bool wantWarning bool + csiFeatureVerifierErr string }{ { name: "when a PV with a reclaim policy of delete has no snapshot and does not exist in-cluster, it does not get restored, and its PVC gets reset for dynamic provisioning", @@ -2966,6 +2969,47 @@ func TestRestorePersistentVolumes(t *testing.T) { }, want: []*test.APIResource{}, }, + { + name: "when a PV has a CSI VolumeSnapshot, but CSI modules are not ready, the PV is not restored", + restore: defaultRestore().Result(), + backup: defaultBackup().Result(), + tarball: test.NewTarWriter(t). + AddItems("persistentvolumes", + builder.ForPersistentVolume("pv-1"). + ReclaimPolicy(corev1api.PersistentVolumeReclaimRetain). + ClaimRef("velero", testPVCName). + Result(), + ). + Done(), + apiResources: []*test.APIResource{ + test.PVs(), + test.PVCs(), + }, + csiVolumeSnapshots: []*snapshotv1api.VolumeSnapshot{ + { + ObjectMeta: metav1.ObjectMeta{ + Namespace: "velero", + Name: "test", + }, + Spec: snapshotv1api.VolumeSnapshotSpec{ + Source: snapshotv1api.VolumeSnapshotSource{ + PersistentVolumeClaimName: &testPVCName, + }, + }, + }, + }, + volumeSnapshotLocations: []*velerov1api.VolumeSnapshotLocation{ + builder.ForVolumeSnapshotLocation(velerov1api.DefaultNamespace, "default").Provider("provider-1").Result(), + }, + volumeSnapshotterGetter: map[string]vsv1.VolumeSnapshotter{ + "provider-1": &volumeSnapshotter{ + snapshotVolumes: map[string]string{"snapshot-1": "new-volume"}, + }, + }, + want: []*test.APIResource{}, + csiFeatureVerifierErr: "fake-feature-check-error", + wantError: true, + }, { name: "when a PV with a reclaim policy of retain has a DataUpload result CM and does not exist in-cluster, the PV is not restored", restore: defaultRestore().ObjectMeta(builder.WithUID("fakeUID")).Result(), @@ -2993,11 +3037,45 @@ func TestRestorePersistentVolumes(t *testing.T) { }, dataUploadResult: builder.ForConfigMap("velero", "test").ObjectMeta(builder.WithLabelsMap(map[string]string{ velerov1api.RestoreUIDLabel: "fakeUID", - velerov1api.PVCNamespaceNameLabel: "velero/testPVC", + velerov1api.PVCNamespaceNameLabel: "velero.testPVC", velerov1api.ResourceUsageLabel: string(velerov1api.VeleroResourceUsageDataUploadResult), })).Result(), want: []*test.APIResource{}, }, + { + name: "when a PV has a DataUpload result CM, but CSI modules are not ready, the PV is not restored", + restore: defaultRestore().ObjectMeta(builder.WithUID("fakeUID")).Result(), + backup: defaultBackup().Result(), + tarball: test.NewTarWriter(t). + AddItems("persistentvolumes", + builder.ForPersistentVolume("pv-1"). + ReclaimPolicy(corev1api.PersistentVolumeReclaimRetain). + ClaimRef("velero", testPVCName). + Result(), + ). + Done(), + apiResources: []*test.APIResource{ + test.PVs(), + test.PVCs(), + test.ConfigMaps(), + }, + volumeSnapshotLocations: []*velerov1api.VolumeSnapshotLocation{ + builder.ForVolumeSnapshotLocation(velerov1api.DefaultNamespace, "default").Provider("provider-1").Result(), + }, + volumeSnapshotterGetter: map[string]vsv1.VolumeSnapshotter{ + "provider-1": &volumeSnapshotter{ + snapshotVolumes: map[string]string{"snapshot-1": "new-volume"}, + }, + }, + dataUploadResult: builder.ForConfigMap("velero", "test").ObjectMeta(builder.WithLabelsMap(map[string]string{ + velerov1api.RestoreUIDLabel: "fakeUID", + velerov1api.PVCNamespaceNameLabel: "velero.testPVC", + velerov1api.ResourceUsageLabel: string(velerov1api.VeleroResourceUsageDataUploadResult), + })).Result(), + want: []*test.APIResource{}, + csiFeatureVerifierErr: "fake-feature-check-error", + wantError: true, + }, } for _, tc := range tests { @@ -3009,6 +3087,14 @@ func TestRestorePersistentVolumes(t *testing.T) { return renamed, nil } + verifierMock := new(verifiermocks.Verifier) + if tc.csiFeatureVerifierErr != "" { + verifierMock.On("Verify", mock.Anything, mock.Anything).Return(false, errors.New(tc.csiFeatureVerifierErr)) + } else { + verifierMock.On("Verify", mock.Anything, mock.Anything).Return(true, nil) + } + h.restorer.featureVerifier = verifierMock + // set up the VolumeSnapshotLocation client and add test data to it for _, vsl := range tc.volumeSnapshotLocations { require.NoError(t, h.restorer.kbClient.Create(context.Background(), vsl))