diff --git a/changelogs/unreleased/7077-Lyndon-Li b/changelogs/unreleased/7077-Lyndon-Li new file mode 100644 index 000000000..802609edf --- /dev/null +++ b/changelogs/unreleased/7077-Lyndon-Li @@ -0,0 +1 @@ +Fix issue #6693, partially fail restore if CSI snapshot is involved but CSI feature is not ready, i.e., CSI feature gate is not enabled or CSI plugin is not installed. \ No newline at end of file diff --git a/pkg/cmd/server/server.go b/pkg/cmd/server/server.go index f8b03f8e3..56cb67f02 100644 --- a/pkg/cmd/server/server.go +++ b/pkg/cmd/server/server.go @@ -72,7 +72,6 @@ import ( "github.com/vmware-tanzu/velero/pkg/persistence" "github.com/vmware-tanzu/velero/pkg/plugin/clientmgmt" "github.com/vmware-tanzu/velero/pkg/plugin/clientmgmt/process" - "github.com/vmware-tanzu/velero/pkg/plugin/framework/common" "github.com/vmware-tanzu/velero/pkg/podexec" "github.com/vmware-tanzu/velero/pkg/podvolume" "github.com/vmware-tanzu/velero/pkg/repository" @@ -268,6 +267,7 @@ type server struct { mgr manager.Manager credentialFileStore credentials.FileStore credentialSecretStore credentials.SecretStore + featureVerifier *features.Verifier } func newServer(f client.Factory, config serverConfig, logger *logrus.Logger) (*server, error) { @@ -309,11 +309,10 @@ func newServer(f client.Factory, config serverConfig, logger *logrus.Logger) (*s return nil, err } - if !features.IsEnabled(velerov1api.CSIFeatureFlag) { - _, err = pluginRegistry.Get(common.PluginKindBackupItemActionV2, "velero.io/csi-pvc-backupper") - if err == nil { - logger.Warn("CSI plugins are registered, but the EnableCSI feature is not enabled.") - } + featureVerifier := features.NewVerifier(pluginRegistry) + + if _, err := featureVerifier.Verify(velerov1api.CSIFeatureFlag); err != nil { + logger.WithError(err).Warn("CSI feature verification failed, the feature may not be ready.") } // cancelFunc is not deferred here because if it was, then ctx would immediately @@ -397,6 +396,7 @@ func newServer(f client.Factory, config serverConfig, logger *logrus.Logger) (*s mgr: mgr, credentialFileStore: credentialFileStore, credentialSecretStore: credentialSecretStore, + featureVerifier: featureVerifier, } // Setup CSI snapshot client and lister @@ -942,6 +942,7 @@ func (s *server) runControllers(defaultVolumeSnapshotLocations map[string]string s.kubeClient.CoreV1().RESTClient(), s.credentialFileStore, s.mgr.GetClient(), + s.featureVerifier, ) cmd.CheckError(err) diff --git a/pkg/features/verify.go b/pkg/features/verify.go new file mode 100644 index 000000000..dfd5febaa --- /dev/null +++ b/pkg/features/verify.go @@ -0,0 +1,67 @@ +/* +Copyright the Velero contributors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package features + +import ( + "errors" + + velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" + "github.com/vmware-tanzu/velero/pkg/plugin/framework/common" +) + +type PluginFinder interface { + Find(kind common.PluginKind, name string) bool +} + +type Verifier struct { + finder PluginFinder +} + +func NewVerifier(finder PluginFinder) *Verifier { + return &Verifier{ + finder: finder, + } +} + +func (v *Verifier) Verify(name string) (bool, error) { + enabled := IsEnabled(name) + + switch name { + case velerov1api.CSIFeatureFlag: + return verifyCSIFeature(v.finder, enabled) + default: + return false, nil + } +} + +func verifyCSIFeature(finder PluginFinder, enabled bool) (bool, error) { + installed := false + installed = finder.Find(common.PluginKindBackupItemActionV2, "velero.io/csi-pvc-backupper") + if installed { + installed = finder.Find(common.PluginKindRestoreItemActionV2, "velero.io/csi-pvc-restorer") + } + + if !enabled && installed { + return false, errors.New("CSI plugins are registered, but the EnableCSI feature is not enabled") + } else if enabled && !installed { + return false, errors.New("CSI feature is enabled, but CSI plugins are not registered") + } else if !enabled && !installed { + return false, nil + } else { + return true, nil + } +} diff --git a/pkg/plugin/clientmgmt/manager_test.go b/pkg/plugin/clientmgmt/manager_test.go index 1e8c14926..3a1c39529 100644 --- a/pkg/plugin/clientmgmt/manager_test.go +++ b/pkg/plugin/clientmgmt/manager_test.go @@ -61,6 +61,10 @@ func (r *mockRegistry) Get(kind common.PluginKind, name string) (framework.Plugi return id, args.Error(1) } +func (r *mockRegistry) Find(kind common.PluginKind, name string) bool { + return false +} + func TestNewManager(t *testing.T) { logger := test.NewLogger() logLevel := logrus.InfoLevel diff --git a/pkg/plugin/clientmgmt/process/registry.go b/pkg/plugin/clientmgmt/process/registry.go index 6238c45fb..7845f79ef 100644 --- a/pkg/plugin/clientmgmt/process/registry.go +++ b/pkg/plugin/clientmgmt/process/registry.go @@ -37,6 +37,9 @@ type Registry interface { List(kind common.PluginKind) []framework.PluginIdentifier // Get returns the PluginIdentifier for kind and name. Get(kind common.PluginKind, name string) (framework.PluginIdentifier, error) + + // Find checks if the specified plugin exists in the registry + Find(kind common.PluginKind, name string) bool } // KindAndName is a convenience struct that combines a PluginKind and a name. @@ -125,6 +128,12 @@ func (r *registry) Get(kind common.PluginKind, name string) (framework.PluginIde return p, nil } +// Contain if the specified plugin exists in the registry +func (r *registry) Find(kind common.PluginKind, name string) bool { + _, found := r.pluginsByID[KindAndName{Kind: kind, Name: name}] + return found +} + // readPluginsDir recursively reads dir looking for plugins. func (r *registry) readPluginsDir(dir string) ([]string, error) { if _, err := r.fs.Stat(dir); err != nil { diff --git a/pkg/restore/restore.go b/pkg/restore/restore.go index 8f623800c..83ac84ed7 100644 --- a/pkg/restore/restore.go +++ b/pkg/restore/restore.go @@ -114,6 +114,7 @@ type kubernetesRestorer struct { podGetter cache.Getter credentialFileStore credentials.FileStore kbClient crclient.Client + featureVerifier *features.Verifier } // NewKubernetesRestorer creates a new kubernetesRestorer. @@ -131,6 +132,7 @@ func NewKubernetesRestorer( podGetter cache.Getter, credentialStore credentials.FileStore, kbClient crclient.Client, + featureVerifier *features.Verifier, ) (Restorer, error) { return &kubernetesRestorer{ discoveryHelper: discoveryHelper, @@ -155,6 +157,7 @@ func NewKubernetesRestorer( podGetter: podGetter, credentialFileStore: credentialStore, kbClient: kbClient, + featureVerifier: featureVerifier, }, nil } @@ -319,6 +322,7 @@ func (kr *kubernetesRestorer) RestoreWithResolvers( itemOperationsList: req.GetItemOperationsList(), resourceModifiers: req.ResourceModifiers, disableInformerCache: req.DisableInformerCache, + featureVerifier: kr.featureVerifier, } return restoreCtx.execute() @@ -369,6 +373,7 @@ type restoreContext struct { itemOperationsList *[]*itemoperation.RestoreOperation resourceModifiers *resourcemodifiers.ResourceModifiers disableInformerCache bool + featureVerifier *features.Verifier } type resourceClientKey struct { @@ -1295,6 +1300,14 @@ func (ctx *restoreContext) restoreItem(obj *unstructured.Unstructured, groupReso // want to dynamically re-provision it. return warnings, errs, itemExists + case hasCSIVolumeSnapshot(ctx, obj): + case hasSnapshotDataUpload(ctx, obj): + if ready, err := ctx.featureVerifier.Verify(velerov1api.CSIFeatureFlag); !ready { + ctx.log.Errorf("Failed to verify CSI modules, ready %v, err %v", ready, err) + errs.Add(namespace, fmt.Errorf("CSI modules are not ready for restore. Check CSI feature is enabled and CSI plugin is installed")) + return warnings, errs, itemExists + } + case hasDeleteReclaimPolicy(obj.Object): ctx.log.Infof("Dynamically re-provisioning persistent volume because it doesn't have a snapshot and its reclaim policy is Delete.") ctx.pvsToProvision.Insert(name) @@ -1938,6 +1951,43 @@ func hasSnapshot(pvName string, snapshots []*volume.Snapshot) bool { return false } +func hasCSIVolumeSnapshot(ctx *restoreContext, unstructuredPV *unstructured.Unstructured) bool { + return false +} + +func hasSnapshotDataUpload(ctx *restoreContext, unstructuredPV *unstructured.Unstructured) bool { + pv := new(v1.PersistentVolume) + if err := runtime.DefaultUnstructuredConverter.FromUnstructured(unstructuredPV.Object, pv); err != nil { + ctx.log.WithError(err).Warnf("Unable to convert PV from unstructured to structured") + return false + } + + if pv.Spec.ClaimRef == nil { + return false + } + + dataUploadResultList := new(v1.ConfigMapList) + err := ctx.kbClient.List(go_context.TODO(), dataUploadResultList, &crclient.ListOptions{ + LabelSelector: labels.SelectorFromSet(map[string]string{ + velerov1api.RestoreUIDLabel: label.GetValidName(string(ctx.restore.GetUID())), + velerov1api.PVCNamespaceNameLabel: label.GetValidName(pv.Spec.ClaimRef.Namespace + "." + pv.Spec.ClaimRef.Name), + velerov1api.ResourceUsageLabel: label.GetValidName(string(velerov1api.VeleroResourceUsageDataUploadResult)), + }), + }) + if err != nil { + ctx.log.WithError(err).Warnf("Fail to list DataUpload result CM.") + return false + } + + if len(dataUploadResultList.Items) != 1 { + ctx.log.WithError(fmt.Errorf("dataupload result number is not expected")). + Warnf("Got %d DataUpload result. Expect one.", len(dataUploadResultList.Items)) + return false + } + + return true +} + func hasPodVolumeBackup(unstructuredPV *unstructured.Unstructured, ctx *restoreContext) bool { if len(ctx.podVolumeBackups) == 0 { return false