From 7178946debdb0b488005a049d3349e0c0759be26 Mon Sep 17 00:00:00 2001 From: Lyndon-Li Date: Tue, 28 Oct 2025 15:34:13 +0800 Subject: [PATCH] cache volume configuration Signed-off-by: Lyndon-Li --- changelogs/unreleased/9370-Lyndon-Li | 1 + pkg/builder/storage_class_builder.go | 7 ++ pkg/cmd/cli/nodeagent/server.go | 55 ++++++++++- pkg/cmd/cli/nodeagent/server_test.go | 140 ++++++++++++++++++++++++--- pkg/install/daemonset.go | 4 + pkg/install/daemonset_test.go | 4 + pkg/install/resources.go | 4 + pkg/types/node_agent.go | 11 +++ 8 files changed, 211 insertions(+), 15 deletions(-) create mode 100644 changelogs/unreleased/9370-Lyndon-Li diff --git a/changelogs/unreleased/9370-Lyndon-Li b/changelogs/unreleased/9370-Lyndon-Li new file mode 100644 index 000000000..43025c589 --- /dev/null +++ b/changelogs/unreleased/9370-Lyndon-Li @@ -0,0 +1 @@ +Add cache volume configuration \ No newline at end of file diff --git a/pkg/builder/storage_class_builder.go b/pkg/builder/storage_class_builder.go index 9e83ee957..b865dcfea 100644 --- a/pkg/builder/storage_class_builder.go +++ b/pkg/builder/storage_class_builder.go @@ -17,6 +17,7 @@ limitations under the License. package builder import ( + corev1api "k8s.io/api/core/v1" storagev1api "k8s.io/api/storage/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -87,3 +88,9 @@ func (b *StorageClassBuilder) Provisioner(provisioner string) *StorageClassBuild b.object.Provisioner = provisioner return b } + +// ReclaimPolicy sets StorageClass's reclaimPolicy. +func (b *StorageClassBuilder) ReclaimPolicy(policy corev1api.PersistentVolumeReclaimPolicy) *StorageClassBuilder { + b.object.ReclaimPolicy = &policy + return b +} diff --git a/pkg/cmd/cli/nodeagent/server.go b/pkg/cmd/cli/nodeagent/server.go index d3563c0f5..dbd471eb8 100644 --- a/pkg/cmd/cli/nodeagent/server.go +++ b/pkg/cmd/cli/nodeagent/server.go @@ -84,6 +84,7 @@ type nodeAgentServerConfig struct { resourceTimeout time.Duration dataMoverPrepareTimeout time.Duration nodeAgentConfig string + backupRepoConfig string } func NewServerCommand(f client.Factory) *cobra.Command { @@ -121,6 +122,7 @@ func NewServerCommand(f client.Factory) *cobra.Command { command.Flags().DurationVar(&config.dataMoverPrepareTimeout, "data-mover-prepare-timeout", config.dataMoverPrepareTimeout, "How long to wait for preparing a DataUpload/DataDownload. Default is 30 minutes.") command.Flags().StringVar(&config.metricsAddress, "metrics-address", config.metricsAddress, "The address to expose prometheus metrics") command.Flags().StringVar(&config.nodeAgentConfig, "node-agent-configmap", config.nodeAgentConfig, "The name of ConfigMap containing node-agent configurations.") + command.Flags().StringVar(&config.backupRepoConfig, "backup-repository-configmap", config.backupRepoConfig, "The name of ConfigMap containing backup repository configurations.") return command } @@ -140,6 +142,7 @@ type nodeAgentServer struct { csiSnapshotClient *snapshotv1client.Clientset dataPathMgr *datapath.Manager dataPathConfigs *velerotypes.NodeAgentConfigs + backupRepoConfigs map[string]string vgdpCounter *exposer.VgdpCounter } @@ -254,6 +257,11 @@ func newNodeAgentServer(logger logrus.FieldLogger, factory client.Factory, confi if err := s.getDataPathConfigs(); err != nil { return nil, err } + + if err := s.getBackupRepoConfigs(); err != nil { + return nil, err + } + s.dataPathMgr = datapath.NewManager(s.getDataPathConcurrentNum(defaultDataPathConcurrentNum)) return s, nil @@ -329,6 +337,14 @@ func (s *nodeAgentServer) run() { } } + if s.dataPathConfigs != nil && s.dataPathConfigs.CachePVCConfig != nil { + if err := s.validateCachePVCConfig(*s.dataPathConfigs.CachePVCConfig); err != nil { + s.logger.WithError(err).Warnf("Ignore cache config %v", s.dataPathConfigs.CachePVCConfig) + } else { + s.logger.Infof("Using cache volume configs %v", s.dataPathConfigs.CachePVCConfig) + } + } + pvbReconciler := controller.NewPodVolumeBackupReconciler(s.mgr.GetClient(), s.mgr, s.kubeClient, s.dataPathMgr, s.vgdpCounter, s.nodeName, s.config.dataMoverPrepareTimeout, s.config.resourceTimeout, podResources, s.metrics, s.logger, dataMovePriorityClass, privilegedFsBackup) if err := pvbReconciler.SetupWithManager(s.mgr); err != nil { s.logger.Fatal(err, "unable to create controller", "controller", constant.ControllerPodVolumeBackup) @@ -557,14 +573,32 @@ func (s *nodeAgentServer) getDataPathConfigs() error { configs, err := getConfigsFunc(s.ctx, s.namespace, s.kubeClient, s.config.nodeAgentConfig) if err != nil { - s.logger.WithError(err).Errorf("Failed to get node agent configs from configMap %s, ignore it", s.config.nodeAgentConfig) - return err + return errors.Wrapf(err, "error getting node agent configs from configMap %s", s.config.nodeAgentConfig) } s.dataPathConfigs = configs return nil } +func (s *nodeAgentServer) getBackupRepoConfigs() error { + if s.config.backupRepoConfig == "" { + s.logger.Info("No backup repo configMap is specified") + return nil + } + + cm, err := s.kubeClient.CoreV1().ConfigMaps(s.namespace).Get(s.ctx, s.config.backupRepoConfig, metav1.GetOptions{}) + if err != nil { + return errors.Wrapf(err, "error getting backup repo configs from configMap %s", s.config.backupRepoConfig) + } + + if cm.Data == nil { + return errors.Errorf("no data is in the backup repo configMap %s", s.config.backupRepoConfig) + } + + s.backupRepoConfigs = cm.Data + return nil +} + func (s *nodeAgentServer) getDataPathConcurrentNum(defaultNum int) int { configs := s.dataPathConfigs @@ -620,3 +654,20 @@ func (s *nodeAgentServer) getDataPathConcurrentNum(defaultNum int) int { return concurrentNum } + +func (s *nodeAgentServer) validateCachePVCConfig(config velerotypes.CachePVC) error { + if config.StorageClass == "" { + return errors.New("storage class is absent") + } + + sc, err := s.kubeClient.StorageV1().StorageClasses().Get(s.ctx, config.StorageClass, metav1.GetOptions{}) + if err != nil { + return errors.Wrapf(err, "error getting storage class %s", config.StorageClass) + } + + if sc.ReclaimPolicy != nil && *sc.ReclaimPolicy != corev1api.PersistentVolumeReclaimDelete { + return errors.Errorf("unexpected storage class reclaim policy %v", *sc.ReclaimPolicy) + } + + return nil +} diff --git a/pkg/cmd/cli/nodeagent/server_test.go b/pkg/cmd/cli/nodeagent/server_test.go index cb1750e6b..152016ab6 100644 --- a/pkg/cmd/cli/nodeagent/server_test.go +++ b/pkg/cmd/cli/nodeagent/server_test.go @@ -24,6 +24,7 @@ import ( "testing" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" corev1api "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" @@ -34,6 +35,8 @@ import ( "github.com/vmware-tanzu/velero/pkg/nodeagent" testutil "github.com/vmware-tanzu/velero/pkg/test" velerotypes "github.com/vmware-tanzu/velero/pkg/types" + + velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" ) func Test_validatePodVolumesHostPath(t *testing.T) { @@ -142,11 +145,10 @@ func Test_getDataPathConfigs(t *testing.T) { getFunc func(context.Context, string, kubernetes.Interface, string) (*velerotypes.NodeAgentConfigs, error) configMapName string expectConfigs *velerotypes.NodeAgentConfigs - expectLog string + expectedErr string }{ { - name: "no config specified", - expectLog: "No node-agent configMap is specified", + name: "no config specified", }, { name: "failed to get configs", @@ -154,7 +156,7 @@ func Test_getDataPathConfigs(t *testing.T) { getFunc: func(context.Context, string, kubernetes.Interface, string) (*velerotypes.NodeAgentConfigs, error) { return nil, errors.New("fake-get-error") }, - expectLog: "Failed to get node agent configs from configMap node-agent-config, ignore it", + expectedErr: "error getting node agent configs from configMap node-agent-config: fake-get-error", }, { name: "configs cm not found", @@ -162,7 +164,7 @@ func Test_getDataPathConfigs(t *testing.T) { getFunc: func(context.Context, string, kubernetes.Interface, string) (*velerotypes.NodeAgentConfigs, error) { return nil, errors.New("fake-not-found-error") }, - expectLog: "Failed to get node agent configs from configMap node-agent-config, ignore it", + expectedErr: "error getting node agent configs from configMap node-agent-config: fake-not-found-error", }, { @@ -177,23 +179,21 @@ func Test_getDataPathConfigs(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - logBuffer := "" - s := &nodeAgentServer{ config: nodeAgentServerConfig{ nodeAgentConfig: test.configMapName, }, - logger: testutil.NewSingleLogger(&logBuffer), + logger: testutil.NewLogger(), } getConfigsFunc = test.getFunc - s.getDataPathConfigs() - assert.Equal(t, test.expectConfigs, s.dataPathConfigs) - if test.expectLog == "" { - assert.Empty(t, logBuffer) + err := s.getDataPathConfigs() + if test.expectedErr == "" { + require.NoError(t, err) + assert.Equal(t, test.expectConfigs, s.dataPathConfigs) } else { - assert.Contains(t, logBuffer, test.expectLog) + require.EqualError(t, err, test.expectedErr) } }) } @@ -416,3 +416,117 @@ func Test_getDataPathConcurrentNum(t *testing.T) { }) } } + +func TestGetBackupRepoConfigs(t *testing.T) { + cmNoData := builder.ForConfigMap(velerov1api.DefaultNamespace, "backup-repo-config").Result() + cmWithData := builder.ForConfigMap(velerov1api.DefaultNamespace, "backup-repo-config").Data("cacheLimit", "100").Result() + + tests := []struct { + name string + configMapName string + kubeClientObj []runtime.Object + expectConfigs map[string]string + expectedErr string + }{ + { + name: "no config specified", + }, + { + name: "failed to get configs", + configMapName: "backup-repo-config", + expectedErr: "error getting backup repo configs from configMap backup-repo-config: configmaps \"backup-repo-config\" not found", + }, + { + name: "configs data not found", + kubeClientObj: []runtime.Object{cmNoData}, + configMapName: "backup-repo-config", + expectedErr: "no data is in the backup repo configMap backup-repo-config", + }, + { + name: "succeed", + configMapName: "backup-repo-config", + kubeClientObj: []runtime.Object{cmWithData}, + expectConfigs: map[string]string{"cacheLimit": "100"}, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + fakeKubeClient := fake.NewSimpleClientset(test.kubeClientObj...) + + s := &nodeAgentServer{ + namespace: velerov1api.DefaultNamespace, + kubeClient: fakeKubeClient, + config: nodeAgentServerConfig{ + backupRepoConfig: test.configMapName, + }, + logger: testutil.NewLogger(), + } + + err := s.getBackupRepoConfigs() + if test.expectedErr == "" { + require.NoError(t, err) + require.Equal(t, test.expectConfigs, s.backupRepoConfigs) + } else { + require.EqualError(t, err, test.expectedErr) + } + }) + } +} + +func TestValidateCachePVCConfig(t *testing.T) { + scWithRetainPolicy := builder.ForStorageClass("fake-storage-class").ReclaimPolicy(corev1api.PersistentVolumeReclaimRetain).Result() + scWithDeletePolicy := builder.ForStorageClass("fake-storage-class").ReclaimPolicy(corev1api.PersistentVolumeReclaimDelete).Result() + scWithNoPolicy := builder.ForStorageClass("fake-storage-class").Result() + + tests := []struct { + name string + config velerotypes.CachePVC + kubeClientObj []runtime.Object + expectedErr string + }{ + { + name: "no storage class", + expectedErr: "storage class is absent", + }, + { + name: "failed to get storage class", + config: velerotypes.CachePVC{StorageClass: "fake-storage-class"}, + expectedErr: "error getting storage class fake-storage-class: storageclasses.storage.k8s.io \"fake-storage-class\" not found", + }, + { + name: "storage class reclaim policy is not expected", + config: velerotypes.CachePVC{StorageClass: "fake-storage-class"}, + kubeClientObj: []runtime.Object{scWithRetainPolicy}, + expectedErr: "unexpected storage class reclaim policy Retain", + }, + { + name: "storage class reclaim policy is delete", + config: velerotypes.CachePVC{StorageClass: "fake-storage-class"}, + kubeClientObj: []runtime.Object{scWithDeletePolicy}, + }, + { + name: "storage class with no reclaim policy", + config: velerotypes.CachePVC{StorageClass: "fake-storage-class"}, + kubeClientObj: []runtime.Object{scWithNoPolicy}, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + fakeKubeClient := fake.NewSimpleClientset(test.kubeClientObj...) + + s := &nodeAgentServer{ + kubeClient: fakeKubeClient, + } + + err := s.validateCachePVCConfig(test.config) + + if test.expectedErr == "" { + require.NoError(t, err) + } else { + require.EqualError(t, err, test.expectedErr) + } + }) + } +} diff --git a/pkg/install/daemonset.go b/pkg/install/daemonset.go index 98f30cc69..ee63f3736 100644 --- a/pkg/install/daemonset.go +++ b/pkg/install/daemonset.go @@ -57,6 +57,10 @@ func DaemonSet(namespace string, opts ...podTemplateOption) *appsv1api.DaemonSet daemonSetArgs = append(daemonSetArgs, fmt.Sprintf("--node-agent-configmap=%s", c.nodeAgentConfigMap)) } + if len(c.backupRepoConfigMap) > 0 { + daemonSetArgs = append(daemonSetArgs, fmt.Sprintf("--backup-repository-configmap=%s", c.backupRepoConfigMap)) + } + userID := int64(0) mountPropagationMode := corev1api.MountPropagationHostToContainer diff --git a/pkg/install/daemonset_test.go b/pkg/install/daemonset_test.go index 77a1ebae8..139d3dcd0 100644 --- a/pkg/install/daemonset_test.go +++ b/pkg/install/daemonset_test.go @@ -60,6 +60,10 @@ func TestDaemonSet(t *testing.T) { assert.Len(t, ds.Spec.Template.Spec.Containers[0].Args, 3) assert.Equal(t, "--node-agent-configmap=node-agent-config-map", ds.Spec.Template.Spec.Containers[0].Args[2]) + ds = DaemonSet("velero", WithBackupRepoConfigMap("backup-repo-config-map")) + assert.Len(t, ds.Spec.Template.Spec.Containers[0].Args, 3) + assert.Equal(t, "--backup-repository-configmap=backup-repo-config-map", ds.Spec.Template.Spec.Containers[0].Args[2]) + ds = DaemonSet("velero", WithServiceAccountName("test-sa")) assert.Equal(t, "test-sa", ds.Spec.Template.Spec.ServiceAccountName) diff --git a/pkg/install/resources.go b/pkg/install/resources.go index 344fc599f..1d0ffa568 100644 --- a/pkg/install/resources.go +++ b/pkg/install/resources.go @@ -426,6 +426,10 @@ func AllResources(o *VeleroOptions) *unstructured.UnstructuredList { dsOpts = append(dsOpts, WithNodeAgentConfigMap(o.NodeAgentConfigMap)) } + if len(o.BackupRepoConfigMap) > 0 { + dsOpts = append(dsOpts, WithBackupRepoConfigMap(o.BackupRepoConfigMap)) + } + if len(o.KubeletRootDir) > 0 { dsOpts = append(dsOpts, WithKubeletRootDir(o.KubeletRootDir)) } diff --git a/pkg/types/node_agent.go b/pkg/types/node_agent.go index b335df275..121079dda 100644 --- a/pkg/types/node_agent.go +++ b/pkg/types/node_agent.go @@ -66,6 +66,14 @@ type RestorePVC struct { IgnoreDelayBinding bool `json:"ignoreDelayBinding,omitempty"` } +type CachePVC struct { + // StorageClass specifies the storage class for cache PVC + StorageClass string + + // ResidentThreshold specifies the minimum size of the backup data to create cache PVC + ResidentThreshold int64 +} + type NodeAgentConfigs struct { // LoadConcurrency is the config for data path load concurrency per node. LoadConcurrency *LoadConcurrency `json:"loadConcurrency,omitempty"` @@ -87,4 +95,7 @@ type NodeAgentConfigs struct { // PrivilegedFsBackup determines whether to create fs-backup pods as privileged pods PrivilegedFsBackup bool `json:"privilegedFsBackup,omitempty"` + + // CachePVCConfig is the config for cachePVC + CachePVCConfig *CachePVC `json:"cachePVC,omitempty"` }