Merge pull request #9370 from Lyndon-Li/cache-volume-configuration
Some checks failed
Run the E2E test on kind / get-go-version (push) Failing after 47s
Run the E2E test on kind / build (push) Has been skipped
Run the E2E test on kind / setup-test-matrix (push) Successful in 2s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Main CI / get-go-version (push) Failing after 11s
Main CI / Build (push) Has been skipped
Close stale issues and PRs / stale (push) Successful in 11s
Trivy Nightly Scan / Trivy nightly scan (velero, main) (push) Failing after 14m2s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-aws, main) (push) Failing after 5m12s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-gcp, main) (push) Failing after 1m59s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-microsoft-azure, main) (push) Failing after 1m15s

Add cache volume configuration
This commit is contained in:
lyndon-li
2025-11-01 10:38:54 +08:00
committed by GitHub
8 changed files with 211 additions and 15 deletions

View File

@@ -0,0 +1 @@
Add cache volume configuration

View File

@@ -17,6 +17,7 @@ limitations under the License.
package builder
import (
corev1api "k8s.io/api/core/v1"
storagev1api "k8s.io/api/storage/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
@@ -87,3 +88,9 @@ func (b *StorageClassBuilder) Provisioner(provisioner string) *StorageClassBuild
b.object.Provisioner = provisioner
return b
}
// ReclaimPolicy sets StorageClass's reclaimPolicy.
func (b *StorageClassBuilder) ReclaimPolicy(policy corev1api.PersistentVolumeReclaimPolicy) *StorageClassBuilder {
b.object.ReclaimPolicy = &policy
return b
}

View File

@@ -84,6 +84,7 @@ type nodeAgentServerConfig struct {
resourceTimeout time.Duration
dataMoverPrepareTimeout time.Duration
nodeAgentConfig string
backupRepoConfig string
}
func NewServerCommand(f client.Factory) *cobra.Command {
@@ -121,6 +122,7 @@ func NewServerCommand(f client.Factory) *cobra.Command {
command.Flags().DurationVar(&config.dataMoverPrepareTimeout, "data-mover-prepare-timeout", config.dataMoverPrepareTimeout, "How long to wait for preparing a DataUpload/DataDownload. Default is 30 minutes.")
command.Flags().StringVar(&config.metricsAddress, "metrics-address", config.metricsAddress, "The address to expose prometheus metrics")
command.Flags().StringVar(&config.nodeAgentConfig, "node-agent-configmap", config.nodeAgentConfig, "The name of ConfigMap containing node-agent configurations.")
command.Flags().StringVar(&config.backupRepoConfig, "backup-repository-configmap", config.backupRepoConfig, "The name of ConfigMap containing backup repository configurations.")
return command
}
@@ -140,6 +142,7 @@ type nodeAgentServer struct {
csiSnapshotClient *snapshotv1client.Clientset
dataPathMgr *datapath.Manager
dataPathConfigs *velerotypes.NodeAgentConfigs
backupRepoConfigs map[string]string
vgdpCounter *exposer.VgdpCounter
}
@@ -254,6 +257,11 @@ func newNodeAgentServer(logger logrus.FieldLogger, factory client.Factory, confi
if err := s.getDataPathConfigs(); err != nil {
return nil, err
}
if err := s.getBackupRepoConfigs(); err != nil {
return nil, err
}
s.dataPathMgr = datapath.NewManager(s.getDataPathConcurrentNum(defaultDataPathConcurrentNum))
return s, nil
@@ -329,6 +337,14 @@ func (s *nodeAgentServer) run() {
}
}
if s.dataPathConfigs != nil && s.dataPathConfigs.CachePVCConfig != nil {
if err := s.validateCachePVCConfig(*s.dataPathConfigs.CachePVCConfig); err != nil {
s.logger.WithError(err).Warnf("Ignore cache config %v", s.dataPathConfigs.CachePVCConfig)
} else {
s.logger.Infof("Using cache volume configs %v", s.dataPathConfigs.CachePVCConfig)
}
}
pvbReconciler := controller.NewPodVolumeBackupReconciler(s.mgr.GetClient(), s.mgr, s.kubeClient, s.dataPathMgr, s.vgdpCounter, s.nodeName, s.config.dataMoverPrepareTimeout, s.config.resourceTimeout, podResources, s.metrics, s.logger, dataMovePriorityClass, privilegedFsBackup)
if err := pvbReconciler.SetupWithManager(s.mgr); err != nil {
s.logger.Fatal(err, "unable to create controller", "controller", constant.ControllerPodVolumeBackup)
@@ -557,14 +573,32 @@ func (s *nodeAgentServer) getDataPathConfigs() error {
configs, err := getConfigsFunc(s.ctx, s.namespace, s.kubeClient, s.config.nodeAgentConfig)
if err != nil {
s.logger.WithError(err).Errorf("Failed to get node agent configs from configMap %s, ignore it", s.config.nodeAgentConfig)
return err
return errors.Wrapf(err, "error getting node agent configs from configMap %s", s.config.nodeAgentConfig)
}
s.dataPathConfigs = configs
return nil
}
func (s *nodeAgentServer) getBackupRepoConfigs() error {
if s.config.backupRepoConfig == "" {
s.logger.Info("No backup repo configMap is specified")
return nil
}
cm, err := s.kubeClient.CoreV1().ConfigMaps(s.namespace).Get(s.ctx, s.config.backupRepoConfig, metav1.GetOptions{})
if err != nil {
return errors.Wrapf(err, "error getting backup repo configs from configMap %s", s.config.backupRepoConfig)
}
if cm.Data == nil {
return errors.Errorf("no data is in the backup repo configMap %s", s.config.backupRepoConfig)
}
s.backupRepoConfigs = cm.Data
return nil
}
func (s *nodeAgentServer) getDataPathConcurrentNum(defaultNum int) int {
configs := s.dataPathConfigs
@@ -620,3 +654,20 @@ func (s *nodeAgentServer) getDataPathConcurrentNum(defaultNum int) int {
return concurrentNum
}
func (s *nodeAgentServer) validateCachePVCConfig(config velerotypes.CachePVC) error {
if config.StorageClass == "" {
return errors.New("storage class is absent")
}
sc, err := s.kubeClient.StorageV1().StorageClasses().Get(s.ctx, config.StorageClass, metav1.GetOptions{})
if err != nil {
return errors.Wrapf(err, "error getting storage class %s", config.StorageClass)
}
if sc.ReclaimPolicy != nil && *sc.ReclaimPolicy != corev1api.PersistentVolumeReclaimDelete {
return errors.Errorf("unexpected storage class reclaim policy %v", *sc.ReclaimPolicy)
}
return nil
}

View File

@@ -24,6 +24,7 @@ import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
corev1api "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
@@ -34,6 +35,8 @@ import (
"github.com/vmware-tanzu/velero/pkg/nodeagent"
testutil "github.com/vmware-tanzu/velero/pkg/test"
velerotypes "github.com/vmware-tanzu/velero/pkg/types"
velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
)
func Test_validatePodVolumesHostPath(t *testing.T) {
@@ -142,11 +145,10 @@ func Test_getDataPathConfigs(t *testing.T) {
getFunc func(context.Context, string, kubernetes.Interface, string) (*velerotypes.NodeAgentConfigs, error)
configMapName string
expectConfigs *velerotypes.NodeAgentConfigs
expectLog string
expectedErr string
}{
{
name: "no config specified",
expectLog: "No node-agent configMap is specified",
name: "no config specified",
},
{
name: "failed to get configs",
@@ -154,7 +156,7 @@ func Test_getDataPathConfigs(t *testing.T) {
getFunc: func(context.Context, string, kubernetes.Interface, string) (*velerotypes.NodeAgentConfigs, error) {
return nil, errors.New("fake-get-error")
},
expectLog: "Failed to get node agent configs from configMap node-agent-config, ignore it",
expectedErr: "error getting node agent configs from configMap node-agent-config: fake-get-error",
},
{
name: "configs cm not found",
@@ -162,7 +164,7 @@ func Test_getDataPathConfigs(t *testing.T) {
getFunc: func(context.Context, string, kubernetes.Interface, string) (*velerotypes.NodeAgentConfigs, error) {
return nil, errors.New("fake-not-found-error")
},
expectLog: "Failed to get node agent configs from configMap node-agent-config, ignore it",
expectedErr: "error getting node agent configs from configMap node-agent-config: fake-not-found-error",
},
{
@@ -177,23 +179,21 @@ func Test_getDataPathConfigs(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
logBuffer := ""
s := &nodeAgentServer{
config: nodeAgentServerConfig{
nodeAgentConfig: test.configMapName,
},
logger: testutil.NewSingleLogger(&logBuffer),
logger: testutil.NewLogger(),
}
getConfigsFunc = test.getFunc
s.getDataPathConfigs()
assert.Equal(t, test.expectConfigs, s.dataPathConfigs)
if test.expectLog == "" {
assert.Empty(t, logBuffer)
err := s.getDataPathConfigs()
if test.expectedErr == "" {
require.NoError(t, err)
assert.Equal(t, test.expectConfigs, s.dataPathConfigs)
} else {
assert.Contains(t, logBuffer, test.expectLog)
require.EqualError(t, err, test.expectedErr)
}
})
}
@@ -416,3 +416,117 @@ func Test_getDataPathConcurrentNum(t *testing.T) {
})
}
}
func TestGetBackupRepoConfigs(t *testing.T) {
cmNoData := builder.ForConfigMap(velerov1api.DefaultNamespace, "backup-repo-config").Result()
cmWithData := builder.ForConfigMap(velerov1api.DefaultNamespace, "backup-repo-config").Data("cacheLimit", "100").Result()
tests := []struct {
name string
configMapName string
kubeClientObj []runtime.Object
expectConfigs map[string]string
expectedErr string
}{
{
name: "no config specified",
},
{
name: "failed to get configs",
configMapName: "backup-repo-config",
expectedErr: "error getting backup repo configs from configMap backup-repo-config: configmaps \"backup-repo-config\" not found",
},
{
name: "configs data not found",
kubeClientObj: []runtime.Object{cmNoData},
configMapName: "backup-repo-config",
expectedErr: "no data is in the backup repo configMap backup-repo-config",
},
{
name: "succeed",
configMapName: "backup-repo-config",
kubeClientObj: []runtime.Object{cmWithData},
expectConfigs: map[string]string{"cacheLimit": "100"},
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
fakeKubeClient := fake.NewSimpleClientset(test.kubeClientObj...)
s := &nodeAgentServer{
namespace: velerov1api.DefaultNamespace,
kubeClient: fakeKubeClient,
config: nodeAgentServerConfig{
backupRepoConfig: test.configMapName,
},
logger: testutil.NewLogger(),
}
err := s.getBackupRepoConfigs()
if test.expectedErr == "" {
require.NoError(t, err)
require.Equal(t, test.expectConfigs, s.backupRepoConfigs)
} else {
require.EqualError(t, err, test.expectedErr)
}
})
}
}
func TestValidateCachePVCConfig(t *testing.T) {
scWithRetainPolicy := builder.ForStorageClass("fake-storage-class").ReclaimPolicy(corev1api.PersistentVolumeReclaimRetain).Result()
scWithDeletePolicy := builder.ForStorageClass("fake-storage-class").ReclaimPolicy(corev1api.PersistentVolumeReclaimDelete).Result()
scWithNoPolicy := builder.ForStorageClass("fake-storage-class").Result()
tests := []struct {
name string
config velerotypes.CachePVC
kubeClientObj []runtime.Object
expectedErr string
}{
{
name: "no storage class",
expectedErr: "storage class is absent",
},
{
name: "failed to get storage class",
config: velerotypes.CachePVC{StorageClass: "fake-storage-class"},
expectedErr: "error getting storage class fake-storage-class: storageclasses.storage.k8s.io \"fake-storage-class\" not found",
},
{
name: "storage class reclaim policy is not expected",
config: velerotypes.CachePVC{StorageClass: "fake-storage-class"},
kubeClientObj: []runtime.Object{scWithRetainPolicy},
expectedErr: "unexpected storage class reclaim policy Retain",
},
{
name: "storage class reclaim policy is delete",
config: velerotypes.CachePVC{StorageClass: "fake-storage-class"},
kubeClientObj: []runtime.Object{scWithDeletePolicy},
},
{
name: "storage class with no reclaim policy",
config: velerotypes.CachePVC{StorageClass: "fake-storage-class"},
kubeClientObj: []runtime.Object{scWithNoPolicy},
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
fakeKubeClient := fake.NewSimpleClientset(test.kubeClientObj...)
s := &nodeAgentServer{
kubeClient: fakeKubeClient,
}
err := s.validateCachePVCConfig(test.config)
if test.expectedErr == "" {
require.NoError(t, err)
} else {
require.EqualError(t, err, test.expectedErr)
}
})
}
}

View File

@@ -57,6 +57,10 @@ func DaemonSet(namespace string, opts ...podTemplateOption) *appsv1api.DaemonSet
daemonSetArgs = append(daemonSetArgs, fmt.Sprintf("--node-agent-configmap=%s", c.nodeAgentConfigMap))
}
if len(c.backupRepoConfigMap) > 0 {
daemonSetArgs = append(daemonSetArgs, fmt.Sprintf("--backup-repository-configmap=%s", c.backupRepoConfigMap))
}
userID := int64(0)
mountPropagationMode := corev1api.MountPropagationHostToContainer

View File

@@ -60,6 +60,10 @@ func TestDaemonSet(t *testing.T) {
assert.Len(t, ds.Spec.Template.Spec.Containers[0].Args, 3)
assert.Equal(t, "--node-agent-configmap=node-agent-config-map", ds.Spec.Template.Spec.Containers[0].Args[2])
ds = DaemonSet("velero", WithBackupRepoConfigMap("backup-repo-config-map"))
assert.Len(t, ds.Spec.Template.Spec.Containers[0].Args, 3)
assert.Equal(t, "--backup-repository-configmap=backup-repo-config-map", ds.Spec.Template.Spec.Containers[0].Args[2])
ds = DaemonSet("velero", WithServiceAccountName("test-sa"))
assert.Equal(t, "test-sa", ds.Spec.Template.Spec.ServiceAccountName)

View File

@@ -426,6 +426,10 @@ func AllResources(o *VeleroOptions) *unstructured.UnstructuredList {
dsOpts = append(dsOpts, WithNodeAgentConfigMap(o.NodeAgentConfigMap))
}
if len(o.BackupRepoConfigMap) > 0 {
dsOpts = append(dsOpts, WithBackupRepoConfigMap(o.BackupRepoConfigMap))
}
if len(o.KubeletRootDir) > 0 {
dsOpts = append(dsOpts, WithKubeletRootDir(o.KubeletRootDir))
}

View File

@@ -66,6 +66,14 @@ type RestorePVC struct {
IgnoreDelayBinding bool `json:"ignoreDelayBinding,omitempty"`
}
type CachePVC struct {
// StorageClass specifies the storage class for cache PVC
StorageClass string
// ResidentThreshold specifies the minimum size of the backup data to create cache PVC
ResidentThreshold int64
}
type NodeAgentConfigs struct {
// LoadConcurrency is the config for data path load concurrency per node.
LoadConcurrency *LoadConcurrency `json:"loadConcurrency,omitempty"`
@@ -87,4 +95,7 @@ type NodeAgentConfigs struct {
// PrivilegedFsBackup determines whether to create fs-backup pods as privileged pods
PrivilegedFsBackup bool `json:"privilegedFsBackup,omitempty"`
// CachePVCConfig is the config for cachePVC
CachePVCConfig *CachePVC `json:"cachePVC,omitempty"`
}