Implement the Repo maintenance Job configuration design.

Remove the resource parameters from the velero server CLI.

Signed-off-by: Xun Jiang <xun.jiang@broadcom.com>
This commit is contained in:
Xun Jiang
2024-08-21 15:14:53 +08:00
parent b92143dad1
commit 26cc41f26d
25 changed files with 1275 additions and 577 deletions

View File

@@ -23,7 +23,6 @@ import (
"strings"
"time"
"github.com/vmware-tanzu/velero/pkg/repository"
"github.com/vmware-tanzu/velero/pkg/uploader"
"github.com/pkg/errors"
@@ -85,7 +84,8 @@ type Options struct {
DefaultSnapshotMoveData bool
DisableInformerCache bool
ScheduleSkipImmediately bool
MaintenanceCfg repository.MaintenanceConfig
PodResources kubeutil.PodResources
KeepLatestMaintenanceJobs int
}
// BindFlags adds command line values to the options struct.
@@ -130,11 +130,37 @@ func (o *Options) BindFlags(flags *pflag.FlagSet) {
flags.BoolVar(&o.DefaultSnapshotMoveData, "default-snapshot-move-data", o.DefaultSnapshotMoveData, "Bool flag to configure Velero server to move data by default for all snapshots supporting data movement. Optional.")
flags.BoolVar(&o.DisableInformerCache, "disable-informer-cache", o.DisableInformerCache, "Disable informer cache for Get calls on restore. With this enabled, it will speed up restore in cases where there are backup resources which already exist in the cluster, but for very large clusters this will increase velero memory usage. Default is false (don't disable). Optional.")
flags.BoolVar(&o.ScheduleSkipImmediately, "schedule-skip-immediately", o.ScheduleSkipImmediately, "Skip the first scheduled backup immediately after creating a schedule. Default is false (don't skip).")
flags.IntVar(&o.MaintenanceCfg.KeepLatestMaitenanceJobs, "keep-latest-maintenance-jobs", o.MaintenanceCfg.KeepLatestMaitenanceJobs, "Number of latest maintenance jobs to keep each repository. Optional.")
flags.StringVar(&o.MaintenanceCfg.CPURequest, "maintenance-job-cpu-request", o.MaintenanceCfg.CPURequest, "CPU request for maintenance jobs. Default is no limit.")
flags.StringVar(&o.MaintenanceCfg.MemRequest, "maintenance-job-mem-request", o.MaintenanceCfg.MemRequest, "Memory request for maintenance jobs. Default is no limit.")
flags.StringVar(&o.MaintenanceCfg.CPULimit, "maintenance-job-cpu-limit", o.MaintenanceCfg.CPULimit, "CPU limit for maintenance jobs. Default is no limit.")
flags.StringVar(&o.MaintenanceCfg.MemLimit, "maintenance-job-mem-limit", o.MaintenanceCfg.MemLimit, "Memory limit for maintenance jobs. Default is no limit.")
flags.IntVar(
&o.KeepLatestMaintenanceJobs,
"keep-latest-maintenance-jobs",
o.KeepLatestMaintenanceJobs,
"Number of latest maintenance jobs to keep each repository. Optional.",
)
flags.StringVar(
&o.PodResources.CPURequest,
"maintenance-job-cpu-request",
o.PodResources.CPURequest,
"CPU request for maintenance jobs. Default is no limit.",
)
flags.StringVar(
&o.PodResources.MemoryRequest,
"maintenance-job-mem-request",
o.PodResources.MemoryRequest,
"Memory request for maintenance jobs. Default is no limit.",
)
flags.StringVar(
&o.PodResources.CPULimit,
"maintenance-job-cpu-limit",
o.PodResources.CPULimit,
"CPU limit for maintenance jobs. Default is no limit.",
)
flags.StringVar(
&o.PodResources.MemoryLimit,
"maintenance-job-mem-limit",
o.PodResources.MemoryLimit,
"Memory limit for maintenance jobs. Default is no limit.",
)
}
// NewInstallOptions instantiates a new, default InstallOptions struct.
@@ -231,7 +257,8 @@ func (o *Options) AsVeleroOptions() (*install.VeleroOptions, error) {
DefaultSnapshotMoveData: o.DefaultSnapshotMoveData,
DisableInformerCache: o.DisableInformerCache,
ScheduleSkipImmediately: o.ScheduleSkipImmediately,
MaintenanceCfg: o.MaintenanceCfg,
PodResources: o.PodResources,
KeepLatestMaintenanceJobs: o.KeepLatestMaintenanceJobs,
}, nil
}

View File

@@ -294,7 +294,7 @@ func (s *nodeAgentServer) run() {
s.logger.WithError(err).Fatal("Unable to create the pod volume restore controller")
}
var loadAffinity *nodeagent.LoadAffinity
var loadAffinity *kube.LoadAffinity
if s.dataPathConfigs != nil && len(s.dataPathConfigs.LoadAffinity) > 0 {
loadAffinity = s.dataPathConfigs.LoadAffinity[0]
s.logger.Infof("Using customized loadAffinity %v", loadAffinity)
@@ -316,7 +316,21 @@ func (s *nodeAgentServer) run() {
}
}
dataUploadReconciler := controller.NewDataUploadReconciler(s.mgr.GetClient(), s.mgr, s.kubeClient, s.csiSnapshotClient.SnapshotV1(), s.dataPathMgr, loadAffinity, backupPVCConfig, podResources, clock.RealClock{}, s.nodeName, s.config.dataMoverPrepareTimeout, s.logger, s.metrics)
dataUploadReconciler := controller.NewDataUploadReconciler(
s.mgr.GetClient(),
s.mgr,
s.kubeClient,
s.csiSnapshotClient.SnapshotV1(),
s.dataPathMgr,
loadAffinity,
backupPVCConfig,
podResources,
clock.RealClock{},
s.nodeName,
s.config.dataMoverPrepareTimeout,
s.logger,
s.metrics,
)
if err = dataUploadReconciler.SetupWithManager(s.mgr); err != nil {
s.logger.WithError(err).Fatal("Unable to create the data upload controller")
}

View File

@@ -11,9 +11,9 @@ import (
"github.com/vmware-tanzu/velero/pkg/cmd/util/flag"
"github.com/vmware-tanzu/velero/pkg/constant"
"github.com/vmware-tanzu/velero/pkg/podvolume"
"github.com/vmware-tanzu/velero/pkg/repository"
"github.com/vmware-tanzu/velero/pkg/types"
"github.com/vmware-tanzu/velero/pkg/uploader"
"github.com/vmware-tanzu/velero/pkg/util/kube"
"github.com/vmware-tanzu/velero/pkg/util/logging"
)
@@ -47,6 +47,12 @@ const (
// defaultCredentialsDirectory is the path on disk where credential
// files will be written to
defaultCredentialsDirectory = "/tmp/credentials"
DefaultKeepLatestMaintenanceJobs = 3
DefaultMaintenanceJobCPURequest = "0"
DefaultMaintenanceJobCPULimit = "0"
DefaultMaintenanceJobMemRequest = "0"
DefaultMaintenanceJobMemLimit = "0"
)
var (
@@ -164,9 +170,11 @@ type Config struct {
DefaultSnapshotMoveData bool
DisableInformerCache bool
ScheduleSkipImmediately bool
MaintenanceCfg repository.MaintenanceConfig
BackukpRepoConfig string
CredentialsDirectory string
BackupRepoConfig string
RepoMaintenanceJobConfig string
PodResources kube.PodResources
KeepLatestMaintenanceJobs int
}
func GetDefaultConfig() *Config {
@@ -197,13 +205,13 @@ func GetDefaultConfig() *Config {
DisableInformerCache: defaultDisableInformerCache,
ScheduleSkipImmediately: false,
CredentialsDirectory: defaultCredentialsDirectory,
}
config.MaintenanceCfg = repository.MaintenanceConfig{
KeepLatestMaitenanceJobs: repository.DefaultKeepLatestMaitenanceJobs,
// maintenance job log setting inherited from velero server
FormatFlag: config.LogFormat,
LogLevelFlag: config.LogLevel,
PodResources: kube.PodResources{
CPURequest: DefaultMaintenanceJobCPULimit,
CPULimit: DefaultMaintenanceJobCPURequest,
MemoryRequest: DefaultMaintenanceJobMemRequest,
MemoryLimit: DefaultMaintenanceJobMemLimit,
},
KeepLatestMaintenanceJobs: DefaultKeepLatestMaintenanceJobs,
}
return config
@@ -238,11 +246,48 @@ func (c *Config) BindFlags(flags *pflag.FlagSet) {
flags.BoolVar(&c.DefaultSnapshotMoveData, "default-snapshot-move-data", c.DefaultSnapshotMoveData, "Move data by default for all snapshots supporting data movement.")
flags.BoolVar(&c.DisableInformerCache, "disable-informer-cache", c.DisableInformerCache, "Disable informer cache for Get calls on restore. With this enabled, it will speed up restore in cases where there are backup resources which already exist in the cluster, but for very large clusters this will increase velero memory usage. Default is false (don't disable).")
flags.BoolVar(&c.ScheduleSkipImmediately, "schedule-skip-immediately", c.ScheduleSkipImmediately, "Skip the first scheduled backup immediately after creating a schedule. Default is false (don't skip).")
flags.IntVar(&c.MaintenanceCfg.KeepLatestMaitenanceJobs, "keep-latest-maintenance-jobs", c.MaintenanceCfg.KeepLatestMaitenanceJobs, "Number of latest maintenance jobs to keep each repository. Optional.")
flags.StringVar(&c.MaintenanceCfg.CPURequest, "maintenance-job-cpu-request", c.MaintenanceCfg.CPURequest, "CPU request for maintenance job. Default is no limit.")
flags.StringVar(&c.MaintenanceCfg.MemRequest, "maintenance-job-mem-request", c.MaintenanceCfg.MemRequest, "Memory request for maintenance job. Default is no limit.")
flags.StringVar(&c.MaintenanceCfg.CPULimit, "maintenance-job-cpu-limit", c.MaintenanceCfg.CPULimit, "CPU limit for maintenance job. Default is no limit.")
flags.StringVar(&c.MaintenanceCfg.MemLimit, "maintenance-job-mem-limit", c.MaintenanceCfg.MemLimit, "Memory limit for maintenance job. Default is no limit.")
flags.StringVar(&c.BackukpRepoConfig, "backup-repository-config", c.BackukpRepoConfig, "The name of configMap containing backup repository configurations.")
flags.Var(&c.DefaultVolumeSnapshotLocations, "default-volume-snapshot-locations", "List of unique volume providers and default volume snapshot location (provider1:location-01,provider2:location-02,...)")
flags.IntVar(
&c.KeepLatestMaintenanceJobs,
"keep-latest-maintenance-jobs",
c.KeepLatestMaintenanceJobs,
"Number of latest maintenance jobs to keep each repository. Optional.",
)
flags.StringVar(
&c.PodResources.CPURequest,
"maintenance-job-cpu-request",
c.PodResources.CPURequest,
"CPU request for maintenance job. Default is no limit.",
)
flags.StringVar(
&c.PodResources.MemoryRequest,
"maintenance-job-mem-request",
c.PodResources.MemoryRequest,
"Memory request for maintenance job. Default is no limit.",
)
flags.StringVar(
&c.PodResources.CPULimit,
"maintenance-job-cpu-limit",
c.PodResources.CPULimit,
"CPU limit for maintenance job. Default is no limit.",
)
flags.StringVar(
&c.PodResources.MemoryLimit,
"maintenance-job-mem-limit",
c.PodResources.MemoryLimit,
"Memory limit for maintenance job. Default is no limit.",
)
flags.StringVar(
&c.BackupRepoConfig,
"backup-repository-config",
c.BackupRepoConfig,
"The name of configMap containing backup repository configurations.",
)
flags.StringVar(
&c.RepoMaintenanceJobConfig,
"repo-maintenance-job-config",
c.RepoMaintenanceJobConfig,
"The name of ConfigMap containing repository maintenance Job configurations.",
)
}

View File

@@ -9,11 +9,11 @@ import (
func TestGetDefaultConfig(t *testing.T) {
config := GetDefaultConfig()
assert.Equal(t, "info", config.MaintenanceCfg.LogLevelFlag.String())
assert.Equal(t, "0", config.PodResources.CPULimit)
}
func TestBindFlags(t *testing.T) {
config := GetDefaultConfig()
config.BindFlags(pflag.CommandLine)
assert.Equal(t, "info", config.MaintenanceCfg.LogLevelFlag.String())
assert.Equal(t, "0", config.PodResources.CPULimit)
}

View File

@@ -469,7 +469,20 @@ func (s *server) initRepoManager() error {
s.repoLocker = repository.NewRepoLocker()
s.repoEnsurer = repository.NewEnsurer(s.mgr.GetClient(), s.logger, s.config.ResourceTimeout)
s.repoManager = repository.NewManager(s.namespace, s.mgr.GetClient(), s.repoLocker, s.repoEnsurer, s.credentialFileStore, s.credentialSecretStore, s.config.MaintenanceCfg, s.logger)
s.repoManager = repository.NewManager(
s.namespace,
s.mgr.GetClient(),
s.repoLocker,
s.repoEnsurer,
s.credentialFileStore,
s.credentialSecretStore,
s.config.RepoMaintenanceJobConfig,
s.config.PodResources,
s.config.KeepLatestMaintenanceJobs,
s.logger,
s.logLevel,
s.config.LogFormat,
)
return nil
}
@@ -683,7 +696,14 @@ func (s *server) runControllers(defaultVolumeSnapshotLocations map[string]string
}
if _, ok := enabledRuntimeControllers[constant.ControllerBackupRepo]; ok {
if err := controller.NewBackupRepoReconciler(s.namespace, s.logger, s.mgr.GetClient(), s.config.RepoMaintenanceFrequency, s.config.BackukpRepoConfig, s.repoManager).SetupWithManager(s.mgr); err != nil {
if err := controller.NewBackupRepoReconciler(
s.namespace,
s.logger,
s.mgr.GetClient(),
s.config.RepoMaintenanceFrequency,
s.config.BackupRepoConfig,
s.repoManager,
).SetupWithManager(s.mgr); err != nil {
s.logger.Fatal(err, "unable to create controller", "controller", constant.ControllerBackupRepo)
}
}