From 50c30942c1a514cf00596e34857e3487c9d568fa Mon Sep 17 00:00:00 2001 From: Tiger Kaovilai Date: Wed, 23 Jul 2025 02:15:37 -0500 Subject: [PATCH] PriorityClass Support Design Proposal (#8882) PriorityClass Support Design Proposal Design for https://github.com/vmware-tanzu/velero/issues/8869 Signed-off-by: Tiger Kaovilai --- design/priority-class-name-support_design.md | 587 +++++++++++++++++++ 1 file changed, 587 insertions(+) create mode 100644 design/priority-class-name-support_design.md diff --git a/design/priority-class-name-support_design.md b/design/priority-class-name-support_design.md new file mode 100644 index 000000000..2555b3a55 --- /dev/null +++ b/design/priority-class-name-support_design.md @@ -0,0 +1,587 @@ +# PriorityClass Support Design Proposal + +## Abstract +This design document outlines the implementation of priority class name support for Velero components, including the Velero server deployment, node agent daemonset, maintenance jobs, and data mover pods. This feature allows users to specify a priority class name for Velero components, which can be used to influence the scheduling and eviction behavior of these components. + +## Background +Kubernetes allows users to define priority classes, which can be used to influence the scheduling and eviction behavior of pods. Priority classes are defined as cluster-wide resources, and pods can reference them by name. When a pod is created, the priority admission controller uses the priority class name to populate the priority value for the pod. The scheduler then uses this priority value to determine the order in which pods are scheduled. + +Currently, Velero does not provide a way for users to specify a priority class name for its components. This can be problematic in clusters where resource contention is high, as Velero components may be evicted or not scheduled in a timely manner, potentially impacting backup and restore operations. 
+ +## Goals +- Add support for specifying priority class names for Velero components +- Update the Velero CLI to accept priority class name parameters for different components +- Update the Velero deployment, node agent daemonset, maintenance jobs, and data mover pods to use the specified priority class names + +## Non Goals +- Creating or managing priority classes +- Automatically determining the appropriate priority class for Velero components + +## High-Level Design +The implementation will add new fields to the Velero options struct to store the priority class names for the server deployment and node agent daemonset. The Velero CLI will be updated to accept new flags for these components. For data mover pods and maintenance jobs, priority class names will be configured through existing ConfigMap mechanisms (the ConfigMap referenced by the `--node-agent-configmap` flag for data movers and the ConfigMap referenced by the `--repo-maintenance-job-configmap` flag for maintenance jobs). The Velero deployment, node agent daemonset, maintenance jobs, and data mover pods will be updated to use their respective priority class names. + +## Detailed Design + +### CLI Changes +New flags will be added to the `velero install` command to specify priority class names for different components: + +```go +flags.StringVar( + &o.ServerPriorityClassName, + "server-priority-class-name", + o.ServerPriorityClassName, + "Priority class name for the Velero server deployment. Optional.", +) + +flags.StringVar( + &o.NodeAgentPriorityClassName, + "node-agent-priority-class-name", + o.NodeAgentPriorityClassName, + "Priority class name for the node agent daemonset. Optional.", +) +``` + +Note: Priority class names for data mover pods and maintenance jobs will be configured through their respective ConfigMaps (`--node-agent-configmap` for data movers and `--repo-maintenance-job-configmap` for maintenance jobs). 
+ +### Velero Options Changes +The `VeleroOptions` struct in `pkg/install/resources.go` will be updated to include new fields for priority class names: + +```go +type VeleroOptions struct { + // ... existing fields ... + ServerPriorityClassName string + NodeAgentPriorityClassName string +} +``` + +### Deployment Changes +The `podTemplateConfig` struct in `pkg/install/deployment.go` will be updated to include a new field for the priority class name: + +```go +type podTemplateConfig struct { + // ... existing fields ... + priorityClassName string +} +``` + +A new function, `WithPriorityClassName`, will be added to set this field: + +```go +func WithPriorityClassName(priorityClassName string) podTemplateOption { + return func(c *podTemplateConfig) { + c.priorityClassName = priorityClassName + } +} +``` + +The `Deployment` function will be updated to use the priority class name: + +```go +deployment := &appsv1api.Deployment{ + // ... existing fields ... + Spec: appsv1api.DeploymentSpec{ + // ... existing fields ... + Template: corev1api.PodTemplateSpec{ + // ... existing fields ... + Spec: corev1api.PodSpec{ + // ... existing fields ... + PriorityClassName: c.priorityClassName, + }, + }, + }, +} +``` + +### DaemonSet Changes +The `DaemonSet` function will use the priority class name passed via the `podTemplateConfig` (from the CLI flag): + +```go +daemonSet := &appsv1api.DaemonSet{ + // ... existing fields ... + Spec: appsv1api.DaemonSetSpec{ + // ... existing fields ... + Template: corev1api.PodTemplateSpec{ + // ... existing fields ... + Spec: corev1api.PodSpec{ + // ... existing fields ... + PriorityClassName: c.priorityClassName, + }, + }, + }, +} +``` + +### Maintenance Job Changes +The `JobConfigs` struct in `pkg/repository/maintenance/maintenance.go` will be updated to include a field for the priority class name: + +```go +type JobConfigs struct { + // LoadAffinities is the config for repository maintenance job load affinity. 
+ LoadAffinities []*kube.LoadAffinity `json:"loadAffinity,omitempty"` + + // PodResources is the config for the CPU and memory resources setting. + PodResources *kube.PodResources `json:"podResources,omitempty"` + + // PriorityClassName is the priority class name for the maintenance job pod + // Note: This is only read from the global configuration, not per-repository + PriorityClassName string `json:"priorityClassName,omitempty"` +} +``` + +The `buildJob` function will be updated to use the priority class name from the global job configuration: + +```go +func buildJob(cli client.Client, ctx context.Context, repo *velerov1api.BackupRepository, bslName string, config *JobConfigs, + podResources kube.PodResources, logLevel logrus.Level, logFormat *logging.FormatFlag) (*batchv1.Job, error) { + // ... existing code ... + + // Use the priority class name from the global job configuration if available + // Note: Priority class is only read from global config, not per-repository + priorityClassName := "" + if config != nil && config.PriorityClassName != "" { + priorityClassName = config.PriorityClassName + } + + // ... existing code ... + + job := &batchv1.Job{ + // ... existing fields ... + Spec: batchv1.JobSpec{ + // ... existing fields ... + Template: corev1api.PodTemplateSpec{ + // ... existing fields ... + Spec: corev1api.PodSpec{ + // ... existing fields ... + PriorityClassName: priorityClassName, + }, + }, + }, + } + + // ... existing code ... +} +``` + +Users will be able to configure the priority class name for all maintenance jobs by creating the repository maintenance job ConfigMap before installation. For example: + +```bash +# Create the ConfigMap before running velero install +cat <