diff --git a/.github/workflows/pr-codespell.yml b/.github/workflows/pr-codespell.yml index 900371ca2..041403149 100644 --- a/.github/workflows/pr-codespell.yml +++ b/.github/workflows/pr-codespell.yml @@ -15,7 +15,7 @@ jobs: with: # ignore the config/.../crd.go file as it's generated binary data that is edited elsewhere. skip: .git,*.png,*.jpg,*.woff,*.ttf,*.gif,*.ico,./config/crd/v1beta1/crds/crds.go,./config/crd/v1/crds/crds.go,./config/crd/v2alpha1/crds/crds.go,./go.sum,./LICENSE - ignore_words_list: iam,aks,ist,bridget,ue,shouldnot,atleast,notin,sme,optin + ignore_words_list: iam,aks,ist,bridget,ue,shouldnot,atleast,notin,sme,optin,sie check_filenames: true check_hidden: true diff --git a/changelogs/unreleased/8883-kaovilai b/changelogs/unreleased/8883-kaovilai new file mode 100644 index 000000000..63be04bf1 --- /dev/null +++ b/changelogs/unreleased/8883-kaovilai @@ -0,0 +1 @@ +Implement PriorityClass Support diff --git a/changelogs/unreleased/9145-reasonerjt b/changelogs/unreleased/9145-reasonerjt new file mode 100644 index 000000000..0cf694adf --- /dev/null +++ b/changelogs/unreleased/9145-reasonerjt @@ -0,0 +1 @@ +Add include/exclude policy to resources policy \ No newline at end of file diff --git a/design/priority-class-name-support_design.md b/design/Implemented/priority-class-name-support_design.md similarity index 76% rename from design/priority-class-name-support_design.md rename to design/Implemented/priority-class-name-support_design.md index 2555b3a55..f47d6ece0 100644 --- a/design/priority-class-name-support_design.md +++ b/design/Implemented/priority-class-name-support_design.md @@ -1,28 +1,34 @@ # PriorityClass Support Design Proposal ## Abstract + This design document outlines the implementation of priority class name support for Velero components, including the Velero server deployment, node agent daemonset, and maintenance jobs. This feature allows users to specify a priority class name for Velero components, which can be used to influence the scheduling and eviction behavior of these components. ## Background + Kubernetes allows users to define priority classes, which can be used to influence the scheduling and eviction behavior of pods. Priority classes are defined as cluster-wide resources, and pods can reference them by name. When a pod is created, the priority admission controller uses the priority class name to populate the priority value for the pod. The scheduler then uses this priority value to determine the order in which pods are scheduled. Currently, Velero does not provide a way for users to specify a priority class name for its components. This can be problematic in clusters where resource contention is high, as Velero components may be evicted or not scheduled in a timely manner, potentially impacting backup and restore operations. ## Goals + - Add support for specifying priority class names for Velero components - Update the Velero CLI to accept priority class name parameters for different components - Update the Velero deployment, node agent daemonset, maintenance jobs, and data mover pods to use the specified priority class names ## Non Goals + - Creating or managing priority classes - Automatically determining the appropriate priority class for Velero components ## High-Level Design + The implementation will add new fields to the Velero options struct to store the priority class names for the server deployment and node agent daemonset. The Velero CLI will be updated to accept new flags for these components. 
For data mover pods and maintenance jobs, priority class names will be configured through existing ConfigMap mechanisms (`node-agent-configmap` for data movers and `repo-maintenance-job-configmap` for maintenance jobs). The Velero deployment, node agent daemonset, maintenance jobs, and data mover pods will be updated to use their respective priority class names. ## Detailed Design ### CLI Changes + New flags will be added to the `velero install` command to specify priority class names for different components: ```go @@ -44,6 +50,7 @@ flags.StringVar( Note: Priority class names for data mover pods and maintenance jobs will be configured through their respective ConfigMaps (`--node-agent-configmap` for data movers and `--repo-maintenance-job-configmap` for maintenance jobs). ### Velero Options Changes + The `VeleroOptions` struct in `pkg/install/resources.go` will be updated to include new fields for priority class names: ```go @@ -55,6 +62,7 @@ type VeleroOptions struct { ``` ### Deployment Changes + The `podTemplateConfig` struct in `pkg/install/deployment.go` will be updated to include a new field for the priority class name: ```go @@ -93,6 +101,7 @@ deployment := &appsv1api.Deployment{ ``` ### DaemonSet Changes + The `DaemonSet` function will use the priority class name passed via the podTemplateConfig (from the CLI flag): ```go @@ -112,6 +121,7 @@ daemonSet := &appsv1api.DaemonSet{ ``` ### Maintenance Job Changes + The `JobConfigs` struct in `pkg/repository/maintenance/maintenance.go` will be updated to include a field for the priority class name: ```go @@ -187,6 +197,7 @@ velero install --provider aws \ The ConfigMap can be updated after installation to change the priority class for future maintenance jobs. Note that only the "global" configuration is used for priority class - all maintenance jobs will use the same priority class regardless of which repository they are maintaining. 
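For illustration, a minimal sketch (not part of this diff) of how a maintenance job builder could apply the "global" priority class from the `repo-maintenance-job-configmap` to the job's pod template. The `JobConfigs` struct and helper below are illustrative assumptions based on this design, not the final Velero API:

```go
package main

import (
	"fmt"

	batchv1 "k8s.io/api/batch/v1"
	corev1 "k8s.io/api/core/v1"
)

// JobConfigs mirrors the design's global maintenance-job configuration
// (field name assumed from this proposal).
type JobConfigs struct {
	PriorityClassName string `json:"priorityClassName,omitempty"`
}

// applyPriorityClass sets the priority class on the maintenance job's pod
// template only when one is configured; an empty value leaves the job on
// the cluster's default pod priority.
func applyPriorityClass(job *batchv1.Job, cfg *JobConfigs) {
	if cfg == nil || cfg.PriorityClassName == "" {
		return
	}
	job.Spec.Template.Spec.PriorityClassName = cfg.PriorityClassName
}

func main() {
	job := &batchv1.Job{
		Spec: batchv1.JobSpec{
			Template: corev1.PodTemplateSpec{
				Spec: corev1.PodSpec{
					Containers: []corev1.Container{{Name: "maintenance", Image: "velero/velero"}},
				},
			},
		},
	}
	applyPriorityClass(job, &JobConfigs{PriorityClassName: "velero-maintenance"})
	fmt.Println(job.Spec.Template.Spec.PriorityClassName) // velero-maintenance
}
```

Because only the "global" configuration is honored for priority class, per-repository entries in the ConfigMap would not change this behavior.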
### Node Agent ConfigMap Changes + We'll update the `Configs` struct in `pkg/nodeagent/node_agent.go` to include a field for the priority class name in the node-agent-configmap: ```go @@ -284,14 +295,47 @@ A new function, `GetDataMoverPriorityClassName`, will be added to the `pkg/util/ // GetDataMoverPriorityClassName retrieves the priority class name for data mover pods from the node-agent-configmap func GetDataMoverPriorityClassName(ctx context.Context, namespace string, kubeClient kubernetes.Interface, configName string) (string, error) { - // Get from node-agent-configmap - configs, err := nodeagent.GetConfigs(ctx, namespace, kubeClient, configName) - if err == nil && configs != nil && configs.PriorityClassName != "" { - return configs.PriorityClassName, nil + // configData is a minimal struct to parse only the priority class name from the ConfigMap + type configData struct { + PriorityClassName string `json:"priorityClassName,omitempty"` } - - // Return empty string if not found in configmap - return "", nil + + // Get the ConfigMap + cm, err := kubeClient.CoreV1().ConfigMaps(namespace).Get(ctx, configName, metav1.GetOptions{}) + if err != nil { + if apierrors.IsNotFound(err) { + // ConfigMap not found is not an error, just return empty string + return "", nil + } + return "", errors.Wrapf(err, "error getting node agent config map %s", configName) + } + + if cm.Data == nil { + // No data in ConfigMap, return empty string + return "", nil + } + + // Extract the first value from the ConfigMap data + jsonString := "" + for _, v := range cm.Data { + jsonString = v + break // Use the first value found + } + + if jsonString == "" { + // No data to parse, return empty string + return "", nil + } + + // Parse the JSON to extract priority class name + var config configData + if err := json.Unmarshal([]byte(jsonString), &config); err != nil { + // Invalid JSON is not a critical error for priority class + // Just return empty string to use default behavior + return "", nil + } + + return config.PriorityClassName, nil } ``` @@ -307,11 +351,12 @@ To improve observability and help with troubleshooting, the implementation will // In pkg/util/kube/priority_class.go // ValidatePriorityClass checks if the specified priority class exists in the cluster -// Returns nil if the priority class exists or if priorityClassName is empty -// Returns a warning (not an error) if the priority class doesn't exist -func ValidatePriorityClass(ctx context.Context, kubeClient kubernetes.Interface, priorityClassName string, logger logrus.FieldLogger) { +// Returns true if the priority class exists or if priorityClassName is empty +// Returns false if the priority class doesn't exist or validation fails +// Logs warnings when the priority class doesn't exist +func ValidatePriorityClass(ctx context.Context, kubeClient kubernetes.Interface, priorityClassName string, logger logrus.FieldLogger) bool { if priorityClassName == "" { - return + return true } _, err := kubeClient.SchedulingV1().PriorityClasses().Get(ctx, priorityClassName, metav1.GetOptions{}) @@ -321,9 +366,10 @@ func ValidatePriorityClass(ctx context.Context, kubeClient kubernetes.Interface, } else { logger.WithError(err).Warnf("Failed to validate priority class %q", priorityClassName) } - } else { - logger.Infof("Validated priority class %q exists in cluster", priorityClassName) + return false } + logger.Infof("Validated priority class %q exists in cluster", priorityClassName) + return true } ``` @@ -352,6 +398,7 @@ if priorityClassName != "" { ``` These 
validation and logging features will help administrators: + - Identify configuration issues early (validation warnings) - Troubleshoot priority class application issues - Verify that priority classes are being applied as expected @@ -371,20 +418,30 @@ The `ValidatePriorityClass` function should be called at the following points: - Before creating maintenance jobs Example usage: + ```go // During velero install if o.ServerPriorityClassName != "" { - kube.ValidatePriorityClass(ctx, kubeClient, o.ServerPriorityClassName, logger.WithField("component", "server")) + _ = kube.ValidatePriorityClass(ctx, kubeClient, o.ServerPriorityClassName, logger.WithField("component", "server")) + // For install command, we continue even if validation fails (warnings are logged) } -// When reading from ConfigMap +// When reading from ConfigMap in node-agent server priorityClassName, err := kube.GetDataMoverPriorityClassName(ctx, namespace, kubeClient, configMapName) if err == nil && priorityClassName != "" { - kube.ValidatePriorityClass(ctx, kubeClient, priorityClassName, logger.WithField("component", "data-mover")) + // Validate the priority class exists in the cluster + if kube.ValidatePriorityClass(ctx, kubeClient, priorityClassName, logger.WithField("component", "data-mover")) { + dataMovePriorityClass = priorityClassName + logger.WithField("priorityClassName", priorityClassName).Info("Using priority class for data mover pods") + } else { + logger.WithField("priorityClassName", priorityClassName).Warn("Priority class not found in cluster, data mover pods will use default priority") + // Clear the priority class to prevent pod creation failures + priorityClassName = "" + } } ``` -Note: Since validation only logs warnings (not errors), it won't block operations if a priority class doesn't exist. This allows for scenarios where priority classes might be created after Velero installation. +Note: The validation function returns a boolean to allow callers to decide how to handle missing priority classes. For the install command, validation failures are ignored (only warnings are logged) to allow for scenarios where priority classes might be created after Velero installation. For runtime components like the node-agent server, the priority class is cleared if validation fails to prevent pod creation failures. ## Alternatives Considered @@ -402,6 +459,26 @@ There are no security considerations for this feature. This feature is compatible with all Kubernetes versions that support priority classes. The PodPriority feature became stable in Kubernetes 1.14. For more information, see the [Kubernetes documentation on Pod Priority and Preemption](https://kubernetes.io/docs/concepts/scheduling-eviction/pod-priority-preemption/). +## ConfigMap Update Strategy + +### Static ConfigMap Reading at Startup + +The node-agent server reads and parses the ConfigMap once during initialization and passes configurations (like `podResources`, `loadAffinity`, and `priorityClassName`) directly to controllers as parameters. This approach ensures: + +- Single ConfigMap read to minimize API calls +- Consistent configuration across all controllers +- Validation of priority classes at startup with fallback behavior +- No need for complex update mechanisms or watchers + +ConfigMap changes require a restart of the node-agent to take effect. + +### Implementation Approach + +1. **Data Mover Controllers**: Receive priority class as a string parameter from node-agent server at initialization +2. 
**Maintenance Job Controller**: Read fresh configuration from repo-maintenance-job-configmap at job creation time +3. ConfigMap changes require restart of components to take effect +4. Priority class validation happens at startup with automatic fallback to prevent failures + ## Implementation The implementation will involve the following steps: @@ -519,10 +596,10 @@ velero install \ When configuring priority classes for Velero components, consider the following hierarchy based on component criticality: -1. **Velero Server (Highest Priority)**: +1. **Velero Server (Highest Priority)**: - Example: `velero-critical` with value 100 - Rationale: The server must remain running to coordinate backup/restore operations - + 2. **Node Agent DaemonSet (Medium Priority)**: - Example: `velero-standard` with value 50 - Rationale: Node agents need to be available on nodes but are less critical than the server @@ -544,37 +621,47 @@ This approach has several advantages: The priority class name for data mover pods will be determined by checking the node-agent-configmap. This approach provides a centralized way to configure priority class names for all data mover pods. The same approach will be used for PVB (PodVolumeBackup) and PVR (PodVolumeRestore) pods, which will also retrieve their priority class name from the node-agent-configmap. -For PVB and PVR pods specifically, the controllers will need to be updated to retrieve the priority class name from the node-agent-configmap and pass it to the pod creation functions. For example, in the PodVolumeBackup controller: +For PVB and PVR pods specifically, the implementation follows this approach: + +1. **Controller Initialization**: Both PodVolumeBackup and PodVolumeRestore controllers are updated to accept a priority class name as a string parameter. The node-agent server reads the priority class from the node-agent-configmap once at startup: ```go -// In pkg/controller/pod_volume_backup_controller.go -priorityClassName, _ := kube.GetDataMoverPriorityClassName(ctx, namespace, kubeClient, configMapName) +// In node-agent server startup (pkg/cmd/cli/nodeagent/server.go) +dataMovePriorityClass := "" +if s.config.nodeAgentConfig != "" { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*30) + defer cancel() + priorityClass, err := kube.GetDataMoverPriorityClassName(ctx, s.namespace, s.kubeClient, s.config.nodeAgentConfig) + if err != nil { + s.logger.WithError(err).Warn("Failed to get priority class name from node-agent-configmap, using empty value") + } else if priorityClass != "" { + // Validate the priority class exists in the cluster + if kube.ValidatePriorityClass(ctx, s.kubeClient, priorityClass, s.logger.WithField("component", "data-mover")) { + dataMovePriorityClass = priorityClass + s.logger.WithField("priorityClassName", priorityClass).Info("Using priority class for data mover pods") + } else { + s.logger.WithField("priorityClassName", priorityClass).Warn("Priority class not found in cluster, data mover pods will use default priority") + } + } +} -// Add priorityClassName to the pod spec -pod := &corev1api.Pod{ +// Pass priority class to controllers +pvbReconciler := controller.NewPodVolumeBackupReconciler( + s.mgr.GetClient(), s.mgr, s.kubeClient, ..., dataMovePriorityClass) +pvrReconciler := controller.NewPodVolumeRestoreReconciler( + s.mgr.GetClient(), s.mgr, s.kubeClient, ..., dataMovePriorityClass) +``` + +2. 
**Controller Structure**: Controllers store the priority class name as a field: + +```go +type PodVolumeBackupReconciler struct { // ... existing fields ... - Spec: corev1api.PodSpec{ - // ... existing fields ... - PriorityClassName: priorityClassName, - }, + dataMovePriorityClass string } ``` -Similarly, in the PodVolumeRestore controller: - -```go -// In pkg/controller/pod_volume_restore_controller.go -priorityClassName, _ := kube.GetDataMoverPriorityClassName(ctx, namespace, kubeClient, configMapName) - -// Add priorityClassName to the pod spec -pod := &corev1api.Pod{ - // ... existing fields ... - Spec: corev1api.PodSpec{ - // ... existing fields ... - PriorityClassName: priorityClassName, - }, -} -``` +3. **Pod Creation**: The priority class is included in the pod spec when creating data mover pods. ### VGDP Micro-Service Considerations @@ -582,6 +669,26 @@ With the introduction of VGDP micro-services (as described in the VGDP micro-ser This ensures that all pods created by Velero for data movement operations (CSI snapshot data movement, PVB, and PVR) use a consistent approach for priority class name configuration through the node-agent-configmap. +### How Exposers Receive Configuration + +CSI Snapshot Exposer and Generic Restore Exposer do not directly watch or read ConfigMaps. Instead, they receive configuration through their parent controllers: + +1. **Controller Initialization**: Controllers receive the priority class name as a parameter during initialization from the node-agent server. + +2. **Configuration Propagation**: During reconciliation of resources: + - The controller calls `setupExposeParam()` which includes the `dataMovePriorityClass` value + - For CSI operations: `CSISnapshotExposeParam.PriorityClassName` is set + - For generic restore: `GenericRestoreExposeParam.PriorityClassName` is set + - The controller passes these parameters to the exposer's `Expose()` method + +3. **Pod Creation**: The exposer creates pods with the priority class name provided by the controller. + +This design keeps exposers stateless and ensures: +- Exposers remain simple and focused on pod creation +- All configuration flows through controllers consistently +- No complex state synchronization between components +- Configuration changes require component restart to take effect + ## Open Issues None. diff --git a/internal/resourcepolicies/resource_policies.go b/internal/resourcepolicies/resource_policies.go index 7d3930280..0837e63e2 100644 --- a/internal/resourcepolicies/resource_policies.go +++ b/internal/resourcepolicies/resource_policies.go @@ -13,6 +13,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ + package resourcepolicies import ( @@ -20,6 +21,8 @@ import ( "fmt" "strings" + "k8s.io/apimachinery/pkg/util/sets" + "github.com/pkg/errors" "github.com/sirupsen/logrus" corev1api "k8s.io/api/core/v1" @@ -49,24 +52,58 @@ type Action struct { Parameters map[string]any `yaml:"parameters,omitempty"` } -// volumePolicy defined policy to conditions to match Volumes and related action to handle matched Volumes +// IncludeExcludePolicy defined policy to include or exclude resources based on the names +type IncludeExcludePolicy struct { + // The following fields have the same semantics as those from the spec of backup. + // Refer to the comment in the velerov1api.BackupSpec for more details. 
+ IncludedClusterScopedResources []string `yaml:"includedClusterScopedResources"` + ExcludedClusterScopedResources []string `yaml:"excludedClusterScopedResources"` + IncludedNamespaceScopedResources []string `yaml:"includedNamespaceScopedResources"` + ExcludedNamespaceScopedResources []string `yaml:"excludedNamespaceScopedResources"` +} + +func (p *IncludeExcludePolicy) Validate() error { + if err := p.validateIncludeExclude(p.IncludedClusterScopedResources, p.ExcludedClusterScopedResources); err != nil { + return err + } + return p.validateIncludeExclude(p.IncludedNamespaceScopedResources, p.ExcludedNamespaceScopedResources) +} + +func (p *IncludeExcludePolicy) validateIncludeExclude(includesList, excludesList []string) error { + includes := sets.NewString(includesList...) + excludes := sets.NewString(excludesList...) + + if includes.Has("*") || excludes.Has("*") { + return fmt.Errorf("cannot use '*' in includes or excludes filters in the policy") + } + for _, itm := range excludes.List() { + if includes.Has(itm) { + return fmt.Errorf("excludes list cannot contain an item in the includes list: %s", itm) + } + } + return nil +} + +// VolumePolicy defined policy to conditions to match Volumes and related action to handle matched Volumes type VolumePolicy struct { // Conditions defined list of conditions to match Volumes Conditions map[string]any `yaml:"conditions"` Action Action `yaml:"action"` } -// resourcePolicies currently defined slice of volume policies to handle backup +// ResourcePolicies currently defined slice of volume policies to handle backup type ResourcePolicies struct { - Version string `yaml:"version"` - VolumePolicies []VolumePolicy `yaml:"volumePolicies"` + Version string `yaml:"version"` + VolumePolicies []VolumePolicy `yaml:"volumePolicies"` + IncludeExcludePolicy *IncludeExcludePolicy `yaml:"includeExcludePolicy"` // we may support other resource policies in the future, and they could be added separately // OtherResourcePolicies []OtherResourcePolicy } type Policies struct { - version string - volumePolicies []volPolicy + version string + volumePolicies []volPolicy + includeExcludePolicy *IncludeExcludePolicy // OtherPolicies } @@ -115,6 +152,7 @@ func (p *Policies) BuildPolicy(resPolicies *ResourcePolicies) error { // Other resource policies p.version = resPolicies.Version + p.includeExcludePolicy = resPolicies.IncludeExcludePolicy return nil } @@ -175,9 +213,20 @@ func (p *Policies) Validate() error { } } } + + if p.GetIncludeExcludePolicy() != nil { + if err := p.GetIncludeExcludePolicy().Validate(); err != nil { + return errors.WithStack(err) + } + } + return nil } +func (p *Policies) GetIncludeExcludePolicy() *IncludeExcludePolicy { + return p.includeExcludePolicy +} + func GetResourcePoliciesFromBackup( backup velerov1api.Backup, client crclient.Client, diff --git a/internal/resourcepolicies/volume_resources_validator_test.go b/internal/resourcepolicies/volume_resources_validator_test.go index cf795b3ae..f2812a786 100644 --- a/internal/resourcepolicies/volume_resources_validator_test.go +++ b/internal/resourcepolicies/volume_resources_validator_test.go @@ -453,6 +453,102 @@ func TestValidate(t *testing.T) { }, wantErr: true, }, + { + name: " '*' in the filters of include exclude policy - 1", + res: &ResourcePolicies{ + Version: "v1", + VolumePolicies: []VolumePolicy{ + { + Action: Action{Type: "skip"}, + Conditions: map[string]any{ + "pvcLabels": map[string]string{ + "environment": "production", + "app": "database", + }, + }, + }, + }, + IncludeExcludePolicy: 
&IncludeExcludePolicy{ + IncludedClusterScopedResources: []string{"*"}, + ExcludedClusterScopedResources: []string{"crds"}, + IncludedNamespaceScopedResources: []string{"pods"}, + ExcludedNamespaceScopedResources: []string{"secrets"}, + }, + }, + wantErr: true, + }, + { + name: " '*' in the filters of include exclude policy - 2", + res: &ResourcePolicies{ + Version: "v1", + VolumePolicies: []VolumePolicy{ + { + Action: Action{Type: "skip"}, + Conditions: map[string]any{ + "pvcLabels": map[string]string{ + "environment": "production", + "app": "database", + }, + }, + }, + }, + IncludeExcludePolicy: &IncludeExcludePolicy{ + IncludedClusterScopedResources: []string{"persistentvolumes"}, + ExcludedClusterScopedResources: []string{"crds"}, + IncludedNamespaceScopedResources: []string{"pods"}, + ExcludedNamespaceScopedResources: []string{"*"}, + }, + }, + wantErr: true, + }, + { + name: " dup item in both the include and exclude filters of include exclude policy", + res: &ResourcePolicies{ + Version: "v1", + VolumePolicies: []VolumePolicy{ + { + Action: Action{Type: "skip"}, + Conditions: map[string]any{ + "pvcLabels": map[string]string{ + "environment": "production", + "app": "database", + }, + }, + }, + }, + IncludeExcludePolicy: &IncludeExcludePolicy{ + IncludedClusterScopedResources: []string{"persistentvolumes"}, + ExcludedClusterScopedResources: []string{"crds"}, + IncludedNamespaceScopedResources: []string{"pods", "configmaps"}, + ExcludedNamespaceScopedResources: []string{"secrets", "pods"}, + }, + }, + wantErr: true, + }, + { + name: " valid volume policies and valid include/exclude policy", + res: &ResourcePolicies{ + Version: "v1", + VolumePolicies: []VolumePolicy{ + { + Action: Action{Type: "skip"}, + Conditions: map[string]any{ + "pvcLabels": map[string]string{ + "environment": "production", + "app": "database", + }, + }, + }, + }, + IncludeExcludePolicy: &IncludeExcludePolicy{ + IncludedClusterScopedResources: []string{"persistentvolumes"}, + ExcludedClusterScopedResources: []string{"crds"}, + IncludedNamespaceScopedResources: []string{"pods", "configmaps"}, + ExcludedNamespaceScopedResources: []string{"secrets"}, + }, + }, + wantErr: false, + }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { diff --git a/pkg/backup/backup.go b/pkg/backup/backup.go index 755eaea56..a11acd52a 100644 --- a/pkg/backup/backup.go +++ b/pkg/backup/backup.go @@ -43,7 +43,6 @@ import ( kbclient "sigs.k8s.io/controller-runtime/pkg/client" "github.com/vmware-tanzu/velero/internal/hook" - "github.com/vmware-tanzu/velero/internal/resourcepolicies" "github.com/vmware-tanzu/velero/internal/volume" "github.com/vmware-tanzu/velero/internal/volumehelper" velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" @@ -270,13 +269,17 @@ func (kb *kubernetesBackupper) BackupWithResolvers( backupRequest.Spec.IncludeClusterResources, *backupRequest.NamespaceIncludesExcludes) } else { - backupRequest.ResourceIncludesExcludes = collections.GetScopeResourceIncludesExcludes(kb.discoveryHelper, log, + srie := collections.GetScopeResourceIncludesExcludes(kb.discoveryHelper, log, backupRequest.Spec.IncludedNamespaceScopedResources, backupRequest.Spec.ExcludedNamespaceScopedResources, backupRequest.Spec.IncludedClusterScopedResources, backupRequest.Spec.ExcludedClusterScopedResources, *backupRequest.NamespaceIncludesExcludes, ) + if backupRequest.ResPolicies != nil { + srie.CombineWithPolicy(backupRequest.ResPolicies.GetIncludeExcludePolicy()) + } + backupRequest.ResourceIncludesExcludes = srie } 
log.Infof("Backing up all volumes using pod volume backup: %t", boolptr.IsSetToTrue(backupRequest.Backup.Spec.DefaultVolumesToFsBackup)) @@ -355,11 +358,6 @@ func (kb *kubernetesBackupper) BackupWithResolvers( } backupRequest.Status.Progress = &velerov1api.BackupProgress{TotalItems: len(items)} - var resourcePolicy *resourcepolicies.Policies - if backupRequest.ResPolicies != nil { - resourcePolicy = backupRequest.ResPolicies - } - itemBackupper := &itemBackupper{ backupRequest: backupRequest, tarWriter: tw, @@ -374,7 +372,7 @@ func (kb *kubernetesBackupper) BackupWithResolvers( }, hookTracker: hook.NewHookTracker(), volumeHelperImpl: volumehelper.NewVolumeHelperImpl( - resourcePolicy, + backupRequest.ResPolicies, backupRequest.Spec.SnapshotVolumes, log, kb.kbClient, diff --git a/pkg/cmd/cli/install/install.go b/pkg/cmd/cli/install/install.go index 2a6194f7b..7079ec79b 100644 --- a/pkg/cmd/cli/install/install.go +++ b/pkg/cmd/cli/install/install.go @@ -91,6 +91,8 @@ type Options struct { ItemBlockWorkerCount int NodeAgentDisableHostPath bool kubeletRootDir string + ServerPriorityClassName string + NodeAgentPriorityClassName string } // BindFlags adds command line values to the options struct. @@ -194,6 +196,18 @@ func (o *Options) BindFlags(flags *pflag.FlagSet) { o.ItemBlockWorkerCount, "Number of worker threads to process ItemBlocks. Default is one. Optional.", ) + flags.StringVar( + &o.ServerPriorityClassName, + "server-priority-class-name", + o.ServerPriorityClassName, + "Priority class name for the Velero server deployment. Optional.", + ) + flags.StringVar( + &o.NodeAgentPriorityClassName, + "node-agent-priority-class-name", + o.NodeAgentPriorityClassName, + "Priority class name for the node agent daemonset. Optional.", + ) } // NewInstallOptions instantiates a new, default InstallOptions struct. @@ -301,6 +315,8 @@ func (o *Options) AsVeleroOptions() (*install.VeleroOptions, error) { ItemBlockWorkerCount: o.ItemBlockWorkerCount, KubeletRootDir: o.kubeletRootDir, NodeAgentDisableHostPath: o.NodeAgentDisableHostPath, + ServerPriorityClassName: o.ServerPriorityClassName, + NodeAgentPriorityClassName: o.NodeAgentPriorityClassName, }, nil } @@ -389,6 +405,7 @@ func (o *Options) Run(c *cobra.Command, f client.Factory) error { if err != nil { return err } + errorMsg := fmt.Sprintf("\n\nError installing Velero. Use `kubectl logs deploy/velero -n %s` to check the deploy logs", o.Namespace) err = install.Install(dynamicFactory, kbClient, resources, os.Stdout) diff --git a/pkg/cmd/cli/install/install_test.go b/pkg/cmd/cli/install/install_test.go new file mode 100644 index 000000000..c5d147646 --- /dev/null +++ b/pkg/cmd/cli/install/install_test.go @@ -0,0 +1,93 @@ +/* +Copyright the Velero contributors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package install + +import ( + "testing" + + "github.com/spf13/pflag" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestPriorityClassNameFlag(t *testing.T) { + // Test that the flag is properly defined + o := NewInstallOptions() + flags := pflag.NewFlagSet("test", pflag.ContinueOnError) + o.BindFlags(flags) + + // Verify the server priority class flag exists + serverFlag := flags.Lookup("server-priority-class-name") + assert.NotNil(t, serverFlag, "server-priority-class-name flag should exist") + assert.Equal(t, "Priority class name for the Velero server deployment. Optional.", serverFlag.Usage) + + // Verify the node agent priority class flag exists + nodeAgentFlag := flags.Lookup("node-agent-priority-class-name") + assert.NotNil(t, nodeAgentFlag, "node-agent-priority-class-name flag should exist") + assert.Equal(t, "Priority class name for the node agent daemonset. Optional.", nodeAgentFlag.Usage) + + // Test with values for both server and node agent + testCases := []struct { + name string + serverPriorityClassName string + nodeAgentPriorityClassName string + expectedServerValue string + expectedNodeAgentValue string + }{ + { + name: "with both priority class names", + serverPriorityClassName: "high-priority", + nodeAgentPriorityClassName: "medium-priority", + expectedServerValue: "high-priority", + expectedNodeAgentValue: "medium-priority", + }, + { + name: "with only server priority class name", + serverPriorityClassName: "high-priority", + nodeAgentPriorityClassName: "", + expectedServerValue: "high-priority", + expectedNodeAgentValue: "", + }, + { + name: "with only node agent priority class name", + serverPriorityClassName: "", + nodeAgentPriorityClassName: "medium-priority", + expectedServerValue: "", + expectedNodeAgentValue: "medium-priority", + }, + { + name: "without priority class names", + serverPriorityClassName: "", + nodeAgentPriorityClassName: "", + expectedServerValue: "", + expectedNodeAgentValue: "", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + o := NewInstallOptions() + o.ServerPriorityClassName = tc.serverPriorityClassName + o.NodeAgentPriorityClassName = tc.nodeAgentPriorityClassName + + veleroOptions, err := o.AsVeleroOptions() + require.NoError(t, err) + assert.Equal(t, tc.expectedServerValue, veleroOptions.ServerPriorityClassName) + assert.Equal(t, tc.expectedNodeAgentValue, veleroOptions.NodeAgentPriorityClassName) + }) + } +} diff --git a/pkg/cmd/cli/nodeagent/server.go b/pkg/cmd/cli/nodeagent/server.go index dae91d91f..0ff5b2f1c 100644 --- a/pkg/cmd/cli/nodeagent/server.go +++ b/pkg/cmd/cli/nodeagent/server.go @@ -280,6 +280,21 @@ func (s *nodeAgentServer) run() { s.logger.Info("Starting controllers") + // Get priority class from dataPathConfigs if available + dataMovePriorityClass := "" + if s.dataPathConfigs != nil && s.dataPathConfigs.PriorityClassName != "" { + priorityClass := s.dataPathConfigs.PriorityClassName + // Validate the priority class exists in the cluster + ctx, cancel := context.WithTimeout(s.ctx, time.Second*30) + defer cancel() + if kube.ValidatePriorityClass(ctx, s.kubeClient, priorityClass, s.logger.WithField("component", "data-mover")) { + dataMovePriorityClass = priorityClass + s.logger.WithField("priorityClassName", priorityClass).Info("Using priority class for data mover pods") + } else { + s.logger.WithField("priorityClassName", priorityClass).Warn("Priority class not found in cluster, data mover pods will use default priority") + } + } + var 
loadAffinity []*kube.LoadAffinity if s.dataPathConfigs != nil && len(s.dataPathConfigs.LoadAffinity) > 0 { loadAffinity = s.dataPathConfigs.LoadAffinity @@ -311,12 +326,12 @@ func (s *nodeAgentServer) run() { } } - pvbReconciler := controller.NewPodVolumeBackupReconciler(s.mgr.GetClient(), s.mgr, s.kubeClient, s.dataPathMgr, s.vgdpCounter, s.nodeName, s.config.dataMoverPrepareTimeout, s.config.resourceTimeout, podResources, s.metrics, s.logger) + pvbReconciler := controller.NewPodVolumeBackupReconciler(s.mgr.GetClient(), s.mgr, s.kubeClient, s.dataPathMgr, s.vgdpCounter, s.nodeName, s.config.dataMoverPrepareTimeout, s.config.resourceTimeout, podResources, s.metrics, s.logger, dataMovePriorityClass) if err := pvbReconciler.SetupWithManager(s.mgr); err != nil { s.logger.Fatal(err, "unable to create controller", "controller", constant.ControllerPodVolumeBackup) } - pvrReconciler := controller.NewPodVolumeRestoreReconciler(s.mgr.GetClient(), s.mgr, s.kubeClient, s.dataPathMgr, s.vgdpCounter, s.nodeName, s.config.dataMoverPrepareTimeout, s.config.resourceTimeout, podResources, s.logger) + pvrReconciler := controller.NewPodVolumeRestoreReconciler(s.mgr.GetClient(), s.mgr, s.kubeClient, s.dataPathMgr, s.vgdpCounter, s.nodeName, s.config.dataMoverPrepareTimeout, s.config.resourceTimeout, podResources, s.logger, dataMovePriorityClass) if err := pvrReconciler.SetupWithManager(s.mgr); err != nil { s.logger.WithError(err).Fatal("Unable to create the pod volume restore controller") } @@ -340,6 +355,7 @@ func (s *nodeAgentServer) run() { s.config.dataMoverPrepareTimeout, s.logger, s.metrics, + dataMovePriorityClass, ) if err := dataUploadReconciler.SetupWithManager(s.mgr); err != nil { s.logger.WithError(err).Fatal("Unable to create the data upload controller") @@ -364,6 +380,7 @@ func (s *nodeAgentServer) run() { s.config.dataMoverPrepareTimeout, s.logger, s.metrics, + dataMovePriorityClass, ) if err := dataDownloadReconciler.SetupWithManager(s.mgr); err != nil { diff --git a/pkg/cmd/util/output/backup_describer_test.go b/pkg/cmd/util/output/backup_describer_test.go index 1d158f1ed..96e25466f 100644 --- a/pkg/cmd/util/output/backup_describer_test.go +++ b/pkg/cmd/util/output/backup_describer_test.go @@ -235,9 +235,10 @@ Hooks: Excluded: Resources: - Included: * - Excluded: - Cluster-scoped: auto + Included cluster-scoped: + Excluded cluster-scoped: + Included namespace-scoped: * + Excluded namespace-scoped: Label selector: @@ -292,9 +293,10 @@ OrderedResources: Excluded: Resources: - Included: * - Excluded: - Cluster-scoped: auto + Included cluster-scoped: + Excluded cluster-scoped: + Included namespace-scoped: * + Excluded namespace-scoped: Label selector: @@ -325,9 +327,10 @@ Hooks: Excluded: Resources: - Included: * - Excluded: - Cluster-scoped: auto + Included cluster-scoped: + Excluded cluster-scoped: + Included namespace-scoped: * + Excluded namespace-scoped: Label selector: diff --git a/pkg/cmd/util/output/schedule_describe_test.go b/pkg/cmd/util/output/schedule_describe_test.go index 4d002619b..12e8c5858 100644 --- a/pkg/cmd/util/output/schedule_describe_test.go +++ b/pkg/cmd/util/output/schedule_describe_test.go @@ -32,9 +32,10 @@ Backup Template: Excluded: Resources: - Included: * - Excluded: - Cluster-scoped: auto + Included cluster-scoped: + Excluded cluster-scoped: + Included namespace-scoped: * + Excluded namespace-scoped: Label selector: @@ -81,9 +82,10 @@ Backup Template: Excluded: Resources: - Included: * - Excluded: - Cluster-scoped: auto + Included cluster-scoped: + Excluded 
cluster-scoped: + Included namespace-scoped: * + Excluded namespace-scoped: Label selector: @@ -127,9 +129,10 @@ Backup Template: Excluded: Resources: - Included: * - Excluded: - Cluster-scoped: auto + Included cluster-scoped: + Excluded cluster-scoped: + Included namespace-scoped: * + Excluded namespace-scoped: Label selector: @@ -174,9 +177,10 @@ Backup Template: Excluded: Resources: - Included: * - Excluded: - Cluster-scoped: auto + Included cluster-scoped: + Excluded cluster-scoped: + Included namespace-scoped: * + Excluded namespace-scoped: Label selector: diff --git a/pkg/controller/backup_controller.go b/pkg/controller/backup_controller.go index 18b351e20..532a5f332 100644 --- a/pkg/controller/backup_controller.go +++ b/pkg/controller/backup_controller.go @@ -558,8 +558,11 @@ func (b *backupReconciler) prepareBackupRequest(backup *velerov1api.Backup, logg if err != nil { request.Status.ValidationErrors = append(request.Status.ValidationErrors, err.Error()) } + if resourcePolicies != nil && resourcePolicies.GetIncludeExcludePolicy() != nil && collections.UseOldResourceFilters(request.Spec) { + request.Status.ValidationErrors = append(request.Status.ValidationErrors, "include-resources, exclude-resources and include-cluster-resources are old filter parameters.\n"+ + "They cannot be used with include-exclude policies.") + } request.ResPolicies = resourcePolicies - return request } @@ -812,7 +815,6 @@ func (b *backupReconciler) runBackup(backup *pkgbackup.Request) error { fatalErrs = append(fatalErrs, errs...) } } - b.logger.WithField(constant.ControllerBackup, kubeutil.NamespaceAndName(backup)).Infof("Initial backup processing complete, moving to %s", backup.Status.Phase) // if we return a non-nil error, the calling function will update diff --git a/pkg/controller/backup_controller_test.go b/pkg/controller/backup_controller_test.go index eb20af907..715f5068a 100644 --- a/pkg/controller/backup_controller_test.go +++ b/pkg/controller/backup_controller_test.go @@ -702,10 +702,11 @@ func TestProcessBackupCompletions(t *testing.T) { }, }, Spec: velerov1api.BackupSpec{ - StorageLocation: defaultBackupLocation.Name, - DefaultVolumesToFsBackup: boolptr.True(), - SnapshotMoveData: boolptr.False(), - ExcludedResources: append(autoExcludeNamespaceScopedResources, autoExcludeClusterScopedResources...), + StorageLocation: defaultBackupLocation.Name, + DefaultVolumesToFsBackup: boolptr.True(), + SnapshotMoveData: boolptr.False(), + ExcludedClusterScopedResources: autoExcludeClusterScopedResources, + ExcludedNamespaceScopedResources: autoExcludeNamespaceScopedResources, }, Status: velerov1api.BackupStatus{ Phase: velerov1api.BackupPhaseFinalizing, @@ -740,10 +741,11 @@ func TestProcessBackupCompletions(t *testing.T) { }, }, Spec: velerov1api.BackupSpec{ - StorageLocation: "alt-loc", - DefaultVolumesToFsBackup: boolptr.False(), - SnapshotMoveData: boolptr.False(), - ExcludedResources: append(autoExcludeNamespaceScopedResources, autoExcludeClusterScopedResources...), + StorageLocation: "alt-loc", + DefaultVolumesToFsBackup: boolptr.False(), + SnapshotMoveData: boolptr.False(), + ExcludedClusterScopedResources: autoExcludeClusterScopedResources, + ExcludedNamespaceScopedResources: autoExcludeNamespaceScopedResources, }, Status: velerov1api.BackupStatus{ Phase: velerov1api.BackupPhaseFinalizing, @@ -782,10 +784,11 @@ func TestProcessBackupCompletions(t *testing.T) { }, }, Spec: velerov1api.BackupSpec{ - StorageLocation: "read-write", - DefaultVolumesToFsBackup: boolptr.True(), - SnapshotMoveData: 
boolptr.False(), - ExcludedResources: append(autoExcludeNamespaceScopedResources, autoExcludeClusterScopedResources...), + StorageLocation: "read-write", + DefaultVolumesToFsBackup: boolptr.True(), + SnapshotMoveData: boolptr.False(), + ExcludedClusterScopedResources: autoExcludeClusterScopedResources, + ExcludedNamespaceScopedResources: autoExcludeNamespaceScopedResources, }, Status: velerov1api.BackupStatus{ Phase: velerov1api.BackupPhaseFinalizing, @@ -820,11 +823,12 @@ func TestProcessBackupCompletions(t *testing.T) { }, }, Spec: velerov1api.BackupSpec{ - TTL: metav1.Duration{Duration: 10 * time.Minute}, - StorageLocation: defaultBackupLocation.Name, - DefaultVolumesToFsBackup: boolptr.False(), - SnapshotMoveData: boolptr.False(), - ExcludedResources: append(autoExcludeNamespaceScopedResources, autoExcludeClusterScopedResources...), + TTL: metav1.Duration{Duration: 10 * time.Minute}, + StorageLocation: defaultBackupLocation.Name, + DefaultVolumesToFsBackup: boolptr.False(), + SnapshotMoveData: boolptr.False(), + ExcludedClusterScopedResources: autoExcludeClusterScopedResources, + ExcludedNamespaceScopedResources: autoExcludeNamespaceScopedResources, }, Status: velerov1api.BackupStatus{ Phase: velerov1api.BackupPhaseFinalizing, @@ -860,10 +864,11 @@ func TestProcessBackupCompletions(t *testing.T) { }, }, Spec: velerov1api.BackupSpec{ - StorageLocation: defaultBackupLocation.Name, - DefaultVolumesToFsBackup: boolptr.True(), - SnapshotMoveData: boolptr.False(), - ExcludedResources: append(autoExcludeNamespaceScopedResources, autoExcludeClusterScopedResources...), + StorageLocation: defaultBackupLocation.Name, + DefaultVolumesToFsBackup: boolptr.True(), + SnapshotMoveData: boolptr.False(), + ExcludedClusterScopedResources: autoExcludeClusterScopedResources, + ExcludedNamespaceScopedResources: autoExcludeNamespaceScopedResources, }, Status: velerov1api.BackupStatus{ Phase: velerov1api.BackupPhaseFinalizing, @@ -900,10 +905,11 @@ func TestProcessBackupCompletions(t *testing.T) { }, }, Spec: velerov1api.BackupSpec{ - StorageLocation: defaultBackupLocation.Name, - DefaultVolumesToFsBackup: boolptr.False(), - SnapshotMoveData: boolptr.False(), - ExcludedResources: append(autoExcludeNamespaceScopedResources, autoExcludeClusterScopedResources...), + StorageLocation: defaultBackupLocation.Name, + DefaultVolumesToFsBackup: boolptr.False(), + SnapshotMoveData: boolptr.False(), + ExcludedClusterScopedResources: autoExcludeClusterScopedResources, + ExcludedNamespaceScopedResources: autoExcludeNamespaceScopedResources, }, Status: velerov1api.BackupStatus{ Phase: velerov1api.BackupPhaseFinalizing, @@ -940,10 +946,11 @@ func TestProcessBackupCompletions(t *testing.T) { }, }, Spec: velerov1api.BackupSpec{ - StorageLocation: defaultBackupLocation.Name, - DefaultVolumesToFsBackup: boolptr.True(), - SnapshotMoveData: boolptr.False(), - ExcludedResources: append(autoExcludeNamespaceScopedResources, autoExcludeClusterScopedResources...), + StorageLocation: defaultBackupLocation.Name, + DefaultVolumesToFsBackup: boolptr.True(), + SnapshotMoveData: boolptr.False(), + ExcludedClusterScopedResources: autoExcludeClusterScopedResources, + ExcludedNamespaceScopedResources: autoExcludeNamespaceScopedResources, }, Status: velerov1api.BackupStatus{ Phase: velerov1api.BackupPhaseFinalizing, @@ -980,10 +987,11 @@ func TestProcessBackupCompletions(t *testing.T) { }, }, Spec: velerov1api.BackupSpec{ - StorageLocation: defaultBackupLocation.Name, - DefaultVolumesToFsBackup: boolptr.True(), - SnapshotMoveData: 
boolptr.False(), - ExcludedResources: append(autoExcludeNamespaceScopedResources, autoExcludeClusterScopedResources...), + StorageLocation: defaultBackupLocation.Name, + DefaultVolumesToFsBackup: boolptr.True(), + SnapshotMoveData: boolptr.False(), + ExcludedClusterScopedResources: autoExcludeClusterScopedResources, + ExcludedNamespaceScopedResources: autoExcludeNamespaceScopedResources, }, Status: velerov1api.BackupStatus{ Phase: velerov1api.BackupPhaseFinalizing, @@ -1020,10 +1028,11 @@ func TestProcessBackupCompletions(t *testing.T) { }, }, Spec: velerov1api.BackupSpec{ - StorageLocation: defaultBackupLocation.Name, - DefaultVolumesToFsBackup: boolptr.False(), - SnapshotMoveData: boolptr.False(), - ExcludedResources: append(autoExcludeNamespaceScopedResources, autoExcludeClusterScopedResources...), + StorageLocation: defaultBackupLocation.Name, + DefaultVolumesToFsBackup: boolptr.False(), + SnapshotMoveData: boolptr.False(), + ExcludedClusterScopedResources: autoExcludeClusterScopedResources, + ExcludedNamespaceScopedResources: autoExcludeNamespaceScopedResources, }, Status: velerov1api.BackupStatus{ Phase: velerov1api.BackupPhaseFinalizing, @@ -1061,10 +1070,11 @@ func TestProcessBackupCompletions(t *testing.T) { }, }, Spec: velerov1api.BackupSpec{ - StorageLocation: defaultBackupLocation.Name, - DefaultVolumesToFsBackup: boolptr.True(), - SnapshotMoveData: boolptr.False(), - ExcludedResources: append(autoExcludeNamespaceScopedResources, autoExcludeClusterScopedResources...), + StorageLocation: defaultBackupLocation.Name, + DefaultVolumesToFsBackup: boolptr.True(), + SnapshotMoveData: boolptr.False(), + ExcludedClusterScopedResources: autoExcludeClusterScopedResources, + ExcludedNamespaceScopedResources: autoExcludeNamespaceScopedResources, }, Status: velerov1api.BackupStatus{ Phase: velerov1api.BackupPhaseFailed, @@ -1102,10 +1112,11 @@ func TestProcessBackupCompletions(t *testing.T) { }, }, Spec: velerov1api.BackupSpec{ - StorageLocation: defaultBackupLocation.Name, - DefaultVolumesToFsBackup: boolptr.True(), - SnapshotMoveData: boolptr.False(), - ExcludedResources: append(autoExcludeNamespaceScopedResources, autoExcludeClusterScopedResources...), + StorageLocation: defaultBackupLocation.Name, + DefaultVolumesToFsBackup: boolptr.True(), + SnapshotMoveData: boolptr.False(), + ExcludedClusterScopedResources: autoExcludeClusterScopedResources, + ExcludedNamespaceScopedResources: autoExcludeNamespaceScopedResources, }, Status: velerov1api.BackupStatus{ Phase: velerov1api.BackupPhaseFailed, @@ -1143,10 +1154,11 @@ func TestProcessBackupCompletions(t *testing.T) { }, }, Spec: velerov1api.BackupSpec{ - StorageLocation: defaultBackupLocation.Name, - DefaultVolumesToFsBackup: boolptr.False(), - SnapshotMoveData: boolptr.True(), - ExcludedResources: append(autoExcludeNamespaceScopedResources, autoExcludeClusterScopedResources...), + StorageLocation: defaultBackupLocation.Name, + DefaultVolumesToFsBackup: boolptr.False(), + SnapshotMoveData: boolptr.True(), + ExcludedClusterScopedResources: autoExcludeClusterScopedResources, + ExcludedNamespaceScopedResources: autoExcludeNamespaceScopedResources, }, Status: velerov1api.BackupStatus{ Phase: velerov1api.BackupPhaseFinalizing, @@ -1185,10 +1197,11 @@ func TestProcessBackupCompletions(t *testing.T) { }, }, Spec: velerov1api.BackupSpec{ - StorageLocation: defaultBackupLocation.Name, - DefaultVolumesToFsBackup: boolptr.False(), - SnapshotMoveData: boolptr.False(), - ExcludedResources: append(autoExcludeNamespaceScopedResources, 
autoExcludeClusterScopedResources...), + StorageLocation: defaultBackupLocation.Name, + DefaultVolumesToFsBackup: boolptr.False(), + SnapshotMoveData: boolptr.False(), + ExcludedClusterScopedResources: autoExcludeClusterScopedResources, + ExcludedNamespaceScopedResources: autoExcludeNamespaceScopedResources, }, Status: velerov1api.BackupStatus{ Phase: velerov1api.BackupPhaseFinalizing, @@ -1227,10 +1240,11 @@ func TestProcessBackupCompletions(t *testing.T) { }, }, Spec: velerov1api.BackupSpec{ - StorageLocation: defaultBackupLocation.Name, - DefaultVolumesToFsBackup: boolptr.False(), - SnapshotMoveData: boolptr.False(), - ExcludedResources: append(autoExcludeNamespaceScopedResources, autoExcludeClusterScopedResources...), + StorageLocation: defaultBackupLocation.Name, + DefaultVolumesToFsBackup: boolptr.False(), + SnapshotMoveData: boolptr.False(), + ExcludedClusterScopedResources: autoExcludeClusterScopedResources, + ExcludedNamespaceScopedResources: autoExcludeNamespaceScopedResources, }, Status: velerov1api.BackupStatus{ Phase: velerov1api.BackupPhaseFinalizing, @@ -1269,10 +1283,11 @@ func TestProcessBackupCompletions(t *testing.T) { }, }, Spec: velerov1api.BackupSpec{ - StorageLocation: defaultBackupLocation.Name, - DefaultVolumesToFsBackup: boolptr.False(), - SnapshotMoveData: boolptr.True(), - ExcludedResources: append(autoExcludeNamespaceScopedResources, autoExcludeClusterScopedResources...), + StorageLocation: defaultBackupLocation.Name, + DefaultVolumesToFsBackup: boolptr.False(), + SnapshotMoveData: boolptr.True(), + ExcludedClusterScopedResources: autoExcludeClusterScopedResources, + ExcludedNamespaceScopedResources: autoExcludeNamespaceScopedResources, }, Status: velerov1api.BackupStatus{ Phase: velerov1api.BackupPhaseFinalizing, @@ -1312,10 +1327,11 @@ func TestProcessBackupCompletions(t *testing.T) { }, }, Spec: velerov1api.BackupSpec{ - StorageLocation: defaultBackupLocation.Name, - DefaultVolumesToFsBackup: boolptr.False(), - SnapshotMoveData: boolptr.False(), - ExcludedResources: append(autoExcludeNamespaceScopedResources, autoExcludeClusterScopedResources...), + StorageLocation: defaultBackupLocation.Name, + DefaultVolumesToFsBackup: boolptr.False(), + SnapshotMoveData: boolptr.False(), + ExcludedClusterScopedResources: autoExcludeClusterScopedResources, + ExcludedNamespaceScopedResources: autoExcludeNamespaceScopedResources, }, Status: velerov1api.BackupStatus{ Phase: velerov1api.BackupPhaseFinalizing, @@ -1354,10 +1370,11 @@ func TestProcessBackupCompletions(t *testing.T) { }, }, Spec: velerov1api.BackupSpec{ - StorageLocation: defaultBackupLocation.Name, - DefaultVolumesToFsBackup: boolptr.False(), - SnapshotMoveData: boolptr.True(), - ExcludedResources: append(autoExcludeNamespaceScopedResources, autoExcludeClusterScopedResources...), + StorageLocation: defaultBackupLocation.Name, + DefaultVolumesToFsBackup: boolptr.False(), + SnapshotMoveData: boolptr.True(), + ExcludedClusterScopedResources: autoExcludeClusterScopedResources, + ExcludedNamespaceScopedResources: autoExcludeNamespaceScopedResources, }, Status: velerov1api.BackupStatus{ Phase: velerov1api.BackupPhaseFinalizing, diff --git a/pkg/controller/data_download_controller.go b/pkg/controller/data_download_controller.go index c4b6f63ba..a8a2fc633 100644 --- a/pkg/controller/data_download_controller.go +++ b/pkg/controller/data_download_controller.go @@ -71,6 +71,7 @@ type DataDownloadReconciler struct { preparingTimeout time.Duration metrics *metrics.ServerMetrics cancelledDataDownload map[string]time.Time + 
dataMovePriorityClass string } func NewDataDownloadReconciler( @@ -86,6 +87,7 @@ func NewDataDownloadReconciler( preparingTimeout time.Duration, logger logrus.FieldLogger, metrics *metrics.ServerMetrics, + dataMovePriorityClass string, ) *DataDownloadReconciler { return &DataDownloadReconciler{ client: client, @@ -103,6 +105,7 @@ func NewDataDownloadReconciler( preparingTimeout: preparingTimeout, metrics: metrics, cancelledDataDownload: make(map[string]time.Time), + dataMovePriorityClass: dataMovePriorityClass, } } @@ -890,6 +893,7 @@ func (r *DataDownloadReconciler) setupExposeParam(dd *velerov2alpha1api.DataDown NodeOS: nodeOS, RestorePVCConfig: r.restorePVCConfig, LoadAffinity: r.loadAffinity, + PriorityClassName: r.dataMovePriorityClass, }, nil } diff --git a/pkg/controller/data_download_controller_test.go b/pkg/controller/data_download_controller_test.go index 4dc17ea9f..048a1027c 100644 --- a/pkg/controller/data_download_controller_test.go +++ b/pkg/controller/data_download_controller_test.go @@ -130,19 +130,7 @@ func initDataDownloadReconcilerWithError(t *testing.T, objects []any, needError dataPathMgr := datapath.NewManager(1) - return NewDataDownloadReconciler( - &fakeClient, - nil, - fakeKubeClient, - dataPathMgr, - nil, - nil, - nodeagent.RestorePVC{}, - corev1api.ResourceRequirements{}, - "test-node", - time.Minute*5, - velerotest.NewLogger(), - metrics.NewServerMetrics()), nil + return NewDataDownloadReconciler(&fakeClient, nil, fakeKubeClient, dataPathMgr, nil, nil, nodeagent.RestorePVC{}, corev1api.ResourceRequirements{}, "test-node", time.Minute*5, velerotest.NewLogger(), metrics.NewServerMetrics(), ""), nil } func TestDataDownloadReconcile(t *testing.T) { diff --git a/pkg/controller/data_upload_controller.go b/pkg/controller/data_upload_controller.go index e5adecd63..502385c14 100644 --- a/pkg/controller/data_upload_controller.go +++ b/pkg/controller/data_upload_controller.go @@ -65,22 +65,23 @@ const ( // DataUploadReconciler reconciles a DataUpload object type DataUploadReconciler struct { - client client.Client - kubeClient kubernetes.Interface - csiSnapshotClient snapshotter.SnapshotV1Interface - mgr manager.Manager - Clock clocks.WithTickerAndDelayedExecution - nodeName string - logger logrus.FieldLogger - snapshotExposerList map[velerov2alpha1api.SnapshotType]exposer.SnapshotExposer - dataPathMgr *datapath.Manager - vgdpCounter *exposer.VgdpCounter - loadAffinity []*kube.LoadAffinity - backupPVCConfig map[string]nodeagent.BackupPVC - podResources corev1api.ResourceRequirements - preparingTimeout time.Duration - metrics *metrics.ServerMetrics - cancelledDataUpload map[string]time.Time + client client.Client + kubeClient kubernetes.Interface + csiSnapshotClient snapshotter.SnapshotV1Interface + mgr manager.Manager + Clock clocks.WithTickerAndDelayedExecution + nodeName string + logger logrus.FieldLogger + snapshotExposerList map[velerov2alpha1api.SnapshotType]exposer.SnapshotExposer + dataPathMgr *datapath.Manager + vgdpCounter *exposer.VgdpCounter + loadAffinity []*kube.LoadAffinity + backupPVCConfig map[string]nodeagent.BackupPVC + podResources corev1api.ResourceRequirements + preparingTimeout time.Duration + metrics *metrics.ServerMetrics + cancelledDataUpload map[string]time.Time + dataMovePriorityClass string } func NewDataUploadReconciler( @@ -98,6 +99,7 @@ func NewDataUploadReconciler( preparingTimeout time.Duration, log logrus.FieldLogger, metrics *metrics.ServerMetrics, + dataMovePriorityClass string, ) *DataUploadReconciler { return &DataUploadReconciler{ 
client: client, @@ -114,14 +116,15 @@ func NewDataUploadReconciler( log, ), }, - dataPathMgr: dataPathMgr, - vgdpCounter: counter, - loadAffinity: loadAffinity, - backupPVCConfig: backupPVCConfig, - podResources: podResources, - preparingTimeout: preparingTimeout, - metrics: metrics, - cancelledDataUpload: make(map[string]time.Time), + dataPathMgr: dataPathMgr, + vgdpCounter: counter, + loadAffinity: loadAffinity, + backupPVCConfig: backupPVCConfig, + podResources: podResources, + preparingTimeout: preparingTimeout, + metrics: metrics, + cancelledDataUpload: make(map[string]time.Time), + dataMovePriorityClass: dataMovePriorityClass, } } @@ -971,6 +974,7 @@ func (r *DataUploadReconciler) setupExposeParam(du *velerov2alpha1api.DataUpload BackupPVCConfig: r.backupPVCConfig, Resources: r.podResources, NodeOS: nodeOS, + PriorityClassName: r.dataMovePriorityClass, }, nil } diff --git a/pkg/controller/data_upload_controller_test.go b/pkg/controller/data_upload_controller_test.go index db4c29c1e..157ccd8d6 100644 --- a/pkg/controller/data_upload_controller_test.go +++ b/pkg/controller/data_upload_controller_test.go @@ -249,6 +249,7 @@ func initDataUploaderReconcilerWithError(needError ...error) (*DataUploadReconci time.Minute*5, velerotest.NewLogger(), metrics.NewServerMetrics(), + "", // dataMovePriorityClass ), nil } diff --git a/pkg/controller/pod_volume_backup_controller.go b/pkg/controller/pod_volume_backup_controller.go index f6eddc14b..3a446379f 100644 --- a/pkg/controller/pod_volume_backup_controller.go +++ b/pkg/controller/pod_volume_backup_controller.go @@ -60,41 +60,43 @@ const ( // NewPodVolumeBackupReconciler creates the PodVolumeBackupReconciler instance func NewPodVolumeBackupReconciler(client client.Client, mgr manager.Manager, kubeClient kubernetes.Interface, dataPathMgr *datapath.Manager, counter *exposer.VgdpCounter, nodeName string, preparingTimeout time.Duration, resourceTimeout time.Duration, podResources corev1api.ResourceRequirements, - metrics *metrics.ServerMetrics, logger logrus.FieldLogger) *PodVolumeBackupReconciler { + metrics *metrics.ServerMetrics, logger logrus.FieldLogger, dataMovePriorityClass string) *PodVolumeBackupReconciler { return &PodVolumeBackupReconciler{ - client: client, - mgr: mgr, - kubeClient: kubeClient, - logger: logger.WithField("controller", "PodVolumeBackup"), - nodeName: nodeName, - clock: &clocks.RealClock{}, - metrics: metrics, - podResources: podResources, - dataPathMgr: dataPathMgr, - vgdpCounter: counter, - preparingTimeout: preparingTimeout, - resourceTimeout: resourceTimeout, - exposer: exposer.NewPodVolumeExposer(kubeClient, logger), - cancelledPVB: make(map[string]time.Time), + client: client, + mgr: mgr, + kubeClient: kubeClient, + logger: logger.WithField("controller", "PodVolumeBackup"), + nodeName: nodeName, + clock: &clocks.RealClock{}, + metrics: metrics, + podResources: podResources, + dataPathMgr: dataPathMgr, + vgdpCounter: counter, + preparingTimeout: preparingTimeout, + resourceTimeout: resourceTimeout, + exposer: exposer.NewPodVolumeExposer(kubeClient, logger), + cancelledPVB: make(map[string]time.Time), + dataMovePriorityClass: dataMovePriorityClass, } } // PodVolumeBackupReconciler reconciles a PodVolumeBackup object type PodVolumeBackupReconciler struct { - client client.Client - mgr manager.Manager - kubeClient kubernetes.Interface - clock clocks.WithTickerAndDelayedExecution - exposer exposer.PodVolumeExposer - metrics *metrics.ServerMetrics - nodeName string - logger logrus.FieldLogger - podResources 
corev1api.ResourceRequirements - dataPathMgr *datapath.Manager - vgdpCounter *exposer.VgdpCounter - preparingTimeout time.Duration - resourceTimeout time.Duration - cancelledPVB map[string]time.Time + client client.Client + mgr manager.Manager + kubeClient kubernetes.Interface + clock clocks.WithTickerAndDelayedExecution + exposer exposer.PodVolumeExposer + metrics *metrics.ServerMetrics + nodeName string + logger logrus.FieldLogger + podResources corev1api.ResourceRequirements + dataPathMgr *datapath.Manager + vgdpCounter *exposer.VgdpCounter + preparingTimeout time.Duration + resourceTimeout time.Duration + cancelledPVB map[string]time.Time + dataMovePriorityClass string } // +kubebuilder:rbac:groups=velero.io,resources=podvolumebackups,verbs=get;list;watch;create;update;patch;delete @@ -833,6 +835,8 @@ func (r *PodVolumeBackupReconciler) setupExposeParam(pvb *velerov1api.PodVolumeB HostingPodTolerations: hostingPodTolerations, OperationTimeout: r.resourceTimeout, Resources: r.podResources, + // Priority class name for the data mover pod, retrieved from node-agent-configmap + PriorityClassName: r.dataMovePriorityClass, } } diff --git a/pkg/controller/pod_volume_backup_controller_test.go b/pkg/controller/pod_volume_backup_controller_test.go index f58a7d1a8..51e75edb2 100644 --- a/pkg/controller/pod_volume_backup_controller_test.go +++ b/pkg/controller/pod_volume_backup_controller_test.go @@ -151,6 +151,7 @@ func initPVBReconcilerWithError(needError ...error) (*PodVolumeBackupReconciler, corev1api.ResourceRequirements{}, metrics.NewServerMetrics(), velerotest.NewLogger(), + "", // dataMovePriorityClass ), nil } diff --git a/pkg/controller/pod_volume_restore_controller.go b/pkg/controller/pod_volume_restore_controller.go index 69ff782b6..ce0d312a0 100644 --- a/pkg/controller/pod_volume_restore_controller.go +++ b/pkg/controller/pod_volume_restore_controller.go @@ -56,38 +56,40 @@ import ( func NewPodVolumeRestoreReconciler(client client.Client, mgr manager.Manager, kubeClient kubernetes.Interface, dataPathMgr *datapath.Manager, counter *exposer.VgdpCounter, nodeName string, preparingTimeout time.Duration, resourceTimeout time.Duration, podResources corev1api.ResourceRequirements, - logger logrus.FieldLogger) *PodVolumeRestoreReconciler { + logger logrus.FieldLogger, dataMovePriorityClass string) *PodVolumeRestoreReconciler { return &PodVolumeRestoreReconciler{ - client: client, - mgr: mgr, - kubeClient: kubeClient, - logger: logger.WithField("controller", "PodVolumeRestore"), - nodeName: nodeName, - clock: &clocks.RealClock{}, - podResources: podResources, - dataPathMgr: dataPathMgr, - vgdpCounter: counter, - preparingTimeout: preparingTimeout, - resourceTimeout: resourceTimeout, - exposer: exposer.NewPodVolumeExposer(kubeClient, logger), - cancelledPVR: make(map[string]time.Time), + client: client, + mgr: mgr, + kubeClient: kubeClient, + logger: logger.WithField("controller", "PodVolumeRestore"), + nodeName: nodeName, + clock: &clocks.RealClock{}, + podResources: podResources, + dataPathMgr: dataPathMgr, + vgdpCounter: counter, + preparingTimeout: preparingTimeout, + resourceTimeout: resourceTimeout, + exposer: exposer.NewPodVolumeExposer(kubeClient, logger), + cancelledPVR: make(map[string]time.Time), + dataMovePriorityClass: dataMovePriorityClass, } } type PodVolumeRestoreReconciler struct { - client client.Client - mgr manager.Manager - kubeClient kubernetes.Interface - logger logrus.FieldLogger - nodeName string - clock clocks.WithTickerAndDelayedExecution - podResources 
corev1api.ResourceRequirements - exposer exposer.PodVolumeExposer - dataPathMgr *datapath.Manager - vgdpCounter *exposer.VgdpCounter - preparingTimeout time.Duration - resourceTimeout time.Duration - cancelledPVR map[string]time.Time + client client.Client + mgr manager.Manager + kubeClient kubernetes.Interface + logger logrus.FieldLogger + nodeName string + clock clocks.WithTickerAndDelayedExecution + podResources corev1api.ResourceRequirements + exposer exposer.PodVolumeExposer + dataPathMgr *datapath.Manager + vgdpCounter *exposer.VgdpCounter + preparingTimeout time.Duration + resourceTimeout time.Duration + cancelledPVR map[string]time.Time + dataMovePriorityClass string } // +kubebuilder:rbac:groups=velero.io,resources=podvolumerestores,verbs=get;list;watch;create;update;patch;delete @@ -892,6 +894,8 @@ func (r *PodVolumeRestoreReconciler) setupExposeParam(pvr *velerov1api.PodVolume HostingPodTolerations: hostingPodTolerations, OperationTimeout: r.resourceTimeout, Resources: r.podResources, + // Priority class name for the data mover pod, retrieved from node-agent-configmap + PriorityClassName: r.dataMovePriorityClass, } } diff --git a/pkg/controller/pod_volume_restore_controller_test.go b/pkg/controller/pod_volume_restore_controller_test.go index ee29d2b6c..409672c32 100644 --- a/pkg/controller/pod_volume_restore_controller_test.go +++ b/pkg/controller/pod_volume_restore_controller_test.go @@ -617,7 +617,7 @@ func initPodVolumeRestoreReconcilerWithError(objects []runtime.Object, cliObj [] dataPathMgr := datapath.NewManager(1) - return NewPodVolumeRestoreReconciler(fakeClient, nil, fakeKubeClient, dataPathMgr, nil, "test-node", time.Minute*5, time.Minute, corev1api.ResourceRequirements{}, velerotest.NewLogger()), nil + return NewPodVolumeRestoreReconciler(fakeClient, nil, fakeKubeClient, dataPathMgr, nil, "test-node", time.Minute*5, time.Minute, corev1api.ResourceRequirements{}, velerotest.NewLogger(), ""), nil } func TestPodVolumeRestoreReconcile(t *testing.T) { diff --git a/pkg/exposer/csi_snapshot.go b/pkg/exposer/csi_snapshot.go index 0e514fe6c..25b670d5e 100644 --- a/pkg/exposer/csi_snapshot.go +++ b/pkg/exposer/csi_snapshot.go @@ -82,6 +82,9 @@ type CSISnapshotExposeParam struct { // NodeOS specifies the OS of node that the source volume is attaching NodeOS string + + // PriorityClassName is the priority class name for the data mover pod + PriorityClassName string } // CSISnapshotExposeWaitParam define the input param for WaitExposed of CSI snapshots @@ -226,6 +229,7 @@ func (e *csiSnapshotExposer) Expose(ctx context.Context, ownerObject corev1api.O backupPVCReadOnly, spcNoRelabeling, csiExposeParam.NodeOS, + csiExposeParam.PriorityClassName, ) if err != nil { return errors.Wrap(err, "error to create backup pod") @@ -552,6 +556,7 @@ func (e *csiSnapshotExposer) createBackupPod( backupPVCReadOnly bool, spcNoRelabeling bool, nodeOS string, + priorityClassName string, ) (*corev1api.Pod, error) { podName := ownerObject.Name @@ -563,6 +568,11 @@ func (e *csiSnapshotExposer) createBackupPod( return nil, errors.Wrap(err, "error to get inherited pod info from node-agent") } + // Log the priority class if it's set + if priorityClassName != "" { + e.log.Debugf("Setting priority class %q for data mover pod %s", priorityClassName, podName) + } + var gracePeriod int64 volumeMounts, volumeDevices, volumePath := kube.MakePodPVCAttachment(volumeName, backupPVC.Spec.VolumeMode, backupPVCReadOnly) volumeMounts = append(volumeMounts, podInfo.volumeMounts...) 
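For context on how the new `priorityClassName` argument reaches `createBackupPod`: the data mover controllers read it from the node-agent ConfigMap (the new exposer and node-agent tests below use a `node-agent-config` ConfigMap whose `config` key holds JSON such as `{"priorityClassName": "high-priority"}`) and pass it down as `dataMovePriorityClass`. The snippet below is a minimal, self-contained sketch of that decoding step only; the local `Configs` struct mirrors just the `priorityClassName` field added to `pkg/nodeagent.Configs` in this patch and is not the actual Velero type.

```go
// Minimal sketch, not Velero code: it illustrates how a priorityClassName key in the
// node-agent ConfigMap's JSON payload would be decoded before the controllers hand it
// to the exposers as dataMovePriorityClass.
package main

import (
	"encoding/json"
	"fmt"
)

// Configs mirrors only the field added to pkg/nodeagent.Configs in this change.
type Configs struct {
	PriorityClassName string `json:"priorityClassName,omitempty"`
}

func main() {
	// Example payload, matching the data used in the exposer and node-agent tests below.
	raw := `{"priorityClassName": "high-priority"}`

	var cfg Configs
	if err := json.Unmarshal([]byte(raw), &cfg); err != nil {
		panic(err)
	}
	fmt.Println(cfg.PriorityClassName) // prints "high-priority"
}
```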
@@ -693,6 +703,7 @@ func (e *csiSnapshotExposer) createBackupPod( Resources: resources, }, }, + PriorityClassName: priorityClassName, ServiceAccountName: podInfo.serviceAccount, TerminationGracePeriodSeconds: &gracePeriod, Volumes: volumes, diff --git a/pkg/exposer/csi_snapshot_priority_test.go b/pkg/exposer/csi_snapshot_priority_test.go new file mode 100644 index 000000000..236d15acb --- /dev/null +++ b/pkg/exposer/csi_snapshot_priority_test.go @@ -0,0 +1,246 @@ +/* +Copyright the Velero contributors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package exposer + +import ( + "testing" + "time" + + snapshotFake "github.com/kubernetes-csi/external-snapshotter/client/v8/clientset/versioned/fake" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + appsv1api "k8s.io/api/apps/v1" + corev1api "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes/fake" + + velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" + velerotest "github.com/vmware-tanzu/velero/pkg/test" + "github.com/vmware-tanzu/velero/pkg/util/kube" +) + +func TestCreateBackupPodWithPriorityClass(t *testing.T) { + testCases := []struct { + name string + nodeAgentConfigMapData string + expectedPriorityClass string + description string + }{ + { + name: "with priority class in config map", + nodeAgentConfigMapData: `{ + "priorityClassName": "high-priority" + }`, + expectedPriorityClass: "high-priority", + description: "Should set priority class from node-agent-configmap", + }, + { + name: "without priority class in config map", + nodeAgentConfigMapData: `{ + "loadAffinity": [] + }`, + expectedPriorityClass: "", + description: "Should have empty priority class when not specified", + }, + { + name: "empty config map", + nodeAgentConfigMapData: `{}`, + expectedPriorityClass: "", + description: "Should handle empty config map gracefully", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + ctx := t.Context() + + // Create fake Kubernetes client + kubeClient := fake.NewSimpleClientset() + + // Create node-agent daemonset (required for getInheritedPodInfo) + daemonSet := &appsv1api.DaemonSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node-agent", + Namespace: velerov1api.DefaultNamespace, + }, + Spec: appsv1api.DaemonSetSpec{ + Template: corev1api.PodTemplateSpec{ + Spec: corev1api.PodSpec{ + Containers: []corev1api.Container{ + { + Name: "node-agent", + Image: "velero/velero:latest", + }, + }, + }, + }, + }, + } + _, err := kubeClient.AppsV1().DaemonSets(velerov1api.DefaultNamespace).Create(ctx, daemonSet, metav1.CreateOptions{}) + require.NoError(t, err) + + // Create node-agent config map + configMap := &corev1api.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node-agent-config", + Namespace: velerov1api.DefaultNamespace, + }, + Data: map[string]string{ + "config": tc.nodeAgentConfigMapData, + }, + } + _, err = kubeClient.CoreV1().ConfigMaps(velerov1api.DefaultNamespace).Create(ctx, configMap, metav1.CreateOptions{}) + 
require.NoError(t, err) + + // Create owner object for the backup pod + ownerObject := corev1api.ObjectReference{ + APIVersion: velerov1api.SchemeGroupVersion.String(), + Kind: "DataUpload", + Name: "test-dataupload", + Namespace: velerov1api.DefaultNamespace, + UID: "test-uid", + } + + // Create a backup PVC + backupPVC := &corev1api.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-backup-pvc", + Namespace: velerov1api.DefaultNamespace, + }, + Spec: corev1api.PersistentVolumeClaimSpec{ + AccessModes: []corev1api.PersistentVolumeAccessMode{ + corev1api.ReadWriteOnce, + }, + }, + } + + // Create fake snapshot client + fakeSnapshotClient := snapshotFake.NewSimpleClientset() + + // Create CSI snapshot exposer + exposer := &csiSnapshotExposer{ + kubeClient: kubeClient, + csiSnapshotClient: fakeSnapshotClient.SnapshotV1(), + log: velerotest.NewLogger(), + } + + // Call createBackupPod + pod, err := exposer.createBackupPod( + ctx, + ownerObject, + backupPVC, + time.Minute*5, + nil, // labels + nil, // annotations + nil, // tolerations + nil, // affinity + corev1api.ResourceRequirements{}, + false, // backupPVCReadOnly + false, // spcNoRelabeling + kube.NodeOSLinux, + tc.expectedPriorityClass, + ) + + require.NoError(t, err, tc.description) + assert.NotNil(t, pod) + assert.Equal(t, tc.expectedPriorityClass, pod.Spec.PriorityClassName, tc.description) + }) + } +} + +func TestCreateBackupPodWithMissingConfigMap(t *testing.T) { + ctx := t.Context() + + // Create fake Kubernetes client without config map + kubeClient := fake.NewSimpleClientset() + + // Create node-agent daemonset (required for getInheritedPodInfo) + daemonSet := &appsv1api.DaemonSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node-agent", + Namespace: velerov1api.DefaultNamespace, + }, + Spec: appsv1api.DaemonSetSpec{ + Template: corev1api.PodTemplateSpec{ + Spec: corev1api.PodSpec{ + Containers: []corev1api.Container{ + { + Name: "node-agent", + Image: "velero/velero:latest", + }, + }, + }, + }, + }, + } + _, err := kubeClient.AppsV1().DaemonSets(velerov1api.DefaultNamespace).Create(ctx, daemonSet, metav1.CreateOptions{}) + require.NoError(t, err) + + // Create owner object for the backup pod + ownerObject := corev1api.ObjectReference{ + APIVersion: velerov1api.SchemeGroupVersion.String(), + Kind: "DataUpload", + Name: "test-dataupload", + Namespace: velerov1api.DefaultNamespace, + UID: "test-uid", + } + + // Create a backup PVC + backupPVC := &corev1api.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-backup-pvc", + Namespace: velerov1api.DefaultNamespace, + }, + Spec: corev1api.PersistentVolumeClaimSpec{ + AccessModes: []corev1api.PersistentVolumeAccessMode{ + corev1api.ReadWriteOnce, + }, + }, + } + + // Create fake snapshot client + fakeSnapshotClient := snapshotFake.NewSimpleClientset() + + // Create CSI snapshot exposer + exposer := &csiSnapshotExposer{ + kubeClient: kubeClient, + csiSnapshotClient: fakeSnapshotClient.SnapshotV1(), + log: velerotest.NewLogger(), + } + + // Call createBackupPod + pod, err := exposer.createBackupPod( + ctx, + ownerObject, + backupPVC, + time.Minute*5, + nil, // labels + nil, // annotations + nil, // tolerations + nil, // affinity + corev1api.ResourceRequirements{}, + false, // backupPVCReadOnly + false, // spcNoRelabeling + kube.NodeOSLinux, + "", // empty priority class since config map is missing + ) + + // Should succeed even when config map is missing + require.NoError(t, err, "Should succeed even when config map is missing") + assert.NotNil(t, pod) + 
assert.Empty(t, pod.Spec.PriorityClassName, "Should have empty priority class when config map is missing") +} diff --git a/pkg/exposer/generic_restore.go b/pkg/exposer/generic_restore.go index f794ea2ed..3ac919a12 100644 --- a/pkg/exposer/generic_restore.go +++ b/pkg/exposer/generic_restore.go @@ -69,6 +69,9 @@ type GenericRestoreExposeParam struct { // LoadAffinity specifies the node affinity of the backup pod LoadAffinity []*kube.LoadAffinity + + // PriorityClassName is the priority class name for the data mover pod + PriorityClassName string } // GenericRestoreExposer is the interfaces for a generic restore exposer @@ -156,6 +159,7 @@ func (e *genericRestoreExposer) Expose(ctx context.Context, ownerObject corev1ap param.Resources, param.NodeOS, affinity, + param.PriorityClassName, ) if err != nil { return errors.Wrapf(err, "error to create restore pod") @@ -422,6 +426,7 @@ func (e *genericRestoreExposer) createRestorePod( resources corev1api.ResourceRequirements, nodeOS string, affinity *kube.LoadAffinity, + priorityClassName string, ) (*corev1api.Pod, error) { restorePodName := ownerObject.Name restorePVCName := ownerObject.Name @@ -443,6 +448,11 @@ func (e *genericRestoreExposer) createRestorePod( return nil, errors.Wrap(err, "error to get inherited pod info from node-agent") } + // Log the priority class if it's set + if priorityClassName != "" { + e.log.Debugf("Setting priority class %q for data mover pod %s", priorityClassName, restorePodName) + } + var gracePeriod int64 volumeMounts, volumeDevices, volumePath := kube.MakePodPVCAttachment(volumeName, targetPVC.Spec.VolumeMode, false) volumeMounts = append(volumeMounts, podInfo.volumeMounts...) @@ -556,6 +566,7 @@ func (e *genericRestoreExposer) createRestorePod( Resources: resources, }, }, + PriorityClassName: priorityClassName, ServiceAccountName: podInfo.serviceAccount, TerminationGracePeriodSeconds: &gracePeriod, Volumes: volumes, diff --git a/pkg/exposer/generic_restore_priority_test.go b/pkg/exposer/generic_restore_priority_test.go new file mode 100644 index 000000000..b67652b93 --- /dev/null +++ b/pkg/exposer/generic_restore_priority_test.go @@ -0,0 +1,236 @@ +/* +Copyright the Velero contributors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package exposer + +import ( + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + appsv1api "k8s.io/api/apps/v1" + corev1api "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes/fake" + + velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" + velerotest "github.com/vmware-tanzu/velero/pkg/test" + "github.com/vmware-tanzu/velero/pkg/util/kube" +) + +// TestCreateRestorePodWithPriorityClass verifies that the priority class name is properly set in the restore pod +func TestCreateRestorePodWithPriorityClass(t *testing.T) { + testCases := []struct { + name string + nodeAgentConfigMapData string + expectedPriorityClass string + description string + }{ + { + name: "with priority class in config map", + nodeAgentConfigMapData: `{ + "priorityClassName": "low-priority" + }`, + expectedPriorityClass: "low-priority", + description: "Should set priority class from node-agent-configmap", + }, + { + name: "without priority class in config map", + nodeAgentConfigMapData: `{ + "loadAffinity": [] + }`, + expectedPriorityClass: "", + description: "Should have empty priority class when not specified", + }, + { + name: "empty config map", + nodeAgentConfigMapData: `{}`, + expectedPriorityClass: "", + description: "Should handle empty config map gracefully", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + ctx := t.Context() + + // Create fake Kubernetes client + kubeClient := fake.NewSimpleClientset() + + // Create node-agent daemonset (required for getInheritedPodInfo) + daemonSet := &appsv1api.DaemonSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node-agent", + Namespace: velerov1api.DefaultNamespace, + }, + Spec: appsv1api.DaemonSetSpec{ + Template: corev1api.PodTemplateSpec{ + Spec: corev1api.PodSpec{ + Containers: []corev1api.Container{ + { + Name: "node-agent", + Image: "velero/velero:latest", + }, + }, + }, + }, + }, + } + _, err := kubeClient.AppsV1().DaemonSets(velerov1api.DefaultNamespace).Create(ctx, daemonSet, metav1.CreateOptions{}) + require.NoError(t, err) + + // Create node-agent config map + configMap := &corev1api.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node-agent-config", + Namespace: velerov1api.DefaultNamespace, + }, + Data: map[string]string{ + "config": tc.nodeAgentConfigMapData, + }, + } + _, err = kubeClient.CoreV1().ConfigMaps(velerov1api.DefaultNamespace).Create(ctx, configMap, metav1.CreateOptions{}) + require.NoError(t, err) + + // Create owner object for the restore pod + ownerObject := corev1api.ObjectReference{ + APIVersion: velerov1api.SchemeGroupVersion.String(), + Kind: "DataDownload", + Name: "test-datadownload", + Namespace: velerov1api.DefaultNamespace, + UID: "test-uid", + } + + // Create a target PVC + targetPVC := &corev1api.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-target-pvc", + Namespace: velerov1api.DefaultNamespace, + }, + Spec: corev1api.PersistentVolumeClaimSpec{ + AccessModes: []corev1api.PersistentVolumeAccessMode{ + corev1api.ReadWriteOnce, + }, + }, + } + + // Create generic restore exposer + exposer := &genericRestoreExposer{ + kubeClient: kubeClient, + log: velerotest.NewLogger(), + } + + // Call createRestorePod + pod, err := exposer.createRestorePod( + ctx, + ownerObject, + targetPVC, + time.Minute*5, + nil, // labels + nil, // annotations + nil, // tolerations + "", // selectedNode + corev1api.ResourceRequirements{}, + kube.NodeOSLinux, + nil, // affinity + 
tc.expectedPriorityClass, + ) + + require.NoError(t, err, tc.description) + assert.NotNil(t, pod) + assert.Equal(t, tc.expectedPriorityClass, pod.Spec.PriorityClassName, tc.description) + }) + } +} + +func TestCreateRestorePodWithMissingConfigMap(t *testing.T) { + ctx := t.Context() + + // Create fake Kubernetes client without config map + kubeClient := fake.NewSimpleClientset() + + // Create node-agent daemonset (required for getInheritedPodInfo) + daemonSet := &appsv1api.DaemonSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node-agent", + Namespace: velerov1api.DefaultNamespace, + }, + Spec: appsv1api.DaemonSetSpec{ + Template: corev1api.PodTemplateSpec{ + Spec: corev1api.PodSpec{ + Containers: []corev1api.Container{ + { + Name: "node-agent", + Image: "velero/velero:latest", + }, + }, + }, + }, + }, + } + _, err := kubeClient.AppsV1().DaemonSets(velerov1api.DefaultNamespace).Create(ctx, daemonSet, metav1.CreateOptions{}) + require.NoError(t, err) + + // Create owner object for the restore pod + ownerObject := corev1api.ObjectReference{ + APIVersion: velerov1api.SchemeGroupVersion.String(), + Kind: "DataDownload", + Name: "test-datadownload", + Namespace: velerov1api.DefaultNamespace, + UID: "test-uid", + } + + // Create a target PVC + targetPVC := &corev1api.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-target-pvc", + Namespace: velerov1api.DefaultNamespace, + }, + Spec: corev1api.PersistentVolumeClaimSpec{ + AccessModes: []corev1api.PersistentVolumeAccessMode{ + corev1api.ReadWriteOnce, + }, + }, + } + + // Create generic restore exposer + exposer := &genericRestoreExposer{ + kubeClient: kubeClient, + log: velerotest.NewLogger(), + } + + // Call createRestorePod + pod, err := exposer.createRestorePod( + ctx, + ownerObject, + targetPVC, + time.Minute*5, + nil, // labels + nil, // annotations + nil, // tolerations + "", // selectedNode + corev1api.ResourceRequirements{}, + kube.NodeOSLinux, + nil, // affinity + "", // empty priority class since config map is missing + ) + + // Should succeed even when config map is missing + require.NoError(t, err, "Should succeed even when config map is missing") + assert.NotNil(t, pod) + assert.Empty(t, pod.Spec.PriorityClassName, "Should have empty priority class when config map is missing") +} diff --git a/pkg/exposer/generic_restore_test.go b/pkg/exposer/generic_restore_test.go index 657d75347..b5679889b 100644 --- a/pkg/exposer/generic_restore_test.go +++ b/pkg/exposer/generic_restore_test.go @@ -914,6 +914,7 @@ func TestCreateRestorePod(t *testing.T) { corev1api.ResourceRequirements{}, test.nodeOS, test.affinity, + "", // priority class name ) require.NoError(t, err) diff --git a/pkg/exposer/pod_volume.go b/pkg/exposer/pod_volume.go index bdde55175..012abb196 100644 --- a/pkg/exposer/pod_volume.go +++ b/pkg/exposer/pod_volume.go @@ -70,6 +70,9 @@ type PodVolumeExposeParam struct { // Type specifies the type of the expose, either backup or erstore Type string + + // PriorityClassName is the priority class name for the data mover pod + PriorityClassName string } // PodVolumeExposer is the interfaces for a pod volume exposer @@ -150,7 +153,7 @@ func (e *podVolumeExposer) Expose(ctx context.Context, ownerObject corev1api.Obj curLog.WithField("path", path).Infof("Host path is retrieved for pod %s, volume %s", param.ClientPodName, param.ClientPodVolume) - hostingPod, err := e.createHostingPod(ctx, ownerObject, param.Type, path.ByPath, param.OperationTimeout, param.HostingPodLabels, param.HostingPodAnnotations, 
param.HostingPodTolerations, pod.Spec.NodeName, param.Resources, nodeOS) + hostingPod, err := e.createHostingPod(ctx, ownerObject, param.Type, path.ByPath, param.OperationTimeout, param.HostingPodLabels, param.HostingPodAnnotations, param.HostingPodTolerations, pod.Spec.NodeName, param.Resources, nodeOS, param.PriorityClassName) if err != nil { return errors.Wrapf(err, "error to create hosting pod") } @@ -266,7 +269,7 @@ func (e *podVolumeExposer) CleanUp(ctx context.Context, ownerObject corev1api.Ob } func (e *podVolumeExposer) createHostingPod(ctx context.Context, ownerObject corev1api.ObjectReference, exposeType string, hostPath string, - operationTimeout time.Duration, label map[string]string, annotation map[string]string, toleration []corev1api.Toleration, selectedNode string, resources corev1api.ResourceRequirements, nodeOS string) (*corev1api.Pod, error) { + operationTimeout time.Duration, label map[string]string, annotation map[string]string, toleration []corev1api.Toleration, selectedNode string, resources corev1api.ResourceRequirements, nodeOS string, priorityClassName string) (*corev1api.Pod, error) { hostingPodName := ownerObject.Name containerName := string(ownerObject.UID) @@ -278,6 +281,11 @@ func (e *podVolumeExposer) createHostingPod(ctx context.Context, ownerObject cor return nil, errors.Wrap(err, "error to get inherited pod info from node-agent") } + // Log the priority class if it's set + if priorityClassName != "" { + e.log.Debugf("Setting priority class %q for data mover pod %s", priorityClassName, hostingPodName) + } + var gracePeriod int64 mountPropagation := corev1api.MountPropagationHostToContainer volumeMounts := []corev1api.VolumeMount{{ @@ -380,6 +388,7 @@ func (e *podVolumeExposer) createHostingPod(ctx context.Context, ownerObject cor Resources: resources, }, }, + PriorityClassName: priorityClassName, ServiceAccountName: podInfo.serviceAccount, TerminationGracePeriodSeconds: &gracePeriod, Volumes: volumes, diff --git a/pkg/install/daemonset.go b/pkg/install/daemonset.go index d875e9350..2c4bee090 100644 --- a/pkg/install/daemonset.go +++ b/pkg/install/daemonset.go @@ -179,6 +179,7 @@ func DaemonSet(namespace string, opts ...podTemplateOption) *appsv1api.DaemonSet Resources: c.resources, }, }, + PriorityClassName: c.priorityClassName, }, }, }, diff --git a/pkg/install/daemonset_test.go b/pkg/install/daemonset_test.go index 8ad00fb3b..77a1ebae8 100644 --- a/pkg/install/daemonset_test.go +++ b/pkg/install/daemonset_test.go @@ -81,3 +81,37 @@ func TestDaemonSet(t *testing.T) { assert.Equal(t, (*corev1api.PodSecurityContext)(nil), ds.Spec.Template.Spec.SecurityContext) assert.Equal(t, (*corev1api.SecurityContext)(nil), ds.Spec.Template.Spec.Containers[0].SecurityContext) } + +func TestDaemonSetWithPriorityClassName(t *testing.T) { + testCases := []struct { + name string + priorityClassName string + expectedValue string + }{ + { + name: "with priority class name", + priorityClassName: "high-priority", + expectedValue: "high-priority", + }, + { + name: "without priority class name", + priorityClassName: "", + expectedValue: "", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + // Create a daemonset with the priority class name option + var opts []podTemplateOption + if tc.priorityClassName != "" { + opts = append(opts, WithPriorityClassName(tc.priorityClassName)) + } + + daemonset := DaemonSet("velero", opts...) 
+ + // Verify the priority class name is set correctly + assert.Equal(t, tc.expectedValue, daemonset.Spec.Template.Spec.PriorityClassName) + }) + } +} diff --git a/pkg/install/deployment.go b/pkg/install/deployment.go index 538958789..91acce344 100644 --- a/pkg/install/deployment.go +++ b/pkg/install/deployment.go @@ -62,6 +62,7 @@ type podTemplateConfig struct { forWindows bool kubeletRootDir string nodeAgentDisableHostPath bool + priorityClassName string } func WithImage(image string) podTemplateOption { @@ -223,6 +224,12 @@ func WithItemBlockWorkerCount(itemBlockWorkerCount int) podTemplateOption { } } +func WithPriorityClassName(priorityClassName string) podTemplateOption { + return func(c *podTemplateConfig) { + c.priorityClassName = priorityClassName + } +} + func WithForWindows() podTemplateOption { return func(c *podTemplateConfig) { c.forWindows = true @@ -407,6 +414,7 @@ func Deployment(namespace string, opts ...podTemplateOption) *appsv1api.Deployme }, }, }, + PriorityClassName: c.priorityClassName, }, }, }, diff --git a/pkg/install/deployment_test.go b/pkg/install/deployment_test.go index ac53e6033..b8aeaa9dd 100644 --- a/pkg/install/deployment_test.go +++ b/pkg/install/deployment_test.go @@ -103,3 +103,37 @@ func TestDeployment(t *testing.T) { assert.Equal(t, "linux", deploy.Spec.Template.Spec.NodeSelector["kubernetes.io/os"]) assert.Equal(t, "linux", string(deploy.Spec.Template.Spec.OS.Name)) } + +func TestDeploymentWithPriorityClassName(t *testing.T) { + testCases := []struct { + name string + priorityClassName string + expectedValue string + }{ + { + name: "with priority class name", + priorityClassName: "high-priority", + expectedValue: "high-priority", + }, + { + name: "without priority class name", + priorityClassName: "", + expectedValue: "", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + // Create a deployment with the priority class name option + var opts []podTemplateOption + if tc.priorityClassName != "" { + opts = append(opts, WithPriorityClassName(tc.priorityClassName)) + } + + deployment := Deployment("velero", opts...) + + // Verify the priority class name is set correctly + assert.Equal(t, tc.expectedValue, deployment.Spec.Template.Spec.PriorityClassName) + }) + } +} diff --git a/pkg/install/resources.go b/pkg/install/resources.go index 1888a4852..344fc599f 100644 --- a/pkg/install/resources.go +++ b/pkg/install/resources.go @@ -273,6 +273,8 @@ type VeleroOptions struct { ItemBlockWorkerCount int KubeletRootDir string NodeAgentDisableHostPath bool + ServerPriorityClassName string + NodeAgentPriorityClassName string } func AllCRDs() *unstructured.UnstructuredList { @@ -362,6 +364,10 @@ func AllResources(o *VeleroOptions) *unstructured.UnstructuredList { WithItemBlockWorkerCount(o.ItemBlockWorkerCount), } + if o.ServerPriorityClassName != "" { + deployOpts = append(deployOpts, WithPriorityClassName(o.ServerPriorityClassName)) + } + if len(o.Features) > 0 { deployOpts = append(deployOpts, WithFeatures(o.Features)) } @@ -424,6 +430,10 @@ func AllResources(o *VeleroOptions) *unstructured.UnstructuredList { dsOpts = append(dsOpts, WithKubeletRootDir(o.KubeletRootDir)) } + if o.NodeAgentPriorityClassName != "" { + dsOpts = append(dsOpts, WithPriorityClassName(o.NodeAgentPriorityClassName)) + } + if o.UseNodeAgent { ds := DaemonSet(o.Namespace, dsOpts...) 
if err := appendUnstructured(resources, ds); err != nil { diff --git a/pkg/install/resources_test.go b/pkg/install/resources_test.go index 6118c04bb..bafa3a684 100644 --- a/pkg/install/resources_test.go +++ b/pkg/install/resources_test.go @@ -117,3 +117,132 @@ func TestAllResources(t *testing.T) { assert.Len(t, ds, 2) } + +func TestAllResourcesWithPriorityClassName(t *testing.T) { + testCases := []struct { + name string + serverPriorityClassName string + nodeAgentPriorityClassName string + useNodeAgent bool + }{ + { + name: "with same priority class for server and node agent", + serverPriorityClassName: "high-priority", + nodeAgentPriorityClassName: "high-priority", + useNodeAgent: true, + }, + { + name: "with different priority classes for server and node agent", + serverPriorityClassName: "high-priority", + nodeAgentPriorityClassName: "medium-priority", + useNodeAgent: true, + }, + { + name: "with only server priority class", + serverPriorityClassName: "high-priority", + nodeAgentPriorityClassName: "", + useNodeAgent: true, + }, + { + name: "with only node agent priority class", + serverPriorityClassName: "", + nodeAgentPriorityClassName: "medium-priority", + useNodeAgent: true, + }, + { + name: "with priority class name without node agent", + serverPriorityClassName: "high-priority", + nodeAgentPriorityClassName: "medium-priority", + useNodeAgent: false, + }, + { + name: "without priority class name with node agent", + serverPriorityClassName: "", + nodeAgentPriorityClassName: "", + useNodeAgent: true, + }, + { + name: "without priority class name without node agent", + serverPriorityClassName: "", + nodeAgentPriorityClassName: "", + useNodeAgent: false, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + // Create VeleroOptions with the priority class names + options := &VeleroOptions{ + Namespace: "velero", + UseNodeAgent: tc.useNodeAgent, + ServerPriorityClassName: tc.serverPriorityClassName, + NodeAgentPriorityClassName: tc.nodeAgentPriorityClassName, + } + + // Generate all resources + resources := AllResources(options) + + // Find the deployment and verify priority class name + deploymentFound := false + daemonsetFound := false + + for i := range resources.Items { + item := resources.Items[i] + + // Check deployment + if item.GetKind() == "Deployment" && item.GetName() == "velero" { + deploymentFound = true + + // Extract priority class name from the unstructured object + priorityClassName, found, err := unstructured.NestedString( + item.Object, + "spec", "template", "spec", "priorityClassName", + ) + + require.NoError(t, err) + if tc.serverPriorityClassName != "" { + assert.True(t, found, "Server priorityClassName should be set") + assert.Equal(t, tc.serverPriorityClassName, priorityClassName, + "Server deployment should have the correct priority class") + } else { + // If no priority class name was provided, it might not be set at all + if found { + assert.Empty(t, priorityClassName) + } + } + } + + // Check daemonset if node agent is enabled + if tc.useNodeAgent && item.GetKind() == "DaemonSet" && item.GetName() == "node-agent" { + daemonsetFound = true + + // Extract priority class name from the unstructured object + priorityClassName, found, err := unstructured.NestedString( + item.Object, + "spec", "template", "spec", "priorityClassName", + ) + + require.NoError(t, err) + if tc.nodeAgentPriorityClassName != "" { + assert.True(t, found, "Node agent priorityClassName should be set") + assert.Equal(t, tc.nodeAgentPriorityClassName, 
priorityClassName, + "Node agent daemonset should have the correct priority class") + } else { + // If no priority class name was provided, it might not be set at all + if found { + assert.Empty(t, priorityClassName) + } + } + } + } + + // Verify we found the deployment + assert.True(t, deploymentFound, "Deployment should be present in resources") + + // Verify we found the daemonset if node agent is enabled + if tc.useNodeAgent { + assert.True(t, daemonsetFound, "DaemonSet should be present when UseNodeAgent is true") + } + }) + } +} diff --git a/pkg/nodeagent/node_agent.go b/pkg/nodeagent/node_agent.go index 7ca9f602e..d83a9eba3 100644 --- a/pkg/nodeagent/node_agent.go +++ b/pkg/nodeagent/node_agent.go @@ -112,6 +112,9 @@ type Configs struct { // PodResources is the resource config for various types of pods launched by node-agent, i.e., data mover pods. PodResources *kube.PodResources `json:"podResources,omitempty"` + + // PriorityClassName is the priority class name for data mover pods created by the node agent + PriorityClassName string `json:"priorityClassName,omitempty"` } func IsRunningOnLinux(ctx context.Context, kubeClient kubernetes.Interface, namespace string) error { diff --git a/pkg/nodeagent/node_agent_test.go b/pkg/nodeagent/node_agent_test.go index 292c14a61..aaf851d6e 100644 --- a/pkg/nodeagent/node_agent_test.go +++ b/pkg/nodeagent/node_agent_test.go @@ -246,6 +246,8 @@ func TestGetConfigs(t *testing.T) { cmWithInvalidDataFormat := builder.ForConfigMap("fake-ns", "node-agent-config").Data("fake-key", "wrong").Result() cmWithoutCocurrentData := builder.ForConfigMap("fake-ns", "node-agent-config").Data("fake-key", "{\"someothers\":{\"someother\": 10}}").Result() cmWithValidData := builder.ForConfigMap("fake-ns", "node-agent-config").Data("fake-key", "{\"loadConcurrency\":{\"globalConfig\": 5}}").Result() + cmWithPriorityClass := builder.ForConfigMap("fake-ns", "node-agent-config").Data("fake-key", "{\"priorityClassName\": \"high-priority\"}").Result() + cmWithPriorityClassAndOther := builder.ForConfigMap("fake-ns", "node-agent-config").Data("fake-key", "{\"priorityClassName\": \"low-priority\", \"loadConcurrency\":{\"globalConfig\": 3}}").Result() tests := []struct { name string @@ -305,6 +307,29 @@ func TestGetConfigs(t *testing.T) { }, }, }, + { + name: "configmap with priority class name", + namespace: "fake-ns", + kubeClientObj: []runtime.Object{ + cmWithPriorityClass, + }, + expectResult: &Configs{ + PriorityClassName: "high-priority", + }, + }, + { + name: "configmap with priority class and other configs", + namespace: "fake-ns", + kubeClientObj: []runtime.Object{ + cmWithPriorityClassAndOther, + }, + expectResult: &Configs{ + PriorityClassName: "low-priority", + LoadConcurrency: &LoadConcurrency{ + GlobalConfig: 3, + }, + }, + }, } for _, test := range tests { @@ -321,10 +346,16 @@ func TestGetConfigs(t *testing.T) { if test.expectResult == nil { assert.Nil(t, result) - } else if test.expectResult.LoadConcurrency == nil { - assert.Nil(t, result.LoadConcurrency) } else { - assert.Equal(t, *test.expectResult.LoadConcurrency, *result.LoadConcurrency) + // Check PriorityClassName + assert.Equal(t, test.expectResult.PriorityClassName, result.PriorityClassName) + + // Check LoadConcurrency + if test.expectResult.LoadConcurrency == nil { + assert.Nil(t, result.LoadConcurrency) + } else { + assert.Equal(t, *test.expectResult.LoadConcurrency, *result.LoadConcurrency) + } } } else { assert.EqualError(t, err, test.expectErr) diff --git 
a/pkg/repository/maintenance/maintenance.go b/pkg/repository/maintenance/maintenance.go index c887e8813..55fc0bc18 100644 --- a/pkg/repository/maintenance/maintenance.go +++ b/pkg/repository/maintenance/maintenance.go @@ -61,6 +61,10 @@ type JobConfigs struct { // KeepLatestMaintenanceJobs is the number of latest maintenance jobs to keep for the repository. KeepLatestMaintenanceJobs *int `json:"keepLatestMaintenanceJobs,omitempty"` + + // PriorityClassName is the priority class name for the maintenance job pod + // Note: This is only read from the global configuration, not per-repository + PriorityClassName string `json:"priorityClassName,omitempty"` } func GenerateJobName(repo string) string { @@ -278,6 +282,11 @@ func getJobConfig( if result.KeepLatestMaintenanceJobs == nil && globalResult.KeepLatestMaintenanceJobs != nil { result.KeepLatestMaintenanceJobs = globalResult.KeepLatestMaintenanceJobs } + + // Priority class is only read from global config, not per-repository + if globalResult.PriorityClassName != "" { + result.PriorityClassName = globalResult.PriorityClassName + } } return result, nil @@ -424,14 +433,14 @@ func StartNewJob(cli client.Client, ctx context.Context, repo *velerov1api.Backu log.Info("Starting maintenance repo") - maintenanceJob, err := buildJob(cli, ctx, repo, bsl.Name, jobConfig, podResources, logLevel, logFormat) + maintenanceJob, err := buildJob(cli, ctx, repo, bsl.Name, jobConfig, podResources, logLevel, logFormat, log) if err != nil { return "", errors.Wrap(err, "error to build maintenance job") } log = log.WithField("job", fmt.Sprintf("%s/%s", maintenanceJob.Namespace, maintenanceJob.Name)) - if err := cli.Create(context.TODO(), maintenanceJob); err != nil { + if err := cli.Create(ctx, maintenanceJob); err != nil { return "", errors.Wrap(err, "error to create maintenance job") } @@ -440,6 +449,26 @@ func StartNewJob(cli client.Client, ctx context.Context, repo *velerov1api.Backu return maintenanceJob.Name, nil } +func getPriorityClassName(ctx context.Context, cli client.Client, config *JobConfigs, logger logrus.FieldLogger) string { + // Use the priority class name from the global job configuration if available + // Note: Priority class is only read from global config, not per-repository + if config != nil && config.PriorityClassName != "" { + // Validate that the priority class exists in the cluster + if err := kube.ValidatePriorityClassWithClient(ctx, cli, config.PriorityClassName); err != nil { + if apierrors.IsNotFound(err) { + logger.Warnf("Priority class %q not found in cluster. 
Job creation may fail if the priority class doesn't exist when jobs are scheduled.", config.PriorityClassName) + } else { + logger.WithError(err).Warnf("Failed to validate priority class %q", config.PriorityClassName) + } + // Still return the priority class name to let Kubernetes handle the error + return config.PriorityClassName + } + logger.Infof("Validated priority class %q exists in cluster", config.PriorityClassName) + return config.PriorityClassName + } + return "" +} + func buildJob( cli client.Client, ctx context.Context, @@ -449,6 +478,7 @@ func buildJob( podResources kube.PodResources, logLevel logrus.Level, logFormat *logging.FormatFlag, + logger logrus.FieldLogger, ) (*batchv1api.Job, error) { // Get the Velero server deployment deployment := &appsv1api.Deployment{} @@ -559,6 +589,7 @@ func buildJob( TerminationMessagePolicy: corev1api.TerminationMessageFallbackToLogsOnError, }, }, + PriorityClassName: getPriorityClassName(ctx, cli, config, logger), RestartPolicy: corev1api.RestartPolicyNever, SecurityContext: podSecurityContext, Volumes: volumes, diff --git a/pkg/repository/maintenance/maintenance_test.go b/pkg/repository/maintenance/maintenance_test.go index d1ffb0f7c..8b377edaf 100644 --- a/pkg/repository/maintenance/maintenance_test.go +++ b/pkg/repository/maintenance/maintenance_test.go @@ -29,6 +29,7 @@ import ( "github.com/stretchr/testify/require" batchv1api "k8s.io/api/batch/v1" corev1api "k8s.io/api/core/v1" + schedulingv1 "k8s.io/api/scheduling/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" @@ -1101,6 +1102,7 @@ func TestBuildJob(t *testing.T) { *tc.m.PodResources, tc.logLevel, tc.logFormat, + logrus.New(), ) // Check the error @@ -1300,3 +1302,187 @@ func mockBackupRepo() *velerov1api.BackupRepository { }, } } + +func TestGetPriorityClassName(t *testing.T) { + testCases := []struct { + name string + config *JobConfigs + priorityClassExists bool + expectedValue string + expectedLogContains string + expectedLogLevel string + }{ + { + name: "empty priority class name should return empty string", + config: &JobConfigs{PriorityClassName: ""}, + expectedValue: "", + expectedLogContains: "", + }, + { + name: "nil config should return empty string", + config: nil, + expectedValue: "", + expectedLogContains: "", + }, + { + name: "existing priority class should log info and return name", + config: &JobConfigs{PriorityClassName: "high-priority"}, + priorityClassExists: true, + expectedValue: "high-priority", + expectedLogContains: "Validated priority class \\\"high-priority\\\" exists in cluster", + expectedLogLevel: "info", + }, + { + name: "non-existing priority class should log warning and still return name", + config: &JobConfigs{PriorityClassName: "missing-priority"}, + priorityClassExists: false, + expectedValue: "missing-priority", + expectedLogContains: "Priority class \\\"missing-priority\\\" not found in cluster", + expectedLogLevel: "warning", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + // Create a new scheme and add necessary API types + localScheme := runtime.NewScheme() + err := schedulingv1.AddToScheme(localScheme) + require.NoError(t, err) + + // Create fake client builder + clientBuilder := fake.NewClientBuilder().WithScheme(localScheme) + + // Add priority class if it should exist + if tc.priorityClassExists { + priorityClass := &schedulingv1.PriorityClass{ + ObjectMeta: metav1.ObjectMeta{ + Name: tc.config.PriorityClassName, + }, + Value: 
1000, + } + clientBuilder = clientBuilder.WithObjects(priorityClass) + } + + client := clientBuilder.Build() + + // Capture logs + var logBuffer strings.Builder + logger := logrus.New() + logger.SetOutput(&logBuffer) + logger.SetLevel(logrus.InfoLevel) + + // Call the function + result := getPriorityClassName(t.Context(), client, tc.config, logger) + + // Verify the result + assert.Equal(t, tc.expectedValue, result) + + // Verify log output + logOutput := logBuffer.String() + if tc.expectedLogContains != "" { + assert.Contains(t, logOutput, tc.expectedLogContains) + } + + // Verify log level + if tc.expectedLogLevel == "warning" { + assert.Contains(t, logOutput, "level=warning") + } else if tc.expectedLogLevel == "info" { + assert.Contains(t, logOutput, "level=info") + } + }) + } +} + +func TestBuildJobWithPriorityClassName(t *testing.T) { + testCases := []struct { + name string + priorityClassName string + expectedValue string + }{ + { + name: "with priority class name", + priorityClassName: "high-priority", + expectedValue: "high-priority", + }, + { + name: "without priority class name", + priorityClassName: "", + expectedValue: "", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + // Create a new scheme and add necessary API types + localScheme := runtime.NewScheme() + err := velerov1api.AddToScheme(localScheme) + require.NoError(t, err) + err = appsv1api.AddToScheme(localScheme) + require.NoError(t, err) + err = batchv1api.AddToScheme(localScheme) + require.NoError(t, err) + err = schedulingv1.AddToScheme(localScheme) + require.NoError(t, err) + // Create a fake client + client := fake.NewClientBuilder().WithScheme(localScheme).Build() + + // Create a deployment with the specified priority class name + deployment := &appsv1api.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "velero", + Namespace: "velero", + }, + Spec: appsv1api.DeploymentSpec{ + Template: corev1api.PodTemplateSpec{ + Spec: corev1api.PodSpec{ + Containers: []corev1api.Container{ + { + Name: "velero", + Image: "velero/velero:latest", + }, + }, + PriorityClassName: tc.priorityClassName, + }, + }, + }, + } + + // Create a backup repository + repo := &velerov1api.BackupRepository{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-repo", + Namespace: "velero", + }, + Spec: velerov1api.BackupRepositorySpec{ + VolumeNamespace: "velero", + BackupStorageLocation: "default", + }, + } + + // Create the deployment in the fake client + err = client.Create(t.Context(), deployment) + require.NoError(t, err) + + // Create minimal job configs and resources + jobConfig := &JobConfigs{ + PriorityClassName: tc.priorityClassName, + } + podResources := kube.PodResources{ + CPURequest: "100m", + MemoryRequest: "128Mi", + CPULimit: "200m", + MemoryLimit: "256Mi", + } + logLevel := logrus.InfoLevel + logFormat := logging.NewFormatFlag() + logFormat.Set("text") + + // Call buildJob + job, err := buildJob(client, t.Context(), repo, "default", jobConfig, podResources, logLevel, logFormat, logrus.New()) + require.NoError(t, err) + + // Verify the priority class name is set correctly + assert.Equal(t, tc.expectedValue, job.Spec.Template.Spec.PriorityClassName) + }) + } +} diff --git a/pkg/util/collections/includes_excludes.go b/pkg/util/collections/includes_excludes.go index 1cff16f3e..19ef926f0 100644 --- a/pkg/util/collections/includes_excludes.go +++ b/pkg/util/collections/includes_excludes.go @@ -19,6 +19,8 @@ package collections import ( "strings" + "github.com/vmware-tanzu/velero/internal/resourcepolicies" 
+ "github.com/gobwas/glob" "github.com/pkg/errors" "github.com/sirupsen/logrus" @@ -105,14 +107,63 @@ func (ie *IncludesExcludes) ShouldInclude(s string) bool { return ie.includes.Len() == 0 || ie.includes.Has("*") || ie.includes.match(s) } +// IncludesString returns a string containing all of the includes, separated by commas, or * if the +// list is empty. +func (ie *IncludesExcludes) IncludesString() string { + return asString(ie.GetIncludes(), "*") +} + +// ExcludesString returns a string containing all of the excludes, separated by commas, or if the +// list is empty. +func (ie *IncludesExcludes) ExcludesString() string { + return asString(ie.GetExcludes(), "") +} + +// IncludeEverything returns true if the includes list is empty or '*' +// and the excludes list is empty, or false otherwise. +func (ie *IncludesExcludes) IncludeEverything() bool { + return ie.excludes.Len() == 0 && (ie.includes.Len() == 0 || (ie.includes.Len() == 1 && ie.includes.Has("*"))) +} + +// GetResourceIncludesExcludes takes the lists of resources to include and exclude, uses the +// discovery helper to resolve them to fully-qualified group-resource names, and returns an +// IncludesExcludes list. +func GetResourceIncludesExcludes(helper discovery.Helper, includes, excludes []string) *IncludesExcludes { + resources := generateIncludesExcludes( + includes, + excludes, + func(item string) string { + gvr, _, err := helper.ResourceFor(schema.ParseGroupResource(item).WithVersion("")) + if err != nil { + // If we can't resolve it, return it as-is. This prevents the generated + // includes-excludes list from including *everything*, if none of the includes + // can be resolved. ref. https://github.com/vmware-tanzu/velero/issues/2461 + return item + } + + gr := gvr.GroupResource() + return gr.String() + }, + ) + + return resources +} + +func asString(in []string, empty string) string { + if len(in) == 0 { + return empty + } + return strings.Join(in, ", ") +} + // IncludesExcludesInterface is used as polymorphic IncludesExcludes for Global and scope // resources Include/Exclude. type IncludesExcludesInterface interface { - // Check whether the type name passed in by parameter should be included. + // ShouldInclude checks whether the type name passed in by parameter should be included. // typeName should be k8s.io/apimachinery/pkg/runtime/schema GroupResource's String() result. ShouldInclude(typeName string) bool - // Check whether the type name passed in by parameter should be excluded. + // ShouldExclude checks whether the type name passed in by parameter should be excluded. // typeName should be k8s.io/apimachinery/pkg/runtime/schema GroupResource's String() result. 
ShouldExclude(typeName string) bool } @@ -188,6 +239,20 @@ func (ie *GlobalIncludesExcludes) ShouldExclude(typeName string) bool { return false } +func GetGlobalResourceIncludesExcludes(helper discovery.Helper, logger logrus.FieldLogger, includes, excludes []string, includeClusterResources *bool, nsIncludesExcludes IncludesExcludes) *GlobalIncludesExcludes { + ret := &GlobalIncludesExcludes{ + resourceFilter: *GetResourceIncludesExcludes(helper, includes, excludes), + includeClusterResources: includeClusterResources, + namespaceFilter: nsIncludesExcludes, + helper: helper, + logger: logger, + } + + logger.Infof("Including resources: %s", ret.resourceFilter.IncludesString()) + logger.Infof("Excluding resources: %s", ret.resourceFilter.ExcludesString()) + return ret +} + type ScopeIncludesExcludes struct { namespaceScopedResourceFilter IncludesExcludes // namespace-scoped resource filter clusterScopedResourceFilter IncludesExcludes // cluster-scoped resource filter @@ -259,29 +324,64 @@ func (ie *ScopeIncludesExcludes) ShouldExclude(typeName string) bool { return false } -// IncludesString returns a string containing all of the includes, separated by commas, or * if the -// list is empty. -func (ie *IncludesExcludes) IncludesString() string { - return asString(ie.GetIncludes(), "*") -} - -// ExcludesString returns a string containing all of the excludes, separated by commas, or if the -// list is empty. -func (ie *IncludesExcludes) ExcludesString() string { - return asString(ie.GetExcludes(), "") -} - -func asString(in []string, empty string) string { - if len(in) == 0 { - return empty +func (ie *ScopeIncludesExcludes) CombineWithPolicy(policy *resourcepolicies.IncludeExcludePolicy) { + if policy == nil { + return } - return strings.Join(in, ", ") -} - -// IncludeEverything returns true if the includes list is empty or '*' -// and the excludes list is empty, or false otherwise. -func (ie *IncludesExcludes) IncludeEverything() bool { - return ie.excludes.Len() == 0 && (ie.includes.Len() == 0 || (ie.includes.Len() == 1 && ie.includes.Has("*"))) + mapFunc := scopeResourceMapFunc(ie.helper) + for _, item := range policy.ExcludedNamespaceScopedResources { + resolvedItem := mapFunc(item, true) + if resolvedItem == "" { + continue + } + // The existing includeExcludes in the struct has higher priority, therefore, we should only add the item to the filter + // when the struct does not include this item and this item is not yet in the excludes filter. + if !ie.namespaceScopedResourceFilter.includes.match(resolvedItem) && + !ie.namespaceScopedResourceFilter.excludes.match(resolvedItem) { + ie.namespaceScopedResourceFilter.Excludes(resolvedItem) + } + } + for _, item := range policy.IncludedNamespaceScopedResources { + resolvedItem := mapFunc(item, true) + if resolvedItem == "" { + continue + } + // The existing includeExcludes in the struct has higher priority, therefore, we should only add the item to the filter + // when the struct does not exclude this item and this item is not yet in the includes filter. 
+ if !ie.namespaceScopedResourceFilter.includes.match(resolvedItem) && + !ie.namespaceScopedResourceFilter.excludes.match(resolvedItem) { + ie.namespaceScopedResourceFilter.Includes(resolvedItem) + } + } + for _, item := range policy.ExcludedClusterScopedResources { + resolvedItem := mapFunc(item, false) + if resolvedItem == "" { + continue + } + if !ie.clusterScopedResourceFilter.includes.match(resolvedItem) && + !ie.clusterScopedResourceFilter.excludes.match(resolvedItem) { + // The existing includeExcludes in the struct has higher priority, therefore, we should only add the item to the filter + // when the struct does not exclude this item and this item is not yet in the includes filter. + ie.clusterScopedResourceFilter.Excludes(resolvedItem) + } + } + for _, item := range policy.IncludedClusterScopedResources { + resolvedItem := mapFunc(item, false) + if resolvedItem == "" { + continue + } + if !ie.clusterScopedResourceFilter.includes.match(resolvedItem) && + !ie.clusterScopedResourceFilter.excludes.match(resolvedItem) { + // The existing includeExcludes in the struct has higher priority, therefore, we should only add the item to the filter + // when the struct does not exclude this item and this item is not yet in the includes filter. + ie.clusterScopedResourceFilter.Includes(resolvedItem) + } + } + ie.logger.Infof("Scoped resource includes/excludes after combining with resource policy") + ie.logger.Infof("Including namespace-scoped resources: %s", ie.namespaceScopedResourceFilter.IncludesString()) + ie.logger.Infof("Excluding namespace-scoped resources: %s", ie.namespaceScopedResourceFilter.ExcludesString()) + ie.logger.Infof("Including cluster-scoped resources: %s", ie.clusterScopedResourceFilter.GetIncludes()) + ie.logger.Infof("Excluding cluster-scoped resources: %s", ie.clusterScopedResourceFilter.ExcludesString()) } func newScopeIncludesExcludes(nsIncludesExcludes IncludesExcludes, helper discovery.Helper, logger logrus.FieldLogger) *ScopeIncludesExcludes { @@ -302,6 +402,43 @@ func newScopeIncludesExcludes(nsIncludesExcludes IncludesExcludes, helper discov return ret } +// GetScopeResourceIncludesExcludes function is similar with GetResourceIncludesExcludes, +// but it's used for scoped Includes/Excludes, and can handle both cluster-scoped and namespace-scoped resources. 
+func GetScopeResourceIncludesExcludes(helper discovery.Helper, logger logrus.FieldLogger, namespaceIncludes, namespaceExcludes, clusterIncludes, clusterExcludes []string, nsIncludesExcludes IncludesExcludes) *ScopeIncludesExcludes { + ret := generateScopedIncludesExcludes( + namespaceIncludes, + namespaceExcludes, + clusterIncludes, + clusterExcludes, + scopeResourceMapFunc(helper), + nsIncludesExcludes, + helper, + logger, + ) + logger.Infof("Scoped resource includes/excludes after initialization") + logger.Infof("Including namespace-scoped resources: %s", ret.namespaceScopedResourceFilter.IncludesString()) + logger.Infof("Excluding namespace-scoped resources: %s", ret.namespaceScopedResourceFilter.ExcludesString()) + logger.Infof("Including cluster-scoped resources: %s", ret.clusterScopedResourceFilter.GetIncludes()) + logger.Infof("Excluding cluster-scoped resources: %s", ret.clusterScopedResourceFilter.ExcludesString()) + + return ret +} + +func scopeResourceMapFunc(helper discovery.Helper) func(string, bool) string { + return func(item string, namespaced bool) string { + gvr, resource, err := helper.ResourceFor(schema.ParseGroupResource(item).WithVersion("")) + if err != nil { + return item + } + if resource.Namespaced != namespaced { + return "" + } + + gr := gvr.GroupResource() + return gr.String() + } +} + // ValidateIncludesExcludes checks provided lists of included and excluded // items to ensure they are a valid set of IncludesExcludes data. func ValidateIncludesExcludes(includesList, excludesList []string) []error { @@ -470,97 +607,16 @@ func generateFilter(filter globStringSet, resources []string, mapFunc func(strin } } -// GetResourceIncludesExcludes takes the lists of resources to include and exclude, uses the -// discovery helper to resolve them to fully-qualified group-resource names, and returns an -// IncludesExcludes list. -func GetResourceIncludesExcludes(helper discovery.Helper, includes, excludes []string) *IncludesExcludes { - resources := generateIncludesExcludes( - includes, - excludes, - func(item string) string { - gvr, _, err := helper.ResourceFor(schema.ParseGroupResource(item).WithVersion("")) - if err != nil { - // If we can't resolve it, return it as-is. This prevents the generated - // includes-excludes list from including *everything*, if none of the includes - // can be resolved. ref. https://github.com/vmware-tanzu/velero/issues/2461 - return item - } - - gr := gvr.GroupResource() - return gr.String() - }, - ) - - return resources -} - -func GetGlobalResourceIncludesExcludes(helper discovery.Helper, logger logrus.FieldLogger, includes, excludes []string, includeClusterResources *bool, nsIncludesExcludes IncludesExcludes) *GlobalIncludesExcludes { - ret := &GlobalIncludesExcludes{ - resourceFilter: *GetResourceIncludesExcludes(helper, includes, excludes), - includeClusterResources: includeClusterResources, - namespaceFilter: nsIncludesExcludes, - helper: helper, - logger: logger, - } - - logger.Infof("Including resources: %s", ret.resourceFilter.IncludesString()) - logger.Infof("Excluding resources: %s", ret.resourceFilter.ExcludesString()) - return ret -} - -// GetScopeResourceIncludesExcludes function is similar with GetResourceIncludesExcludes, -// but it's used for scoped Includes/Excludes, and can handle both cluster-scoped and namespace-scoped resources. 
-func GetScopeResourceIncludesExcludes(helper discovery.Helper, logger logrus.FieldLogger, namespaceIncludes, namespaceExcludes, clusterIncludes, clusterExcludes []string, nsIncludesExcludes IncludesExcludes) *ScopeIncludesExcludes { - ret := generateScopedIncludesExcludes( - namespaceIncludes, - namespaceExcludes, - clusterIncludes, - clusterExcludes, - func(item string, namespaced bool) string { - gvr, resource, err := helper.ResourceFor(schema.ParseGroupResource(item).WithVersion("")) - if err != nil { - return item - } - if resource.Namespaced != namespaced { - return "" - } - - gr := gvr.GroupResource() - return gr.String() - }, - nsIncludesExcludes, - helper, - logger, - ) - logger.Infof("Including namespace-scoped resources: %s", ret.namespaceScopedResourceFilter.IncludesString()) - logger.Infof("Excluding namespace-scoped resources: %s", ret.namespaceScopedResourceFilter.ExcludesString()) - logger.Infof("Including cluster-scoped resources: %s", ret.clusterScopedResourceFilter.GetIncludes()) - logger.Infof("Excluding cluster-scoped resources: %s", ret.clusterScopedResourceFilter.ExcludesString()) - - return ret -} - // UseOldResourceFilters checks whether to use old resource filters (IncludeClusterResources, // IncludedResources and ExcludedResources), depending the backup's filters setting. // New filters are IncludedClusterScopedResources, ExcludedClusterScopedResources, // IncludedNamespaceScopedResources and ExcludedNamespaceScopedResources. +// If all resource filters are none, it is treated as using new parameter filters. func UseOldResourceFilters(backupSpec velerov1api.BackupSpec) bool { - // If all resource filters are none, it is treated as using old parameter filters. - if backupSpec.IncludeClusterResources == nil && - len(backupSpec.IncludedResources) == 0 && - len(backupSpec.ExcludedResources) == 0 && - len(backupSpec.IncludedClusterScopedResources) == 0 && - len(backupSpec.ExcludedClusterScopedResources) == 0 && - len(backupSpec.IncludedNamespaceScopedResources) == 0 && - len(backupSpec.ExcludedNamespaceScopedResources) == 0 { - return true - } - if backupSpec.IncludeClusterResources != nil || len(backupSpec.IncludedResources) > 0 || len(backupSpec.ExcludedResources) > 0 { return true } - return false } diff --git a/pkg/util/collections/includes_excludes_test.go b/pkg/util/collections/includes_excludes_test.go index a3466066c..34e5f9987 100644 --- a/pkg/util/collections/includes_excludes_test.go +++ b/pkg/util/collections/includes_excludes_test.go @@ -19,6 +19,8 @@ package collections import ( "testing" + "github.com/vmware-tanzu/velero/internal/resourcepolicies" + "github.com/pkg/errors" "github.com/sirupsen/logrus" "github.com/stretchr/testify/assert" @@ -741,6 +743,100 @@ func TestGetScopedResourceIncludesExcludes(t *testing.T) { } } +func TestScopeIncludesExcludes_CombineWithPolicy(t *testing.T) { + apiResources := []*test.APIResource{test.Deployments(), test.Pods(), test.ConfigMaps(), test.Secrets(), test.PVs(), test.CRDs(), test.ServiceAccounts()} + tests := []struct { + name string + namespaceScopedIncludes []string + namespaceScopedExcludes []string + clusterScopedIncludes []string + clusterScopedExcludes []string + policy *resourcepolicies.IncludeExcludePolicy + verify func(sie ScopeIncludesExcludes) bool + }{ + { + name: "When policy is nil, the original includes excludes filters should not change", + namespaceScopedIncludes: []string{"deployments", "pods"}, + namespaceScopedExcludes: []string{"configmaps"}, + clusterScopedIncludes: 
[]string{"persistentvolumes"}, + clusterScopedExcludes: []string{"crds"}, + policy: nil, + verify: func(sie ScopeIncludesExcludes) bool { + return sie.clusterScopedResourceFilter.ShouldInclude("persistentvolumes") && + !sie.clusterScopedResourceFilter.ShouldInclude("crds") && + sie.namespaceScopedResourceFilter.ShouldInclude("deployments") && + !sie.namespaceScopedResourceFilter.ShouldInclude("configmaps") + }, + }, + { + name: "policy includes excludes should be merged to the original includes excludes when there's no conflict", + namespaceScopedIncludes: []string{"pods"}, + namespaceScopedExcludes: []string{"configmaps"}, + clusterScopedIncludes: []string{}, + clusterScopedExcludes: []string{"crds"}, + policy: &resourcepolicies.IncludeExcludePolicy{ + IncludedNamespaceScopedResources: []string{"deployments"}, + ExcludedNamespaceScopedResources: []string{"secrets"}, + IncludedClusterScopedResources: []string{"persistentvolumes"}, + ExcludedClusterScopedResources: []string{}, + }, + verify: func(sie ScopeIncludesExcludes) bool { + return sie.clusterScopedResourceFilter.ShouldInclude("persistentvolumes") && + !sie.clusterScopedResourceFilter.ShouldInclude("crds") && + sie.namespaceScopedResourceFilter.ShouldInclude("deployments") && + !sie.namespaceScopedResourceFilter.ShouldInclude("configmaps") && + !sie.namespaceScopedResourceFilter.ShouldInclude("secrets") + }, + }, + { + name: "when there are conflicts, the existing includes excludes filters have higher priorities", + namespaceScopedIncludes: []string{"pods", "deployments"}, + namespaceScopedExcludes: []string{"configmaps"}, + clusterScopedIncludes: []string{"crds"}, + clusterScopedExcludes: []string{"persistentvolumes"}, + policy: &resourcepolicies.IncludeExcludePolicy{ + IncludedNamespaceScopedResources: []string{"configmaps"}, + ExcludedNamespaceScopedResources: []string{"pods", "secrets"}, + IncludedClusterScopedResources: []string{"persistentvolumes"}, + ExcludedClusterScopedResources: []string{"crds"}, + }, + verify: func(sie ScopeIncludesExcludes) bool { + return sie.clusterScopedResourceFilter.ShouldInclude("crds") && + !sie.clusterScopedResourceFilter.ShouldInclude("persistentvolumes") && + sie.namespaceScopedResourceFilter.ShouldInclude("pods") && + !sie.namespaceScopedResourceFilter.ShouldInclude("configmaps") && + !sie.namespaceScopedResourceFilter.ShouldInclude("secrets") + }, + }, + { + name: "verify the case when there's '*' in the original include filter", + namespaceScopedIncludes: []string{"*"}, + namespaceScopedExcludes: []string{}, + clusterScopedIncludes: []string{}, + clusterScopedExcludes: []string{}, + policy: &resourcepolicies.IncludeExcludePolicy{ + IncludedNamespaceScopedResources: []string{"deployments", "pods"}, + ExcludedNamespaceScopedResources: []string{"configmaps", "secrets"}, + IncludedClusterScopedResources: []string{}, + ExcludedClusterScopedResources: []string{}, + }, + verify: func(sie ScopeIncludesExcludes) bool { + return sie.namespaceScopedResourceFilter.ShouldInclude("configmaps") && + sie.namespaceScopedResourceFilter.ShouldInclude("secrets") + }, + }, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + logger := logrus.StandardLogger() + discoveryHelper := setupDiscoveryClientWithResources(apiResources) + sie := GetScopeResourceIncludesExcludes(discoveryHelper, logger, tc.namespaceScopedIncludes, tc.namespaceScopedExcludes, tc.clusterScopedIncludes, tc.clusterScopedExcludes, *NewIncludesExcludes()) + sie.CombineWithPolicy(tc.policy) + assert.True(t, tc.verify(*sie)) + }) 
+ } +} + func TestUseOldResourceFilters(t *testing.T) { tests := []struct { name string @@ -748,9 +844,9 @@ func TestUseOldResourceFilters(t *testing.T) { useOldResourceFilters bool }{ { - name: "backup with no filters should use old filters", + name: "backup with no filters should use new filters", backup: *defaultBackup().Result(), - useOldResourceFilters: true, + useOldResourceFilters: false, }, { name: "backup with only old filters should use old filters", diff --git a/pkg/util/kube/priority_class.go b/pkg/util/kube/priority_class.go new file mode 100644 index 000000000..3f2ddc256 --- /dev/null +++ b/pkg/util/kube/priority_class.go @@ -0,0 +1,64 @@ +/* +Copyright the Velero contributors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package kube + +import ( + "context" + + "github.com/sirupsen/logrus" + schedulingv1 "k8s.io/api/scheduling/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/kubernetes" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// ValidatePriorityClass checks if the specified priority class exists in the cluster +// Returns true if the priority class exists or if priorityClassName is empty +// Returns false if the priority class doesn't exist or validation fails +// Logs warnings when the priority class doesn't exist +func ValidatePriorityClass(ctx context.Context, kubeClient kubernetes.Interface, priorityClassName string, logger logrus.FieldLogger) bool { + if priorityClassName == "" { + return true + } + + _, err := kubeClient.SchedulingV1().PriorityClasses().Get(ctx, priorityClassName, metav1.GetOptions{}) + if err != nil { + if apierrors.IsNotFound(err) { + logger.Warnf("Priority class %q not found in cluster. Pod creation may fail if the priority class doesn't exist when pods are scheduled.", priorityClassName) + } else { + logger.WithError(err).Warnf("Failed to validate priority class %q", priorityClassName) + } + return false + } + logger.Infof("Validated priority class %q exists in cluster", priorityClassName) + return true +} + +// ValidatePriorityClassWithClient checks if the specified priority class exists in the cluster using controller-runtime client +// Returns nil if the priority class exists or if priorityClassName is empty +// Returns error if the priority class doesn't exist or validation fails +func ValidatePriorityClassWithClient(ctx context.Context, cli client.Client, priorityClassName string) error { + if priorityClassName == "" { + return nil + } + + priorityClass := &schedulingv1.PriorityClass{} + err := cli.Get(ctx, types.NamespacedName{Name: priorityClassName}, priorityClass) + return err +} diff --git a/pkg/util/kube/priority_class_test.go b/pkg/util/kube/priority_class_test.go new file mode 100644 index 000000000..9d6a52599 --- /dev/null +++ b/pkg/util/kube/priority_class_test.go @@ -0,0 +1,128 @@ +/* +Copyright the Velero contributors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package kube + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/assert" + schedulingv1 "k8s.io/api/scheduling/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/kubernetes/fake" + k8stesting "k8s.io/client-go/testing" + + velerotesting "github.com/vmware-tanzu/velero/pkg/test" +) + +func TestValidatePriorityClass(t *testing.T) { + tests := []struct { + name string + priorityClassName string + existingPCs []runtime.Object + clientReactors []k8stesting.ReactionFunc + expectedLogs []string + expectedLogLevel string + expectedResult bool + }{ + { + name: "empty priority class name should return without logging", + priorityClassName: "", + existingPCs: nil, + expectedLogs: nil, + expectedResult: true, + }, + { + name: "existing priority class should log info message", + priorityClassName: "high-priority", + existingPCs: []runtime.Object{ + &schedulingv1.PriorityClass{ + ObjectMeta: metav1.ObjectMeta{ + Name: "high-priority", + }, + Value: 100, + }, + }, + expectedLogs: []string{"Validated priority class \\\"high-priority\\\" exists in cluster"}, + expectedLogLevel: "info", + expectedResult: true, + }, + { + name: "non-existing priority class should log warning", + priorityClassName: "does-not-exist", + existingPCs: nil, + expectedLogs: []string{"Priority class \\\"does-not-exist\\\" not found in cluster. Pod creation may fail if the priority class doesn't exist when pods are scheduled."}, + expectedLogLevel: "warning", + expectedResult: false, + }, + { + name: "API error should log warning with error", + priorityClassName: "test-priority", + existingPCs: nil, + clientReactors: []k8stesting.ReactionFunc{ + func(action k8stesting.Action) (handled bool, ret runtime.Object, err error) { + if action.GetVerb() == "get" && action.GetResource().Resource == "priorityclasses" { + return true, nil, fmt.Errorf("API server error") + } + return false, nil, nil + }, + }, + expectedLogs: []string{"Failed to validate priority class \\\"test-priority\\\""}, + expectedLogLevel: "warning", + expectedResult: false, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + // Create fake client with existing priority classes + kubeClient := fake.NewSimpleClientset(test.existingPCs...) 
+ + // Add any custom reactors + for _, reactor := range test.clientReactors { + kubeClient.PrependReactor("*", "*", reactor) + } + + // Create test logger with buffer + buffer := []string{} + logger := velerotesting.NewMultipleLogger(&buffer) + + // Call the function + result := ValidatePriorityClass(t.Context(), kubeClient, test.priorityClassName, logger) + + // Check result + assert.Equal(t, test.expectedResult, result, "ValidatePriorityClass returned unexpected result") + + // Check logs + if test.expectedLogs == nil { + assert.Empty(t, buffer) + } else { + assert.Len(t, buffer, len(test.expectedLogs)) + + for i, expectedLog := range test.expectedLogs { + assert.Contains(t, buffer[i], expectedLog) + if test.expectedLogLevel == "info" { + assert.Contains(t, buffer[i], "level=info") + } else if test.expectedLogLevel == "warning" { + assert.Contains(t, buffer[i], "level=warning") + } + } + } + }) + } +} diff --git a/site/content/docs/main/basic-install.md b/site/content/docs/main/basic-install.md index b84afcdde..79c432092 100644 --- a/site/content/docs/main/basic-install.md +++ b/site/content/docs/main/basic-install.md @@ -4,7 +4,7 @@ layout: docs --- Use this doc to get a basic installation of Velero. -Refer [this document](customize-installation.md) to customize your installation. +Refer [this document](customize-installation.md) to customize your installation, including setting priority classes for Velero components. ## Prerequisites diff --git a/site/content/docs/main/csi-snapshot-data-movement.md b/site/content/docs/main/csi-snapshot-data-movement.md index 304cf32f6..930fc1be5 100644 --- a/site/content/docs/main/csi-snapshot-data-movement.md +++ b/site/content/docs/main/csi-snapshot-data-movement.md @@ -18,7 +18,11 @@ On the other hand, there are quite some cases that CSI snapshot is not available CSI Snapshot Data Movement supports both built-in data mover and customized data movers. For the details of how Velero works with customized data movers, check the [Volume Snapshot Data Movement design][1]. Velero provides a built-in data mover which uses Velero built-in uploaders (at present the available uploader is Kopia uploader) to read the snapshot data and write to the Unified Repository (by default implemented by Kopia repository). -Velero built-in data mover restores both volume data and metadata, so the data mover pods need to run as root user. +Velero built-in data mover restores both volume data and metadata, so the data mover pods need to run as root user. + +### Priority Class Configuration + +For Velero built-in data mover, data mover pods launched during CSI snapshot data movement will use the priority class name configured in the node-agent configmap. The node-agent daemonset itself gets its priority class from the `--node-agent-priority-class-name` flag during Velero installation. This can help ensure proper scheduling behavior in resource-constrained environments. For more details on configuring data mover pod resources, see [Data Movement Pod Resource Configuration][11]. ## Setup CSI Snapshot Data Movement @@ -364,7 +368,7 @@ At present, Velero doesn't allow to set `ReadOnlyRootFileSystem` parameter to da Both the uploader and repository consume remarkable CPU/memory during the backup/restore, especially for massive small files or large backup size cases. For Velero built-in data mover, Velero uses [BestEffort as the QoS][13] for data mover pods (so no CPU/memory request/limit is set), so that backups/restores wouldn't fail due to resource throttling in any cases. 
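The documentation above states that data mover pods take their priority class from the node-agent ConfigMap rather than from an install flag. Below is a minimal Go sketch of what that wiring can look like; the `nodeAgentConfig` struct and `newDataMoverPod` helper are hypothetical names used only for illustration and are not Velero's actual types.

```go
// Illustrative sketch: apply a priority class from the node-agent ConfigMap
// to a data mover pod spec. nodeAgentConfig and newDataMoverPod are
// hypothetical stand-ins, not Velero's real API.
package main

import (
	"encoding/json"
	"fmt"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// nodeAgentConfig mirrors only the field discussed above.
type nodeAgentConfig struct {
	PriorityClassName string `json:"priorityClassName,omitempty"`
}

// newDataMoverPod builds a minimal pod spec and applies the configured priority class.
func newDataMoverPod(cfg nodeAgentConfig) *corev1.Pod {
	pod := &corev1.Pod{
		ObjectMeta: metav1.ObjectMeta{Name: "example-data-mover", Namespace: "velero"},
		Spec: corev1.PodSpec{
			Containers: []corev1.Container{{Name: "data-mover", Image: "velero/velero:main"}},
		},
	}
	// An empty value leaves the pod at the cluster default, matching the docs above.
	if cfg.PriorityClassName != "" {
		pod.Spec.PriorityClassName = cfg.PriorityClassName
	}
	return pod
}

func main() {
	raw := []byte(`{"priorityClassName": "backup-priority"}`)
	var cfg nodeAgentConfig
	if err := json.Unmarshal(raw, &cfg); err != nil {
		panic(err)
	}
	fmt.Println(newDataMoverPod(cfg).Spec.PriorityClassName) // backup-priority
}
```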
-If you want to constrain the CPU/memory usage, you need to [Customize Data Mover Pod Resource Limits][11]. The CPU/memory consumption is always related to the scale of data to be backed up/restored; refer to [Performance Guidance][12] for more details. It is therefore highly recommended that you perform your own testing to find the best resource limits for your data. +If you want to constrain the CPU/memory usage, you need to [Customize Data Mover Pod Resource Limits][11]. The CPU/memory consumption is always related to the scale of data to be backed up/restored; refer to [Performance Guidance][12] for more details. It is therefore highly recommended that you perform your own testing to find the best resource limits for your data. During the restore, the repository may also cache data/metadata so as to reduce the network footprint and speed up the restore. The repository uses its own policy to store and clean up the cache. For Kopia repository, the cache is stored in the data mover pod's root file system. Velero allows you to configure a limit of the cache size so that the data mover pod won't be evicted due to running out of the ephemeral storage. For more details, check [Backup Repository Configuration][17]. diff --git a/site/content/docs/main/customize-installation.md b/site/content/docs/main/customize-installation.md index e6a8797dc..a7cf965a6 100644 --- a/site/content/docs/main/customize-installation.md +++ b/site/content/docs/main/customize-installation.md @@ -96,6 +96,53 @@ Note that if you specify `--colorized=true` as a CLI option it will override the config file setting. + +## Set priority class names for Velero components + +You can set priority class names for different Velero components during installation. This allows you to influence the scheduling and eviction behavior of Velero pods, which can be useful in clusters where resource contention is high. + +### Priority class configuration options + +1. **Velero server deployment**: Use the `--server-priority-class-name` flag +2. **Node agent daemonset**: Use the `--node-agent-priority-class-name` flag +3. **Data mover pods**: Configure through the node-agent configmap (see below) +4. **Maintenance jobs**: Configure through the repository maintenance job configmap (see below) + +```bash +velero install \ + --server-priority-class-name= \ + --node-agent-priority-class-name= +``` + +### Configuring priority classes for data mover pods and maintenance jobs + +For data mover pods and maintenance jobs, priority classes are configured through ConfigMaps that must be created before installation: + +**Data mover pods** (via node-agent configmap): +```bash +kubectl create configmap node-agent-config -n velero --from-file=config.json=/dev/stdin < ``` +### Priority Class + +Data mover pods use the priorityClassName configured in the node-agent configmap (specified via the `--node-agent-configmap` flag), while the node-agent daemonset itself uses the priority class set by the `--node-agent-priority-class-name` flag during Velero installation.
+ +#### When to Use Priority Classes + +**Higher Priority Classes** (e.g., `system-cluster-critical`, `system-node-critical`, or custom high-priority): +- When you have dedicated nodes for backup operations +- When backup/restore operations are time-critical +- When you want to ensure data mover pods are scheduled even during high cluster utilization +- For disaster recovery scenarios where restore speed is critical + +**Lower Priority Classes** (e.g., `low-priority` or negative values): +- When you want to protect production workload performance +- When backup operations can be delayed during peak hours +- When cluster resources are limited and production workloads take precedence +- For non-critical backup operations that can tolerate delays + +#### Consequences of Priority Class Settings + +**High Priority**: +- ✅ Data mover pods are more likely to be scheduled quickly +- ✅ Less likely to be preempted by other workloads +- ❌ May cause resource pressure on production workloads +- ❌ Could lead to production pod evictions in extreme cases + +**Low Priority**: +- ✅ Production workloads are protected from resource competition +- ✅ Cluster stability is maintained during backup operations +- ❌ Backup/restore operations may take longer to start +- ❌ Data mover pods may be preempted, causing backup failures +- ❌ In resource-constrained clusters, backups might not run at all + +#### Example Configuration + +To configure priority class for data mover pods, include it in your node-agent configmap: + +```json +{ + "podResources": { + "cpuRequest": "1000m", + "cpuLimit": "2000m", + "memoryRequest": "1Gi", + "memoryLimit": "4Gi" + }, + "priorityClassName": "backup-priority" +} +``` + +First, create the priority class in your cluster: + +```yaml +apiVersion: scheduling.k8s.io/v1 +kind: PriorityClass +metadata: + name: backup-priority +value: 1000 +globalDefault: false +description: "Priority class for Velero data mover pods" +``` + +Then create or update the node-agent configmap: + +```bash +kubectl create cm node-agent-config -n velero --from-file=node-agent-config.json +``` + +**Note**: If the specified priority class doesn't exist in the cluster when data mover pods are created, the pods will fail to schedule. Velero validates the priority class at startup and logs a warning if it doesn't exist, but the pods will still attempt to use it. + [1]: csi-snapshot-data-movement.md [2]: file-system-backup.md [3]: https://kubernetes.io/docs/concepts/workloads/pods/pod-qos/ diff --git a/site/content/docs/main/file-system-backup.md b/site/content/docs/main/file-system-backup.md index ab12accd4..b4d904f1b 100644 --- a/site/content/docs/main/file-system-backup.md +++ b/site/content/docs/main/file-system-backup.md @@ -691,6 +691,10 @@ spec: ...... ``` +## Priority Class Configuration + +For Velero built-in data mover, data mover pods launched during file system backup will use the priority class name configured in the node-agent configmap. The node-agent daemonset itself gets its priority class from the `--node-agent-priority-class-name` flag during Velero installation. This can help ensure proper scheduling behavior in resource-constrained environments. For more details on configuring data mover pod resources, see [Data Movement Pod Resource Configuration][data-movement-config]. + ## Resource Consumption Both the uploader and repository consume remarkable CPU/memory during the backup/restore, especially for massive small files or large backup size cases. 
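The note above says Velero validates the configured priority class at startup and only logs a warning when it is missing. A minimal sketch of how a caller might invoke the `ValidatePriorityClass` helper added earlier in this patch is shown below; only the helper's signature comes from the patch, and the surrounding client setup is illustrative.

```go
// Sketch of calling the ValidatePriorityClass helper from pkg/util/kube at
// startup. The kubeconfig/client construction here is illustrative only.
package main

import (
	"context"

	"github.com/sirupsen/logrus"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/clientcmd"

	"github.com/vmware-tanzu/velero/pkg/util/kube"
)

func main() {
	logger := logrus.New()

	cfg, err := clientcmd.BuildConfigFromFlags("", clientcmd.RecommendedHomeFile)
	if err != nil {
		logger.WithError(err).Fatal("building kubeconfig")
	}
	client, err := kubernetes.NewForConfig(cfg)
	if err != nil {
		logger.WithError(err).Fatal("building client")
	}

	// Assume "backup-priority" came from the node-agent ConfigMap or an install flag.
	// The helper returns false and logs a warning if the class is missing, but the
	// caller can continue: pods will simply fail admission later if it never exists.
	if !kube.ValidatePriorityClass(context.Background(), client, "backup-priority", logger) {
		logger.Warn("continuing despite unresolved priority class")
	}
}
```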
@@ -762,3 +766,4 @@ Velero still effectively manage restic repository, though you cannot write any n [18]: backup-repository-configuration.md [19]: node-agent-concurrency.md [20]: node-agent-prepare-queue-length.md +[data-movement-config]: data-movement-pod-resource-configuration.md diff --git a/site/content/docs/main/repository-maintenance.md b/site/content/docs/main/repository-maintenance.md index 10237de5c..439f219ce 100644 --- a/site/content/docs/main/repository-maintenance.md +++ b/site/content/docs/main/repository-maintenance.md @@ -155,7 +155,7 @@ Status: - `Recent Maintenance` keeps the status of the recent 3 maintenance jobs, including its start time, result (succeeded/failed), completion time (if the maintenance job succeeded), or error message (if the maintenance failed) ### Others -Maintenance jobs will inherit toleration, nodeSelector, service account, image, environment variables, cloud-credentials etc. from Velero deployment. +Maintenance jobs will inherit toleration, nodeSelector, service account, image, environment variables, cloud-credentials, priorityClassName etc. from Velero deployment. For labels and annotations, maintenance jobs do NOT inherit all labels and annotations from the Velero deployment. Instead, they include: @@ -171,7 +171,24 @@ For labels and annotations, maintenance jobs do NOT inherit all labels and annot * `iam.amazonaws.com/role` **Important:** Other labels and annotations from the Velero deployment are NOT inherited by maintenance jobs. This is by design to ensure only specific labels and annotations required for cloud provider identity systems are propagated. -Maintenance jobs will not run for backup repositories whose backup storage location is set as readOnly. +Maintenance jobs will not run for backup repositories whose backup storage location is set as readOnly. + +#### Priority Class Configuration +Maintenance jobs can be configured with a specific priority class through the repository maintenance job ConfigMap. The priority class name should be specified in the global configuration section: + +```json +{ + "global": { + "priorityClassName": "low-priority", + "podResources": { + "cpuRequest": "100m", + "memoryRequest": "128Mi" + } + } +} +``` + +Note that priority class configuration is only read from the global configuration section, ensuring all maintenance jobs use the same priority class regardless of which repository they are maintaining. [1]: velero-install.md#usage [2]: node-agent-concurrency.md diff --git a/site/content/docs/main/velero-install.md b/site/content/docs/main/velero-install.md index 51c52db1c..a4925278c 100644 --- a/site/content/docs/main/velero-install.md +++ b/site/content/docs/main/velero-install.md @@ -31,12 +31,16 @@ velero install \ [--maintenance-job-cpu-request ] \ [--maintenance-job-mem-request ] \ [--maintenance-job-cpu-limit ] \ - [--maintenance-job-mem-limit ] + [--maintenance-job-mem-limit ] \ + [--server-priority-class-name ] \ + [--node-agent-priority-class-name ] ``` The values for the resource requests and limits flags follow the same format as [Kubernetes resource requirements][3] For plugin container images, please refer to our [supported providers][2] page. +The `--server-priority-class-name` and `--node-agent-priority-class-name` flags allow you to set priority classes for the Velero server deployment and node agent daemonset respectively. This can help ensure proper scheduling and eviction behavior in resource-constrained environments. 
Note that you must create the priority class before installing Velero. + ## Examples This section provides examples that serve as a starting point for more customized installations. diff --git a/test/util/velero/install.go b/test/util/velero/install.go index 887bef479..ac993b434 100644 --- a/test/util/velero/install.go +++ b/test/util/velero/install.go @@ -418,6 +418,14 @@ func installVeleroServer( args = append(args, "--sa-annotations", options.ServiceAccountAnnotations.String()) } + if options.ServerPriorityClassName != "" { + args = append(args, "--server-priority-class-name", options.ServerPriorityClassName) + } + + if options.NodeAgentPriorityClassName != "" { + args = append(args, "--node-agent-priority-class-name", options.NodeAgentPriorityClassName) + } + // Only version no older than v1.15 support --backup-repository-configmap. if options.BackupRepoConfigMap != "" && (semver.Compare(version, "v1.15") >= 0 || version == "main") { @@ -425,6 +433,14 @@ func installVeleroServer( args = append(args, fmt.Sprintf("--backup-repository-configmap=%s", options.BackupRepoConfigMap)) } + if options.RepoMaintenanceJobConfigMap != "" { + args = append(args, fmt.Sprintf("--repo-maintenance-job-configmap=%s", options.RepoMaintenanceJobConfigMap)) + } + + if options.NodeAgentConfigMap != "" { + args = append(args, fmt.Sprintf("--node-agent-configmap=%s", options.NodeAgentConfigMap)) + } + if err := createVeleroResources(ctx, cli, namespace, args, options); err != nil { return err } diff --git a/test/util/velero/velero_utils.go b/test/util/velero/velero_utils.go index db417a0a3..59212861e 100644 --- a/test/util/velero/velero_utils.go +++ b/test/util/velero/velero_utils.go @@ -272,6 +272,8 @@ func getProviderVeleroInstallOptions(veleroCfg *VeleroConfig, io.VeleroPodMemRequest = veleroCfg.VeleroPodMemRequest io.DisableInformerCache = veleroCfg.DisableInformerCache io.ItemBlockWorkerCount = veleroCfg.ItemBlockWorkerCount + io.ServerPriorityClassName = veleroCfg.ServerPriorityClassName + io.NodeAgentPriorityClassName = veleroCfg.NodeAgentPriorityClassName return io, nil }
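The velero-install docs above require the priority class to exist before installation, and the E2E changes only pass the new flags when the corresponding options are set. Below is a rough, self-contained sketch of that precondition; `installOptions` is a simplified stand-in for the E2E options struct, with only the two field names taken from this patch, and the fake clientset is used purely to keep the sketch runnable.

```go
// Sketch: create the priority class before installation, as the docs above
// require, then set the new install options. installOptions is a simplified
// stand-in for the E2E options struct.
package main

import (
	"context"
	"log"

	schedulingv1 "k8s.io/api/scheduling/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/kubernetes/fake"
)

type installOptions struct {
	ServerPriorityClassName    string
	NodeAgentPriorityClassName string
}

// ensurePriorityClass creates the priority class the install will reference.
func ensurePriorityClass(ctx context.Context, client kubernetes.Interface, name string) error {
	pc := &schedulingv1.PriorityClass{
		ObjectMeta: metav1.ObjectMeta{Name: name},
		Value:      1000,
	}
	_, err := client.SchedulingV1().PriorityClasses().Create(ctx, pc, metav1.CreateOptions{})
	return err
}

func main() {
	// A fake clientset keeps the sketch self-contained; a real test would use a live cluster client.
	client := fake.NewSimpleClientset()
	if err := ensurePriorityClass(context.Background(), client, "velero-components"); err != nil {
		log.Fatal(err)
	}
	opts := installOptions{
		ServerPriorityClassName:    "velero-components",
		NodeAgentPriorityClassName: "velero-components",
	}
	log.Printf("would pass --server-priority-class-name=%s --node-agent-priority-class-name=%s",
		opts.ServerPriorityClassName, opts.NodeAgentPriorityClassName)
}
```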