mirror of
https://github.com/vmware-tanzu/velero.git
synced 2026-01-07 13:55:20 +00:00
Add priority class support for Velero server and node-agent
- Add --server-priority-class-name and --node-agent-priority-class-name flags to velero install command
- Configure data mover pods (PVB/PVR/DataUpload/DataDownload) to use priority class from node-agent-configmap
- Configure maintenance jobs to use priority class from repo-maintenance-job-configmap (global config only)
- Add priority class validation with ValidatePriorityClass and GetDataMoverPriorityClassName utilities
- Update e2e tests to include PriorityClass testing utilities
- Move priority class design document to Implemented folder
- Add comprehensive unit tests for all priority class implementations
- Update documentation for priority class configuration
- Add changelog entry for #8883

Signed-off-by: Tiger Kaovilai <tkaovila@redhat.com>

remove unused test utils

Signed-off-by: Tiger Kaovilai <tkaovila@redhat.com>

feat: add unit test for getting priority class name in maintenance jobs

Signed-off-by: Tiger Kaovilai <tkaovila@redhat.com>

doc update

Signed-off-by: Tiger Kaovilai <tkaovila@redhat.com>

feat: add priority class validation for repository maintenance jobs

- Add ValidatePriorityClassWithClient function to validate priority class existence
- Integrate validation in maintenance.go when creating maintenance jobs
- Update tests to cover the new validation functionality
- Return boolean from ValidatePriorityClass to allow fallback behavior

This ensures maintenance jobs don't fail due to non-existent priority classes, following the same pattern used for data mover pods.

Addresses feedback from: https://github.com/vmware-tanzu/velero/pull/8883#discussion_r2238681442

Refs #8869

Signed-off-by: Tiger Kaovilai <tkaovila@redhat.com>

refactor: clean up priority class handling for data mover pods

- Fix comment in node_agent.go to clarify PriorityClassName is only for data mover pods
- Simplify server.go to use dataPathConfigs.PriorityClassName directly
- Remove redundant priority class logging from controllers as it's already logged during server startup
- Keep logging centralized in the node-agent server initialization

This reduces code duplication and clarifies the scope of priority class configuration.

🤖 Generated with [Claude Code](https://claude.ai/code)

Signed-off-by: Tiger Kaovilai <tkaovila@redhat.com>

refactor: remove GetDataMoverPriorityClassName from kube utilities

Remove GetDataMoverPriorityClassName function and its tests as priority class is now read directly from dataPathConfigs instead of parsing from ConfigMap. This simplifies the codebase by eliminating the need for indirect ConfigMap parsing.

Refs #8869

🤖 Generated with [Claude Code](https://claude.ai/code)

Signed-off-by: Tiger Kaovilai <tkaovila@redhat.com>

refactor: remove priority class validation from install command

Remove priority class validation during install as it's redundant since validation already occurs during server startup. Users cannot see console logs during install, making the validation warnings ineffective at this stage.

The validation remains in place during server and node-agent startup where it's more appropriate and visible to users.

Signed-off-by: Tiger Kaovilai <tkaovila@redhat.com>
Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -91,6 +91,8 @@ type Options struct {
|
||||
ItemBlockWorkerCount int
|
||||
NodeAgentDisableHostPath bool
|
||||
kubeletRootDir string
|
||||
ServerPriorityClassName string
|
||||
NodeAgentPriorityClassName string
|
||||
}
|
||||
|
||||
// BindFlags adds command line values to the options struct.
|
||||
@@ -194,6 +196,18 @@ func (o *Options) BindFlags(flags *pflag.FlagSet) {
|
||||
o.ItemBlockWorkerCount,
|
||||
"Number of worker threads to process ItemBlocks. Default is one. Optional.",
|
||||
)
|
||||
flags.StringVar(
|
||||
&o.ServerPriorityClassName,
|
||||
"server-priority-class-name",
|
||||
o.ServerPriorityClassName,
|
||||
"Priority class name for the Velero server deployment. Optional.",
|
||||
)
|
||||
flags.StringVar(
|
||||
&o.NodeAgentPriorityClassName,
|
||||
"node-agent-priority-class-name",
|
||||
o.NodeAgentPriorityClassName,
|
||||
"Priority class name for the node agent daemonset. Optional.",
|
||||
)
|
||||
}
|
||||
|
||||
// NewInstallOptions instantiates a new, default InstallOptions struct.
|
||||
@@ -301,6 +315,8 @@ func (o *Options) AsVeleroOptions() (*install.VeleroOptions, error) {
|
||||
ItemBlockWorkerCount: o.ItemBlockWorkerCount,
|
||||
KubeletRootDir: o.kubeletRootDir,
|
||||
NodeAgentDisableHostPath: o.NodeAgentDisableHostPath,
|
||||
ServerPriorityClassName: o.ServerPriorityClassName,
|
||||
NodeAgentPriorityClassName: o.NodeAgentPriorityClassName,
|
||||
}, nil
|
||||
}
|
||||
|
||||
@@ -389,6 +405,7 @@ func (o *Options) Run(c *cobra.Command, f client.Factory) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
errorMsg := fmt.Sprintf("\n\nError installing Velero. Use `kubectl logs deploy/velero -n %s` to check the deploy logs", o.Namespace)
|
||||
|
||||
err = install.Install(dynamicFactory, kbClient, resources, os.Stdout)
|
||||
|
||||
93
pkg/cmd/cli/install/install_test.go
Normal file
93
pkg/cmd/cli/install/install_test.go
Normal file
@@ -0,0 +1,93 @@
|
||||
/*
|
||||
Copyright the Velero contributors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package install
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/spf13/pflag"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestPriorityClassNameFlag(t *testing.T) {
|
||||
// Test that the flag is properly defined
|
||||
o := NewInstallOptions()
|
||||
flags := pflag.NewFlagSet("test", pflag.ContinueOnError)
|
||||
o.BindFlags(flags)
|
||||
|
||||
// Verify the server priority class flag exists
|
||||
serverFlag := flags.Lookup("server-priority-class-name")
|
||||
assert.NotNil(t, serverFlag, "server-priority-class-name flag should exist")
|
||||
assert.Equal(t, "Priority class name for the Velero server deployment. Optional.", serverFlag.Usage)
|
||||
|
||||
// Verify the node agent priority class flag exists
|
||||
nodeAgentFlag := flags.Lookup("node-agent-priority-class-name")
|
||||
assert.NotNil(t, nodeAgentFlag, "node-agent-priority-class-name flag should exist")
|
||||
assert.Equal(t, "Priority class name for the node agent daemonset. Optional.", nodeAgentFlag.Usage)
|
||||
|
||||
// Test with values for both server and node agent
|
||||
testCases := []struct {
|
||||
name string
|
||||
serverPriorityClassName string
|
||||
nodeAgentPriorityClassName string
|
||||
expectedServerValue string
|
||||
expectedNodeAgentValue string
|
||||
}{
|
||||
{
|
||||
name: "with both priority class names",
|
||||
serverPriorityClassName: "high-priority",
|
||||
nodeAgentPriorityClassName: "medium-priority",
|
||||
expectedServerValue: "high-priority",
|
||||
expectedNodeAgentValue: "medium-priority",
|
||||
},
|
||||
{
|
||||
name: "with only server priority class name",
|
||||
serverPriorityClassName: "high-priority",
|
||||
nodeAgentPriorityClassName: "",
|
||||
expectedServerValue: "high-priority",
|
||||
expectedNodeAgentValue: "",
|
||||
},
|
||||
{
|
||||
name: "with only node agent priority class name",
|
||||
serverPriorityClassName: "",
|
||||
nodeAgentPriorityClassName: "medium-priority",
|
||||
expectedServerValue: "",
|
||||
expectedNodeAgentValue: "medium-priority",
|
||||
},
|
||||
{
|
||||
name: "without priority class names",
|
||||
serverPriorityClassName: "",
|
||||
nodeAgentPriorityClassName: "",
|
||||
expectedServerValue: "",
|
||||
expectedNodeAgentValue: "",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
o := NewInstallOptions()
|
||||
o.ServerPriorityClassName = tc.serverPriorityClassName
|
||||
o.NodeAgentPriorityClassName = tc.nodeAgentPriorityClassName
|
||||
|
||||
veleroOptions, err := o.AsVeleroOptions()
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, tc.expectedServerValue, veleroOptions.ServerPriorityClassName)
|
||||
assert.Equal(t, tc.expectedNodeAgentValue, veleroOptions.NodeAgentPriorityClassName)
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -280,6 +280,21 @@ func (s *nodeAgentServer) run() {
|
||||
|
||||
s.logger.Info("Starting controllers")
|
||||
|
||||
// Get priority class from dataPathConfigs if available
|
||||
dataMovePriorityClass := ""
|
||||
if s.dataPathConfigs != nil && s.dataPathConfigs.PriorityClassName != "" {
|
||||
priorityClass := s.dataPathConfigs.PriorityClassName
|
||||
// Validate the priority class exists in the cluster
|
||||
ctx, cancel := context.WithTimeout(s.ctx, time.Second*30)
|
||||
defer cancel()
|
||||
if kube.ValidatePriorityClass(ctx, s.kubeClient, priorityClass, s.logger.WithField("component", "data-mover")) {
|
||||
dataMovePriorityClass = priorityClass
|
||||
s.logger.WithField("priorityClassName", priorityClass).Info("Using priority class for data mover pods")
|
||||
} else {
|
||||
s.logger.WithField("priorityClassName", priorityClass).Warn("Priority class not found in cluster, data mover pods will use default priority")
|
||||
}
|
||||
}
|
||||
|
||||
var loadAffinity []*kube.LoadAffinity
|
||||
if s.dataPathConfigs != nil && len(s.dataPathConfigs.LoadAffinity) > 0 {
|
||||
loadAffinity = s.dataPathConfigs.LoadAffinity
|
||||
@@ -311,12 +326,12 @@ func (s *nodeAgentServer) run() {
|
||||
}
|
||||
}
|
||||
|
||||
pvbReconciler := controller.NewPodVolumeBackupReconciler(s.mgr.GetClient(), s.mgr, s.kubeClient, s.dataPathMgr, s.vgdpCounter, s.nodeName, s.config.dataMoverPrepareTimeout, s.config.resourceTimeout, podResources, s.metrics, s.logger)
|
||||
pvbReconciler := controller.NewPodVolumeBackupReconciler(s.mgr.GetClient(), s.mgr, s.kubeClient, s.dataPathMgr, s.vgdpCounter, s.nodeName, s.config.dataMoverPrepareTimeout, s.config.resourceTimeout, podResources, s.metrics, s.logger, dataMovePriorityClass)
|
||||
if err := pvbReconciler.SetupWithManager(s.mgr); err != nil {
|
||||
s.logger.Fatal(err, "unable to create controller", "controller", constant.ControllerPodVolumeBackup)
|
||||
}
|
||||
|
||||
pvrReconciler := controller.NewPodVolumeRestoreReconciler(s.mgr.GetClient(), s.mgr, s.kubeClient, s.dataPathMgr, s.vgdpCounter, s.nodeName, s.config.dataMoverPrepareTimeout, s.config.resourceTimeout, podResources, s.logger)
|
||||
pvrReconciler := controller.NewPodVolumeRestoreReconciler(s.mgr.GetClient(), s.mgr, s.kubeClient, s.dataPathMgr, s.vgdpCounter, s.nodeName, s.config.dataMoverPrepareTimeout, s.config.resourceTimeout, podResources, s.logger, dataMovePriorityClass)
|
||||
if err := pvrReconciler.SetupWithManager(s.mgr); err != nil {
|
||||
s.logger.WithError(err).Fatal("Unable to create the pod volume restore controller")
|
||||
}
|
||||
@@ -340,6 +355,7 @@ func (s *nodeAgentServer) run() {
|
||||
s.config.dataMoverPrepareTimeout,
|
||||
s.logger,
|
||||
s.metrics,
|
||||
dataMovePriorityClass,
|
||||
)
|
||||
if err := dataUploadReconciler.SetupWithManager(s.mgr); err != nil {
|
||||
s.logger.WithError(err).Fatal("Unable to create the data upload controller")
|
||||
@@ -364,6 +380,7 @@ func (s *nodeAgentServer) run() {
|
||||
s.config.dataMoverPrepareTimeout,
|
||||
s.logger,
|
||||
s.metrics,
|
||||
dataMovePriorityClass,
|
||||
)
|
||||
|
||||
if err := dataDownloadReconciler.SetupWithManager(s.mgr); err != nil {
|
||||
|
||||
Reference in New Issue
Block a user