Mirror of https://github.com/vmware-tanzu/velero.git (synced 2026-01-08 22:23:15 +00:00)
Add --server-priority-class-name and --node-agent-priority-class-name flags to velero install command

- Configure data mover pods (PVB/PVR/DataUpload/DataDownload) to use priority class from node-agent-configmap
- Configure maintenance jobs to use priority class from repo-maintenance-job-configmap (global config only)
- Add priority class validation with ValidatePriorityClass and GetDataMoverPriorityClassName utilities
- Update e2e tests to include PriorityClass testing utilities
- Move priority class design document to Implemented folder
- Add comprehensive unit tests for all priority class implementations
- Update documentation for priority class configuration
- Add changelog entry for #8883

Signed-off-by: Tiger Kaovilai <tkaovila@redhat.com>

remove unused test utils

Signed-off-by: Tiger Kaovilai <tkaovila@redhat.com>

feat: add unit test for getting priority class name in maintenance jobs

Signed-off-by: Tiger Kaovilai <tkaovila@redhat.com>

doc update

Signed-off-by: Tiger Kaovilai <tkaovila@redhat.com>

feat: add priority class validation for repository maintenance jobs

- Add ValidatePriorityClassWithClient function to validate priority class existence
- Integrate validation in maintenance.go when creating maintenance jobs
- Update tests to cover the new validation functionality
- Return boolean from ValidatePriorityClass to allow fallback behavior

This ensures maintenance jobs don't fail due to non-existent priority classes, following the same pattern used for data mover pods.

Addresses feedback from: https://github.com/vmware-tanzu/velero/pull/8883#discussion_r2238681442

Refs #8869

Signed-off-by: Tiger Kaovilai <tkaovila@redhat.com>

refactor: clean up priority class handling for data mover pods

- Fix comment in node_agent.go to clarify PriorityClassName is only for data mover pods
- Simplify server.go to use dataPathConfigs.PriorityClassName directly
- Remove redundant priority class logging from controllers as it's already logged during server startup
- Keep logging centralized in the node-agent server initialization

This reduces code duplication and clarifies the scope of priority class configuration.

🤖 Generated with [Claude Code](https://claude.ai/code)

Signed-off-by: Tiger Kaovilai <tkaovila@redhat.com>

refactor: remove GetDataMoverPriorityClassName from kube utilities

Remove GetDataMoverPriorityClassName function and its tests as priority class is now read directly from dataPathConfigs instead of parsing from ConfigMap. This simplifies the codebase by eliminating the need for indirect ConfigMap parsing.

Refs #8869

🤖 Generated with [Claude Code](https://claude.ai/code)

Signed-off-by: Tiger Kaovilai <tkaovila@redhat.com>

refactor: remove priority class validation from install command

Remove priority class validation during install as it's redundant, since validation already occurs during server startup. Users cannot see console logs during install, making the validation warnings ineffective at this stage. The validation remains in place during server and node-agent startup, where it's more appropriate and visible to users.

Signed-off-by: Tiger Kaovilai <tkaovila@redhat.com>
Co-Authored-By: Claude <noreply@anthropic.com>
327 lines · 10 KiB · Go
/*
Copyright The Velero Contributors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

	http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package nodeagent

import (
	"context"
	"encoding/json"
	"fmt"

	"github.com/pkg/errors"
	corev1api "k8s.io/api/core/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/client-go/kubernetes"
	ctrlclient "sigs.k8s.io/controller-runtime/pkg/client"

	"github.com/vmware-tanzu/velero/pkg/util/kube"
)

const (
	// daemonSet is the name of the Velero node-agent daemonset on Linux nodes.
	daemonSet = "node-agent"

	// daemonsetWindows is the name of the Velero node-agent daemonset on Windows nodes.
	daemonsetWindows = "node-agent-windows"

	// nodeAgentRole marks pods with the node-agent role on all nodes.
	nodeAgentRole = "node-agent"

	// HostPodVolumeMount is the name of the volume in node-agent for the host-pod mount.
	HostPodVolumeMount = "host-pods"

	// HostPodVolumeMountPoint is the mount point of the volume in node-agent for the host-pod mount.
	HostPodVolumeMountPoint = "host_pods"
)

var (
	ErrDaemonSetNotFound           = errors.New("daemonset not found")
	ErrNodeAgentLabelNotFound      = errors.New("node-agent label not found")
	ErrNodeAgentAnnotationNotFound = errors.New("node-agent annotation not found")
	ErrNodeAgentTolerationNotFound = errors.New("node-agent toleration not found")
)

type LoadConcurrency struct {
	// GlobalConfig specifies the concurrency number to all nodes for which per-node config is not specified
	GlobalConfig int `json:"globalConfig,omitempty"`

	// PerNodeConfig specifies the concurrency number to nodes matched by rules
	PerNodeConfig []RuledConfigs `json:"perNodeConfig,omitempty"`

	// PrepareQueueLength specifies the max number of loads that are under expose
	PrepareQueueLength int `json:"prepareQueueLength,omitempty"`
}

type LoadAffinity struct {
	// NodeSelector specifies the label selector to match nodes
	NodeSelector metav1.LabelSelector `json:"nodeSelector"`
}

type RuledConfigs struct {
	// NodeSelector specifies the label selector to match nodes
	NodeSelector metav1.LabelSelector `json:"nodeSelector"`

	// Number specifies the number value associated to the matched nodes
	Number int `json:"number"`
}

type BackupPVC struct {
	// StorageClass is the name of storage class to be used by the backupPVC
	StorageClass string `json:"storageClass,omitempty"`

	// ReadOnly sets the backupPVC's access mode as read only
	ReadOnly bool `json:"readOnly,omitempty"`

	// SPCNoRelabeling sets Spec.SecurityContext.SELinux.Type to "spc_t" for the pod mounting the backupPVC;
	// ignored if ReadOnly is false
	SPCNoRelabeling bool `json:"spcNoRelabeling,omitempty"`
}

type RestorePVC struct {
	// IgnoreDelayBinding indicates to ignore delay binding the restorePVC when it is in WaitForFirstConsumer mode
	IgnoreDelayBinding bool `json:"ignoreDelayBinding,omitempty"`
}

type Configs struct {
	// LoadConcurrency is the config for data path load concurrency per node.
	LoadConcurrency *LoadConcurrency `json:"loadConcurrency,omitempty"`

	// LoadAffinity is the config for data path load affinity.
	LoadAffinity []*kube.LoadAffinity `json:"loadAffinity,omitempty"`

	// BackupPVCConfig is the config for backupPVC (intermediate PVC) of snapshot data movement
	BackupPVCConfig map[string]BackupPVC `json:"backupPVC,omitempty"`

	// RestorePVCConfig is the config for restorePVC (intermediate PVC) of generic restore
	RestorePVCConfig *RestorePVC `json:"restorePVC,omitempty"`

	// PodResources is the resource config for various types of pods launched by node-agent, i.e., data mover pods.
	PodResources *kube.PodResources `json:"podResources,omitempty"`

	// PriorityClassName is the priority class name for data mover pods created by the node agent
	PriorityClassName string `json:"priorityClassName,omitempty"`
}

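// Illustrative only, not part of the original file: a minimal sketch of a
// node-agent ConfigMap payload that GetConfigs (below) would unmarshal into
// Configs. The keys come from the json tags above; all values, including the
// storage class and priority class names, are made-up examples.
//
//	{
//	  "loadConcurrency": {
//	    "globalConfig": 2,
//	    "perNodeConfig": [
//	      {"nodeSelector": {"matchLabels": {"disktype": "ssd"}}, "number": 4}
//	    ]
//	  },
//	  "backupPVC": {
//	    "fast-sc": {"storageClass": "fast-sc-clone", "readOnly": true, "spcNoRelabeling": true}
//	  },
//	  "restorePVC": {"ignoreDelayBinding": true},
//	  "priorityClassName": "velero-data-mover"
//	}
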
func IsRunningOnLinux(ctx context.Context, kubeClient kubernetes.Interface, namespace string) error {
	return isRunning(ctx, kubeClient, namespace, daemonSet)
}

func IsRunningOnWindows(ctx context.Context, kubeClient kubernetes.Interface, namespace string) error {
	return isRunning(ctx, kubeClient, namespace, daemonsetWindows)
}

func isRunning(ctx context.Context, kubeClient kubernetes.Interface, namespace string, daemonset string) error {
	if _, err := kubeClient.AppsV1().DaemonSets(namespace).Get(ctx, daemonset, metav1.GetOptions{}); apierrors.IsNotFound(err) {
		return ErrDaemonSetNotFound
	} else if err != nil {
		return err
	} else {
		return nil
	}
}

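// Illustrative usage, not part of the original file (hypothetical caller and
// namespace): since isRunning returns the ErrDaemonSetNotFound sentinel
// directly, callers can compare against it to distinguish "not installed"
// from other API errors.
//
//	if err := nodeagent.IsRunningOnLinux(ctx, kubeClient, "velero"); err == nodeagent.ErrDaemonSetNotFound {
//		// the Linux node-agent daemonset is not installed in the Velero namespace
//	}
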
// KbClientIsRunningInNode checks if the node-agent pod is running properly in a specified node through the kube client. If not, it returns the error found.
func KbClientIsRunningInNode(ctx context.Context, namespace string, nodeName string, kubeClient kubernetes.Interface) error {
	return isRunningInNode(ctx, namespace, nodeName, nil, kubeClient)
}

// IsRunningInNode checks if the node-agent pod is running properly in a specified node through the controller client. If not, it returns the error found.
func IsRunningInNode(ctx context.Context, namespace string, nodeName string, crClient ctrlclient.Client) error {
	return isRunningInNode(ctx, namespace, nodeName, crClient, nil)
}

func isRunningInNode(ctx context.Context, namespace string, nodeName string, crClient ctrlclient.Client, kubeClient kubernetes.Interface) error {
	if nodeName == "" {
		return errors.New("node name is empty")
	}

	pods := new(corev1api.PodList)
	parsedSelector, err := labels.Parse(fmt.Sprintf("role=%s", nodeAgentRole))
	if err != nil {
		return errors.Wrap(err, "failed to parse selector")
	}

	if crClient != nil {
		err = crClient.List(ctx, pods, &ctrlclient.ListOptions{LabelSelector: parsedSelector})
	} else {
		pods, err = kubeClient.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{LabelSelector: parsedSelector.String()})
	}

	if err != nil {
		return errors.Wrap(err, "failed to list node-agent pods")
	}

	for i := range pods.Items {
		if kube.IsPodRunning(&pods.Items[i]) != nil {
			continue
		}

		if pods.Items[i].Spec.NodeName == nodeName {
			return nil
		}
	}

	return errors.Errorf("daemonset pod not found in running state in node %s", nodeName)
}

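// Illustrative usage, not part of the original file (hypothetical variables):
// a data path caller can confirm a node-agent pod is up on the target node
// before exposing a volume there.
//
//	if err := nodeagent.IsRunningInNode(ctx, "velero", pod.Spec.NodeName, crClient); err != nil {
//		// no running node-agent pod on that node; retry or fail the data path
//	}
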
func GetPodSpec(ctx context.Context, kubeClient kubernetes.Interface, namespace string, osType string) (*corev1api.PodSpec, error) {
	dsName := daemonSet
	if osType == kube.NodeOSWindows {
		dsName = daemonsetWindows
	}

	ds, err := kubeClient.AppsV1().DaemonSets(namespace).Get(ctx, dsName, metav1.GetOptions{})
	if err != nil {
		return nil, errors.Wrapf(err, "error getting %s daemonset", dsName)
	}

	return &ds.Spec.Template.Spec, nil
}

func GetConfigs(ctx context.Context, namespace string, kubeClient kubernetes.Interface, configName string) (*Configs, error) {
	cm, err := kubeClient.CoreV1().ConfigMaps(namespace).Get(ctx, configName, metav1.GetOptions{})
	if err != nil {
		return nil, errors.Wrapf(err, "error getting node-agent configs %s", configName)
	}

	if cm.Data == nil {
		return nil, errors.Errorf("data is not available in config map %s", configName)
	}

	// cm.Data is expected to hold a single key whose value is the JSON document;
	// if multiple keys exist, an arbitrary one wins since map iteration order is unspecified.
	jsonString := ""
	for _, v := range cm.Data {
		jsonString = v
	}

	configs := &Configs{}
	err = json.Unmarshal([]byte(jsonString), configs)
	if err != nil {
		return nil, errors.Wrapf(err, "error unmarshalling configs from %s", configName)
	}

	return configs, nil
}

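// Illustrative usage, not part of the original file (hypothetical namespace
// and ConfigMap name): this is how a server component might load the data
// path configs and pick up the priority class for data mover pods.
//
//	configs, err := nodeagent.GetConfigs(ctx, "velero", kubeClient, "node-agent-config")
//	if err == nil && configs.PriorityClassName != "" {
//		// propagate configs.PriorityClassName to data mover pods created by the node agent
//	}
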
func GetLabelValue(ctx context.Context, kubeClient kubernetes.Interface, namespace string, key string, osType string) (string, error) {
	dsName := daemonSet
	if osType == kube.NodeOSWindows {
		dsName = daemonsetWindows
	}

	ds, err := kubeClient.AppsV1().DaemonSets(namespace).Get(ctx, dsName, metav1.GetOptions{})
	if err != nil {
		return "", errors.Wrapf(err, "error getting %s daemonset", dsName)
	}

	if ds.Spec.Template.Labels == nil {
		return "", ErrNodeAgentLabelNotFound
	}

	val, found := ds.Spec.Template.Labels[key]
	if !found {
		return "", ErrNodeAgentLabelNotFound
	}

	return val, nil
}

func GetAnnotationValue(ctx context.Context, kubeClient kubernetes.Interface, namespace string, key string, osType string) (string, error) {
	dsName := daemonSet
	if osType == kube.NodeOSWindows {
		dsName = daemonsetWindows
	}

	ds, err := kubeClient.AppsV1().DaemonSets(namespace).Get(ctx, dsName, metav1.GetOptions{})
	if err != nil {
		return "", errors.Wrapf(err, "error getting %s daemonset", dsName)
	}

	if ds.Spec.Template.Annotations == nil {
		return "", ErrNodeAgentAnnotationNotFound
	}

	val, found := ds.Spec.Template.Annotations[key]
	if !found {
		return "", ErrNodeAgentAnnotationNotFound
	}

	return val, nil
}

func GetToleration(ctx context.Context, kubeClient kubernetes.Interface, namespace string, key string, osType string) (*corev1api.Toleration, error) {
	dsName := daemonSet
	if osType == kube.NodeOSWindows {
		dsName = daemonsetWindows
	}

	ds, err := kubeClient.AppsV1().DaemonSets(namespace).Get(ctx, dsName, metav1.GetOptions{})
	if err != nil {
		return nil, errors.Wrapf(err, "error getting %s daemonset", dsName)
	}

	for i, t := range ds.Spec.Template.Spec.Tolerations {
		if t.Key == key {
			return &ds.Spec.Template.Spec.Tolerations[i], nil
		}
	}

	return nil, ErrNodeAgentTolerationNotFound
}

func GetHostPodPath(ctx context.Context, kubeClient kubernetes.Interface, namespace string, osType string) (string, error) {
	dsName := daemonSet
	if osType == kube.NodeOSWindows {
		dsName = daemonsetWindows
	}

	ds, err := kubeClient.AppsV1().DaemonSets(namespace).Get(ctx, dsName, metav1.GetOptions{})
	if err != nil {
		return "", errors.Wrapf(err, "error getting daemonset %s", dsName)
	}

	// Index into the slice rather than taking the address of the range variable.
	var volume *corev1api.Volume
	for i := range ds.Spec.Template.Spec.Volumes {
		if ds.Spec.Template.Spec.Volumes[i].Name == HostPodVolumeMount {
			volume = &ds.Spec.Template.Spec.Volumes[i]
			break
		}
	}

	if volume == nil {
		return "", errors.New("host pod volume is not found")
	}

	if volume.HostPath == nil {
		return "", errors.New("host pod volume is not a host path volume")
	}

	if volume.HostPath.Path == "" {
		return "", errors.New("host pod volume path is empty")
	}

	return volume.HostPath.Path, nil
}

func HostPodVolumeMountPath() string {
	return "/" + HostPodVolumeMountPoint
}

func HostPodVolumeMountPathWin() string {
	return "\\" + HostPodVolumeMountPoint
}
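
// Illustrative note, not part of the original file: with the constants above,
// HostPodVolumeMountPath() returns "/host_pods" and HostPodVolumeMountPathWin()
// returns "\host_pods" inside the node-agent container, while GetHostPodPath
// reports the host-side path backing that mount (for example, a kubelet pods
// directory such as /var/lib/kubelet/pods; the exact value depends on the
// daemonset's volume definition).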