Merge pull request #9072 from Lyndon-Li/issue-fix-8857
Some checks failed
Run the E2E test on kind / build (push) Failing after 7m29s
Run the E2E test on kind / setup-test-matrix (push) Successful in 4s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Main CI / Build (push) Failing after 36s

Issue 8857: support third party tolerations
This commit is contained in:
lyndon-li
2025-07-11 11:08:25 +08:00
committed by GitHub
12 changed files with 263 additions and 15 deletions

View File

@@ -0,0 +1 @@
Fix issue #8857, support third party tolerations for data mover pods

View File

@@ -840,6 +840,17 @@ func (r *DataDownloadReconciler) setupExposeParam(dd *velerov2alpha1api.DataDown
}
}
hostingPodTolerations := []corev1api.Toleration{}
for _, k := range util.ThirdPartyTolerations {
if v, err := nodeagent.GetToleration(context.Background(), r.kubeClient, dd.Namespace, k, nodeOS); err != nil {
if err != nodeagent.ErrNodeAgentTolerationNotFound {
log.WithError(err).Warnf("Failed to check node-agent toleration, skip adding host pod toleration %s", k)
}
} else {
hostingPodTolerations = append(hostingPodTolerations, *v)
}
}
affinity := kube.GetLoadAffinityByStorageClass(r.loadAffinity, dd.Spec.BackupStorageLocation, log)
return exposer.GenericRestoreExposeParam{
@@ -847,6 +858,7 @@ func (r *DataDownloadReconciler) setupExposeParam(dd *velerov2alpha1api.DataDown
TargetNamespace: dd.Spec.TargetVolume.Namespace,
HostingPodLabels: hostingPodLabels,
HostingPodAnnotations: hostingPodAnnotation,
HostingPodTolerations: hostingPodTolerations,
Resources: r.podResources,
OperationTimeout: dd.Spec.OperationTimeout.Duration,
ExposeTimeout: r.preparingTimeout,

View File

@@ -917,6 +917,17 @@ func (r *DataUploadReconciler) setupExposeParam(du *velerov2alpha1api.DataUpload
}
}
hostingPodTolerations := []corev1api.Toleration{}
for _, k := range util.ThirdPartyTolerations {
if v, err := nodeagent.GetToleration(context.Background(), r.kubeClient, du.Namespace, k, nodeOS); err != nil {
if err != nodeagent.ErrNodeAgentTolerationNotFound {
log.WithError(err).Warnf("Failed to check node-agent toleration, skip adding host pod toleration %s", k)
}
} else {
hostingPodTolerations = append(hostingPodTolerations, *v)
}
}
affinity := kube.GetLoadAffinityByStorageClass(r.loadAffinity, du.Spec.CSISnapshot.SnapshotClass, log)
return &exposer.CSISnapshotExposeParam{
@@ -925,6 +936,7 @@ func (r *DataUploadReconciler) setupExposeParam(du *velerov2alpha1api.DataUpload
StorageClass: du.Spec.CSISnapshot.StorageClass,
HostingPodLabels: hostingPodLabels,
HostingPodAnnotations: hostingPodAnnotation,
HostingPodTolerations: hostingPodTolerations,
AccessMode: accessMode,
OperationTimeout: du.Spec.OperationTimeout.Duration,
ExposeTimeout: r.preparingTimeout,

View File

@@ -761,9 +761,14 @@ func (r *PodVolumeBackupReconciler) closeDataPath(ctx context.Context, pvbName s
func (r *PodVolumeBackupReconciler) setupExposeParam(pvb *velerov1api.PodVolumeBackup) exposer.PodVolumeExposeParam {
log := r.logger.WithField("PVB", pvb.Name)
nodeOS, err := kube.GetNodeOS(context.Background(), pvb.Spec.Node, r.kubeClient.CoreV1())
if err != nil {
log.WithError(err).Warnf("Failed to get nodeOS for node %s, use linux node-agent for hosting pod labels, annotations and tolerations", pvb.Spec.Node)
}
hostingPodLabels := map[string]string{velerov1api.PVBLabel: pvb.Name}
for _, k := range util.ThirdPartyLabels {
if v, err := nodeagent.GetLabelValue(context.Background(), r.kubeClient, pvb.Namespace, k, ""); err != nil {
if v, err := nodeagent.GetLabelValue(context.Background(), r.kubeClient, pvb.Namespace, k, nodeOS); err != nil {
if err != nodeagent.ErrNodeAgentLabelNotFound {
log.WithError(err).Warnf("Failed to check node-agent label, skip adding host pod label %s", k)
}
@@ -774,7 +779,7 @@ func (r *PodVolumeBackupReconciler) setupExposeParam(pvb *velerov1api.PodVolumeB
hostingPodAnnotation := map[string]string{}
for _, k := range util.ThirdPartyAnnotations {
if v, err := nodeagent.GetAnnotationValue(context.Background(), r.kubeClient, pvb.Namespace, k, ""); err != nil {
if v, err := nodeagent.GetAnnotationValue(context.Background(), r.kubeClient, pvb.Namespace, k, nodeOS); err != nil {
if err != nodeagent.ErrNodeAgentAnnotationNotFound {
log.WithError(err).Warnf("Failed to check node-agent annotation, skip adding host pod annotation %s", k)
}
@@ -783,6 +788,17 @@ func (r *PodVolumeBackupReconciler) setupExposeParam(pvb *velerov1api.PodVolumeB
}
}
hostingPodTolerations := []corev1api.Toleration{}
for _, k := range util.ThirdPartyTolerations {
if v, err := nodeagent.GetToleration(context.Background(), r.kubeClient, pvb.Namespace, k, nodeOS); err != nil {
if err != nodeagent.ErrNodeAgentTolerationNotFound {
log.WithError(err).Warnf("Failed to check node-agent toleration, skip adding host pod toleration %s", k)
}
} else {
hostingPodTolerations = append(hostingPodTolerations, *v)
}
}
return exposer.PodVolumeExposeParam{
Type: exposer.PodVolumeExposeTypeBackup,
ClientNamespace: pvb.Spec.Pod.Namespace,
@@ -790,6 +806,7 @@ func (r *PodVolumeBackupReconciler) setupExposeParam(pvb *velerov1api.PodVolumeB
ClientPodVolume: pvb.Spec.Volume,
HostingPodLabels: hostingPodLabels,
HostingPodAnnotations: hostingPodAnnotation,
HostingPodTolerations: hostingPodTolerations,
OperationTimeout: r.resourceTimeout,
Resources: r.podResources,
}

View File

@@ -820,9 +820,14 @@ func (r *PodVolumeRestoreReconciler) OnDataPathProgress(ctx context.Context, nam
func (r *PodVolumeRestoreReconciler) setupExposeParam(pvr *velerov1api.PodVolumeRestore) exposer.PodVolumeExposeParam {
log := r.logger.WithField("PVR", pvr.Name)
nodeOS, err := kube.GetNodeOS(context.Background(), pvr.Status.Node, r.kubeClient.CoreV1())
if err != nil {
log.WithError(err).Warnf("Failed to get nodeOS for node %s, use linux node-agent for hosting pod labels, annotations and tolerations", pvr.Status.Node)
}
hostingPodLabels := map[string]string{velerov1api.PVRLabel: pvr.Name}
for _, k := range util.ThirdPartyLabels {
if v, err := nodeagent.GetLabelValue(context.Background(), r.kubeClient, pvr.Namespace, k, ""); err != nil {
if v, err := nodeagent.GetLabelValue(context.Background(), r.kubeClient, pvr.Namespace, k, nodeOS); err != nil {
if err != nodeagent.ErrNodeAgentLabelNotFound {
log.WithError(err).Warnf("Failed to check node-agent label, skip adding host pod label %s", k)
}
@@ -833,7 +838,7 @@ func (r *PodVolumeRestoreReconciler) setupExposeParam(pvr *velerov1api.PodVolume
hostingPodAnnotation := map[string]string{}
for _, k := range util.ThirdPartyAnnotations {
if v, err := nodeagent.GetAnnotationValue(context.Background(), r.kubeClient, pvr.Namespace, k, ""); err != nil {
if v, err := nodeagent.GetAnnotationValue(context.Background(), r.kubeClient, pvr.Namespace, k, nodeOS); err != nil {
if err != nodeagent.ErrNodeAgentAnnotationNotFound {
log.WithError(err).Warnf("Failed to check node-agent annotation, skip adding host pod annotation %s", k)
}
@@ -842,6 +847,17 @@ func (r *PodVolumeRestoreReconciler) setupExposeParam(pvr *velerov1api.PodVolume
}
}
hostingPodTolerations := []corev1api.Toleration{}
for _, k := range util.ThirdPartyTolerations {
if v, err := nodeagent.GetToleration(context.Background(), r.kubeClient, pvr.Namespace, k, nodeOS); err != nil {
if err != nodeagent.ErrNodeAgentTolerationNotFound {
log.WithError(err).Warnf("Failed to check node-agent toleration, skip adding host pod toleration %s", k)
}
} else {
hostingPodTolerations = append(hostingPodTolerations, *v)
}
}
return exposer.PodVolumeExposeParam{
Type: exposer.PodVolumeExposeTypeRestore,
ClientNamespace: pvr.Spec.Pod.Namespace,
@@ -849,6 +865,7 @@ func (r *PodVolumeRestoreReconciler) setupExposeParam(pvr *velerov1api.PodVolume
ClientPodVolume: pvr.Spec.Volume,
HostingPodLabels: hostingPodLabels,
HostingPodAnnotations: hostingPodAnnotation,
HostingPodTolerations: hostingPodTolerations,
OperationTimeout: r.resourceTimeout,
Resources: r.podResources,
}

View File

@@ -59,6 +59,9 @@ type CSISnapshotExposeParam struct {
// HostingPodAnnotations is the annotations that are going to apply to the hosting pod
HostingPodAnnotations map[string]string
// HostingPodTolerations is the tolerations that are going to apply to the hosting pod
HostingPodTolerations []corev1api.Toleration
// OperationTimeout specifies the time wait for resources operations in Expose
OperationTimeout time.Duration
@@ -215,6 +218,7 @@ func (e *csiSnapshotExposer) Expose(ctx context.Context, ownerObject corev1api.O
csiExposeParam.OperationTimeout,
csiExposeParam.HostingPodLabels,
csiExposeParam.HostingPodAnnotations,
csiExposeParam.HostingPodTolerations,
csiExposeParam.Affinity,
csiExposeParam.Resources,
backupPVCReadOnly,
@@ -528,6 +532,7 @@ func (e *csiSnapshotExposer) createBackupPod(
operationTimeout time.Duration,
label map[string]string,
annotation map[string]string,
toleration []corev1api.Toleration,
affinity *kube.LoadAffinity,
resources corev1api.ResourceRequirements,
backupPVCReadOnly bool,
@@ -586,7 +591,6 @@ func (e *csiSnapshotExposer) createBackupPod(
var securityCtx *corev1api.PodSecurityContext
nodeSelector := map[string]string{}
podOS := corev1api.PodOS{}
toleration := []corev1api.Toleration{}
if nodeOS == kube.NodeOSWindows {
userID := "ContainerAdministrator"
securityCtx = &corev1api.PodSecurityContext{

View File

@@ -49,6 +49,9 @@ type GenericRestoreExposeParam struct {
// HostingPodAnnotations is the annotations that are going to apply to the hosting pod
HostingPodAnnotations map[string]string
// HostingPodTolerations is the tolerations that are going to apply to the hosting pod
HostingPodTolerations []corev1api.Toleration
// Resources defines the resource requirements of the hosting pod
Resources corev1api.ResourceRequirements
@@ -140,6 +143,7 @@ func (e *genericRestoreExposer) Expose(ctx context.Context, ownerObject corev1ap
param.OperationTimeout,
param.HostingPodLabels,
param.HostingPodAnnotations,
param.HostingPodTolerations,
selectedNode,
param.Resources,
param.NodeOS,
@@ -405,6 +409,7 @@ func (e *genericRestoreExposer) createRestorePod(
operationTimeout time.Duration,
label map[string]string,
annotation map[string]string,
toleration []corev1api.Toleration,
selectedNode string,
resources corev1api.ResourceRequirements,
nodeOS string,
@@ -467,7 +472,6 @@ func (e *genericRestoreExposer) createRestorePod(
var securityCtx *corev1api.PodSecurityContext
nodeSelector := map[string]string{}
podOS := corev1api.PodOS{}
toleration := []corev1api.Toleration{}
if nodeOS == kube.NodeOSWindows {
userID := "ContainerAdministrator"
securityCtx = &corev1api.PodSecurityContext{

View File

@@ -813,6 +813,28 @@ func TestCreateRestorePod(t *testing.T) {
},
}
daemonSetWin := &appsv1api.DaemonSet{
ObjectMeta: metav1.ObjectMeta{
Namespace: "velero",
Name: "node-agent-windows",
},
TypeMeta: metav1.TypeMeta{
Kind: "DaemonSet",
APIVersion: appsv1api.SchemeGroupVersion.String(),
},
Spec: appsv1api.DaemonSetSpec{
Template: corev1api.PodTemplateSpec{
Spec: corev1api.PodSpec{
Containers: []corev1api.Container{
{
Image: "fake-image",
},
},
},
},
},
}
targetPVCObj := &corev1api.PersistentVolumeClaim{
ObjectMeta: metav1.ObjectMeta{
Namespace: "fake-ns",
@@ -828,11 +850,12 @@ func TestCreateRestorePod(t *testing.T) {
kubeClientObj []runtime.Object
selectedNode string
affinity *kube.LoadAffinity
nodeOS string
expectedPod *corev1api.Pod
}{
{
name: "",
kubeClientObj: []runtime.Object{daemonSet, targetPVCObj},
name: "linux",
kubeClientObj: []runtime.Object{daemonSet, daemonSetWin, targetPVCObj},
selectedNode: "",
affinity: &kube.LoadAffinity{
NodeSelector: metav1.LabelSelector{
@@ -840,12 +863,31 @@ func TestCreateRestorePod(t *testing.T) {
{
Key: "kubernetes.io/os",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"Linux"},
Values: []string{"linux"},
},
},
},
StorageClass: scName,
},
nodeOS: "linux",
},
{
name: "windows",
kubeClientObj: []runtime.Object{daemonSet, daemonSetWin, targetPVCObj},
selectedNode: "",
affinity: &kube.LoadAffinity{
NodeSelector: metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "kubernetes.io/os",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"windows"},
},
},
},
StorageClass: scName,
},
nodeOS: "windows",
},
}
@@ -866,11 +908,12 @@ func TestCreateRestorePod(t *testing.T) {
},
targetPVCObj,
time.Second*3,
map[string]string{},
map[string]string{},
nil,
nil,
nil,
test.selectedNode,
corev1api.ResourceRequirements{},
"linux",
test.nodeOS,
test.affinity,
)

View File

@@ -59,6 +59,9 @@ type PodVolumeExposeParam struct {
// HostingPodAnnotations is the annotations that are going to apply to the hosting pod
HostingPodAnnotations map[string]string
// HostingPodTolerations is the tolerations that are going to apply to the hosting pod
HostingPodTolerations []corev1api.Toleration
// Resources defines the resource requirements of the hosting pod
Resources corev1api.ResourceRequirements
@@ -147,7 +150,7 @@ func (e *podVolumeExposer) Expose(ctx context.Context, ownerObject corev1api.Obj
curLog.WithField("path", path).Infof("Host path is retrieved for pod %s, volume %s", param.ClientPodName, param.ClientPodVolume)
hostingPod, err := e.createHostingPod(ctx, ownerObject, param.Type, path.ByPath, param.OperationTimeout, param.HostingPodLabels, param.HostingPodAnnotations, pod.Spec.NodeName, param.Resources, nodeOS)
hostingPod, err := e.createHostingPod(ctx, ownerObject, param.Type, path.ByPath, param.OperationTimeout, param.HostingPodLabels, param.HostingPodAnnotations, param.HostingPodTolerations, pod.Spec.NodeName, param.Resources, nodeOS)
if err != nil {
return errors.Wrapf(err, "error to create hosting pod")
}
@@ -263,7 +266,7 @@ func (e *podVolumeExposer) CleanUp(ctx context.Context, ownerObject corev1api.Ob
}
func (e *podVolumeExposer) createHostingPod(ctx context.Context, ownerObject corev1api.ObjectReference, exposeType string, hostPath string,
operationTimeout time.Duration, label map[string]string, annotation map[string]string, selectedNode string, resources corev1api.ResourceRequirements, nodeOS string) (*corev1api.Pod, error) {
operationTimeout time.Duration, label map[string]string, annotation map[string]string, toleration []corev1api.Toleration, selectedNode string, resources corev1api.ResourceRequirements, nodeOS string) (*corev1api.Pod, error) {
hostingPodName := ownerObject.Name
containerName := string(ownerObject.UID)
@@ -318,7 +321,6 @@ func (e *podVolumeExposer) createHostingPod(ctx context.Context, ownerObject cor
var securityCtx *corev1api.PodSecurityContext
nodeSelector := map[string]string{}
podOS := corev1api.PodOS{}
toleration := []corev1api.Toleration{}
if nodeOS == kube.NodeOSWindows {
userID := "ContainerAdministrator"
securityCtx = &corev1api.PodSecurityContext{

View File

@@ -53,6 +53,7 @@ var (
ErrDaemonSetNotFound = errors.New("daemonset not found")
ErrNodeAgentLabelNotFound = errors.New("node-agent label not found")
ErrNodeAgentAnnotationNotFound = errors.New("node-agent annotation not found")
ErrNodeAgentTolerationNotFound = errors.New("node-agent toleration not found")
)
type LoadConcurrency struct {
@@ -256,6 +257,26 @@ func GetAnnotationValue(ctx context.Context, kubeClient kubernetes.Interface, na
return val, nil
}
// GetToleration looks up the toleration with the given key in the pod template
// of the node-agent daemonset found in namespace. The Windows daemonset is
// consulted when osType equals kube.NodeOSWindows; any other value selects the
// default daemonset.
//
// The returned pointer refers to the element inside the fetched daemonset
// object, not to a copy. ErrNodeAgentTolerationNotFound is returned when no
// toleration carries the key, and a wrapped error is returned when the
// daemonset itself cannot be retrieved.
func GetToleration(ctx context.Context, kubeClient kubernetes.Interface, namespace string, key string, osType string) (*corev1api.Toleration, error) {
	name := daemonSet
	if osType == kube.NodeOSWindows {
		name = daemonsetWindows
	}

	nodeAgent, getErr := kubeClient.AppsV1().DaemonSets(namespace).Get(ctx, name, metav1.GetOptions{})
	if getErr != nil {
		return nil, errors.Wrapf(getErr, "error getting %s daemonset", name)
	}

	// Index into the slice so the result points at the daemonset's own element.
	tolerations := nodeAgent.Spec.Template.Spec.Tolerations
	for idx := range tolerations {
		if tolerations[idx].Key != key {
			continue
		}

		return &tolerations[idx], nil
	}

	return nil, ErrNodeAgentTolerationNotFound
}
func GetHostPodPath(ctx context.Context, kubeClient kubernetes.Interface, namespace string, osType string) (string, error) {
dsName := daemonSet
if osType == kube.NodeOSWindows {

View File

@@ -592,6 +592,116 @@ func TestGetAnnotationValue(t *testing.T) {
}
}
func TestGetToleration(t *testing.T) {
daemonSet := &appsv1api.DaemonSet{
ObjectMeta: metav1.ObjectMeta{
Namespace: "fake-ns",
Name: "node-agent",
},
TypeMeta: metav1.TypeMeta{
Kind: "DaemonSet",
},
}
daemonSetWithOtherToleration := &appsv1api.DaemonSet{
ObjectMeta: metav1.ObjectMeta{
Namespace: "fake-ns",
Name: "node-agent",
},
TypeMeta: metav1.TypeMeta{
Kind: "DaemonSet",
},
Spec: appsv1api.DaemonSetSpec{
Template: corev1api.PodTemplateSpec{
Spec: corev1api.PodSpec{
Tolerations: []corev1api.Toleration{
{
Key: "other-toleration-key",
},
},
},
},
},
}
daemonSetWithToleration := &appsv1api.DaemonSet{
ObjectMeta: metav1.ObjectMeta{
Namespace: "fake-ns",
Name: "node-agent",
},
TypeMeta: metav1.TypeMeta{
Kind: "DaemonSet",
},
Spec: appsv1api.DaemonSetSpec{
Template: corev1api.PodTemplateSpec{
Spec: corev1api.PodSpec{
Tolerations: []corev1api.Toleration{
{
Key: "fake-toleration",
Value: "true",
},
},
},
},
},
}
tests := []struct {
name string
kubeClientObj []runtime.Object
namespace string
expectedValue corev1api.Toleration
expectErr string
}{
// {
// name: "ds get error",
// namespace: "fake-ns",
// expectErr: "error getting node-agent daemonset: daemonsets.apps \"node-agent\" not found",
// },
{
name: "no toleration",
namespace: "fake-ns",
kubeClientObj: []runtime.Object{
daemonSet,
},
expectErr: ErrNodeAgentTolerationNotFound.Error(),
},
{
name: "no expecting toleration",
namespace: "fake-ns",
kubeClientObj: []runtime.Object{
daemonSetWithOtherToleration,
},
expectErr: ErrNodeAgentTolerationNotFound.Error(),
},
{
name: "expecting toleration",
namespace: "fake-ns",
kubeClientObj: []runtime.Object{
daemonSetWithToleration,
},
expectedValue: corev1api.Toleration{
Key: "fake-toleration",
Value: "true",
},
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
fakeKubeClient := fake.NewSimpleClientset(test.kubeClientObj...)
value, err := GetToleration(context.TODO(), fakeKubeClient, test.namespace, "fake-toleration", kube.NodeOSLinux)
if test.expectErr == "" {
require.NoError(t, err)
assert.Equal(t, test.expectedValue, *value)
} else {
assert.EqualError(t, err, test.expectErr)
}
})
}
}
func TestGetHostPodPath(t *testing.T) {
daemonSet := &appsv1api.DaemonSet{
ObjectMeta: metav1.ObjectMeta{

View File

@@ -23,3 +23,8 @@ var ThirdPartyLabels = []string{
// ThirdPartyAnnotations lists the annotation keys that are looked up on the
// node-agent daemonset when hosting pod parameters are assembled.
// NOTE(review): presumably every key found there is copied onto the hosting
// pod's annotations — confirm against the setupExposeParam callers.
var ThirdPartyAnnotations = []string{
"iam.amazonaws.com/role",
}
// ThirdPartyTolerations lists the toleration keys that are looked up on the
// node-agent daemonset when hosting pod parameters are assembled.
// NOTE(review): presumably every toleration found under one of these keys is
// copied onto the hosting pod's tolerations — confirm against the
// setupExposeParam callers.
var ThirdPartyTolerations = []string{
"kubernetes.azure.com/scalesetpriority",
"CriticalAddonsOnly",
}