From d2eafe63ed391ce7ce8fb0d89f95fb160babb596 Mon Sep 17 00:00:00 2001 From: Shubham Pampattiwar Date: Fri, 26 Sep 2025 08:14:23 -0700 Subject: [PATCH] Fix maintenance jobs toleration inheritance from Velero deployment (#9299) fix codespell and add changelog file (cherry picked from commit 5ba00dfb097de68c6312d7497c2ef2675a64d1a9) update changelog filename update changelog Signed-off-by: Shubham Pampattiwar --- .../unreleased/9299-shubham-pampattiwar | 1 + pkg/repository/maintenance/maintenance.go | 40 ++- .../maintenance/maintenance_test.go | 288 ++++++++++++++++++ 3 files changed, 320 insertions(+), 9 deletions(-) create mode 100644 changelogs/unreleased/9299-shubham-pampattiwar diff --git a/changelogs/unreleased/9299-shubham-pampattiwar b/changelogs/unreleased/9299-shubham-pampattiwar new file mode 100644 index 000000000..2400fb6b6 --- /dev/null +++ b/changelogs/unreleased/9299-shubham-pampattiwar @@ -0,0 +1 @@ +Fix repository maintenance jobs to inherit allowlisted tolerations from Velero deployment diff --git a/pkg/repository/maintenance/maintenance.go b/pkg/repository/maintenance/maintenance.go index 16a94535f..e43918d4d 100644 --- a/pkg/repository/maintenance/maintenance.go +++ b/pkg/repository/maintenance/maintenance.go @@ -449,6 +449,35 @@ func StartNewJob( return maintenanceJob.Name, nil } +// buildTolerationsForMaintenanceJob builds the tolerations for maintenance jobs. +// It includes the required Windows toleration for backward compatibility and filters +// tolerations from the Velero deployment to only include those with keys that are +// in the ThirdPartyTolerations allowlist, following the same pattern as labels and annotations. +func buildTolerationsForMaintenanceJob(deployment *appsv1api.Deployment) []corev1api.Toleration { + // Start with the Windows toleration for backward compatibility + windowsToleration := corev1api.Toleration{ + Key: "os", + Operator: "Equal", + Effect: "NoSchedule", + Value: "windows", + } + result := []corev1api.Toleration{windowsToleration} + + // Filter tolerations from the Velero deployment to only include allowed ones + // Only tolerations that exist on the deployment AND have keys in the allowlist are inherited + deploymentTolerations := veleroutil.GetTolerationsFromVeleroServer(deployment) + for _, k := range util.ThirdPartyTolerations { + for _, toleration := range deploymentTolerations { + if toleration.Key == k { + result = append(result, toleration) + break // Only add the first matching toleration for each allowed key + } + } + } + + return result +} + func getPriorityClassName(ctx context.Context, cli client.Client, config *velerotypes.JobConfigs, logger logrus.FieldLogger) string { // Use the priority class name from the global job configuration if available // Note: Priority class is only read from global config, not per-repository @@ -593,15 +622,8 @@ func buildJob( SecurityContext: podSecurityContext, Volumes: volumes, ServiceAccountName: serviceAccount, - Tolerations: []corev1api.Toleration{ - { - Key: "os", - Operator: "Equal", - Effect: "NoSchedule", - Value: "windows", - }, - }, - ImagePullSecrets: imagePullSecrets, + Tolerations: buildTolerationsForMaintenanceJob(deployment), + ImagePullSecrets: imagePullSecrets, }, }, }, diff --git a/pkg/repository/maintenance/maintenance_test.go b/pkg/repository/maintenance/maintenance_test.go index 925a94043..93d8f9b2f 100644 --- a/pkg/repository/maintenance/maintenance_test.go +++ b/pkg/repository/maintenance/maintenance_test.go @@ -1481,3 +1481,291 @@ func TestBuildJobWithPriorityClassName(t *testing.T) { }) } } + +func TestBuildTolerationsForMaintenanceJob(t *testing.T) { + windowsToleration := corev1api.Toleration{ + Key: "os", + Operator: "Equal", + Effect: "NoSchedule", + Value: "windows", + } + + testCases := []struct { + name string + deploymentTolerations []corev1api.Toleration + expectedTolerations []corev1api.Toleration + }{ + { + name: "no tolerations should only include Windows toleration", + deploymentTolerations: nil, + expectedTolerations: []corev1api.Toleration{ + windowsToleration, + }, + }, + { + name: "empty tolerations should only include Windows toleration", + deploymentTolerations: []corev1api.Toleration{}, + expectedTolerations: []corev1api.Toleration{ + windowsToleration, + }, + }, + { + name: "non-allowed toleration should not be inherited", + deploymentTolerations: []corev1api.Toleration{ + { + Key: "vng-ondemand", + Operator: "Equal", + Effect: "NoSchedule", + Value: "amd64", + }, + }, + expectedTolerations: []corev1api.Toleration{ + windowsToleration, + }, + }, + { + name: "allowed toleration should be inherited", + deploymentTolerations: []corev1api.Toleration{ + { + Key: "kubernetes.azure.com/scalesetpriority", + Operator: "Equal", + Effect: "NoSchedule", + Value: "spot", + }, + }, + expectedTolerations: []corev1api.Toleration{ + windowsToleration, + { + Key: "kubernetes.azure.com/scalesetpriority", + Operator: "Equal", + Effect: "NoSchedule", + Value: "spot", + }, + }, + }, + { + name: "mixed allowed and non-allowed tolerations should only inherit allowed", + deploymentTolerations: []corev1api.Toleration{ + { + Key: "vng-ondemand", // not in allowlist + Operator: "Equal", + Effect: "NoSchedule", + Value: "amd64", + }, + { + Key: "CriticalAddonsOnly", // in allowlist + Operator: "Exists", + Effect: "NoSchedule", + }, + { + Key: "custom-key", // not in allowlist + Operator: "Equal", + Effect: "NoSchedule", + Value: "custom-value", + }, + }, + expectedTolerations: []corev1api.Toleration{ + windowsToleration, + { + Key: "CriticalAddonsOnly", + Operator: "Exists", + Effect: "NoSchedule", + }, + }, + }, + { + name: "multiple allowed tolerations should all be inherited", + deploymentTolerations: []corev1api.Toleration{ + { + Key: "kubernetes.azure.com/scalesetpriority", + Operator: "Equal", + Effect: "NoSchedule", + Value: "spot", + }, + { + Key: "CriticalAddonsOnly", + Operator: "Exists", + Effect: "NoSchedule", + }, + }, + expectedTolerations: []corev1api.Toleration{ + windowsToleration, + { + Key: "kubernetes.azure.com/scalesetpriority", + Operator: "Equal", + Effect: "NoSchedule", + Value: "spot", + }, + { + Key: "CriticalAddonsOnly", + Operator: "Exists", + Effect: "NoSchedule", + }, + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + // Create a deployment with the specified tolerations + deployment := &appsv1api.Deployment{ + Spec: appsv1api.DeploymentSpec{ + Template: corev1api.PodTemplateSpec{ + Spec: corev1api.PodSpec{ + Tolerations: tc.deploymentTolerations, + }, + }, + }, + } + + result := buildTolerationsForMaintenanceJob(deployment) + assert.Equal(t, tc.expectedTolerations, result) + }) + } +} + +func TestBuildJobWithTolerationsInheritance(t *testing.T) { + // Define allowed tolerations that would be set on Velero deployment + allowedTolerations := []corev1api.Toleration{ + { + Key: "kubernetes.azure.com/scalesetpriority", + Operator: "Equal", + Effect: "NoSchedule", + Value: "spot", + }, + { + Key: "CriticalAddonsOnly", + Operator: "Exists", + Effect: "NoSchedule", + }, + } + + // Mixed tolerations (allowed and non-allowed) + mixedTolerations := []corev1api.Toleration{ + { + Key: "vng-ondemand", // not in allowlist + Operator: "Equal", + Effect: "NoSchedule", + Value: "amd64", + }, + { + Key: "CriticalAddonsOnly", // in allowlist + Operator: "Exists", + Effect: "NoSchedule", + }, + } + + // Windows toleration that should always be present + windowsToleration := corev1api.Toleration{ + Key: "os", + Operator: "Equal", + Effect: "NoSchedule", + Value: "windows", + } + + testCases := []struct { + name string + deploymentTolerations []corev1api.Toleration + expectedTolerations []corev1api.Toleration + }{ + { + name: "no tolerations on deployment should only have Windows toleration", + deploymentTolerations: nil, + expectedTolerations: []corev1api.Toleration{ + windowsToleration, + }, + }, + { + name: "allowed tolerations should be inherited along with Windows toleration", + deploymentTolerations: allowedTolerations, + expectedTolerations: []corev1api.Toleration{ + windowsToleration, + { + Key: "kubernetes.azure.com/scalesetpriority", + Operator: "Equal", + Effect: "NoSchedule", + Value: "spot", + }, + { + Key: "CriticalAddonsOnly", + Operator: "Exists", + Effect: "NoSchedule", + }, + }, + }, + { + name: "mixed tolerations should only inherit allowed ones", + deploymentTolerations: mixedTolerations, + expectedTolerations: []corev1api.Toleration{ + windowsToleration, + { + Key: "CriticalAddonsOnly", + Operator: "Exists", + Effect: "NoSchedule", + }, + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + // Create a new scheme and add necessary API types + localScheme := runtime.NewScheme() + err := velerov1api.AddToScheme(localScheme) + require.NoError(t, err) + err = appsv1api.AddToScheme(localScheme) + require.NoError(t, err) + err = batchv1api.AddToScheme(localScheme) + require.NoError(t, err) + + // Create a deployment with the specified tolerations + deployment := &appsv1api.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "velero", + Namespace: "velero", + }, + Spec: appsv1api.DeploymentSpec{ + Template: corev1api.PodTemplateSpec{ + Spec: corev1api.PodSpec{ + Containers: []corev1api.Container{ + { + Name: "velero", + Image: "velero/velero:latest", + }, + }, + Tolerations: tc.deploymentTolerations, + }, + }, + }, + } + + // Create a backup repository + repo := &velerov1api.BackupRepository{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-repo", + Namespace: "velero", + }, + Spec: velerov1api.BackupRepositorySpec{ + VolumeNamespace: "velero", + BackupStorageLocation: "default", + }, + } + + // Create fake client and add the deployment + client := fake.NewClientBuilder().WithScheme(localScheme).WithObjects(deployment).Build() + + // Create minimal job configs and resources + jobConfig := &velerotypes.JobConfigs{} + logLevel := logrus.InfoLevel + logFormat := logging.NewFormatFlag() + logFormat.Set("text") + + // Call buildJob + job, err := buildJob(client, t.Context(), repo, "default", jobConfig, logLevel, logFormat, logrus.New()) + require.NoError(t, err) + + // Verify the tolerations are set correctly + assert.Equal(t, tc.expectedTolerations, job.Spec.Template.Spec.Tolerations) + }) + } +}