diff --git a/.github/workflows/e2e-test-kind.yaml b/.github/workflows/e2e-test-kind.yaml index ff3187051..65517804c 100644 --- a/.github/workflows/e2e-test-kind.yaml +++ b/.github/workflows/e2e-test-kind.yaml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out the code - uses: actions/checkout@v4 + uses: actions/checkout@v5 - name: Set up Go uses: actions/setup-go@v5 with: @@ -81,7 +81,7 @@ jobs: fail-fast: false steps: - name: Check out the code - uses: actions/checkout@v4 + uses: actions/checkout@v5 - name: Set up Go uses: actions/setup-go@v5 with: diff --git a/.github/workflows/nightly-trivy-scan.yml b/.github/workflows/nightly-trivy-scan.yml index ac463863b..dc8363e39 100644 --- a/.github/workflows/nightly-trivy-scan.yml +++ b/.github/workflows/nightly-trivy-scan.yml @@ -19,7 +19,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@v5 - name: Run Trivy vulnerability scanner uses: aquasecurity/trivy-action@master diff --git a/.github/workflows/pr-changelog-check.yml b/.github/workflows/pr-changelog-check.yml index 96052fc2d..6cda5a63e 100644 --- a/.github/workflows/pr-changelog-check.yml +++ b/.github/workflows/pr-changelog-check.yml @@ -12,7 +12,7 @@ jobs: steps: - name: Check out the code - uses: actions/checkout@v4 + uses: actions/checkout@v5 - name: Changelog check if: ${{ !(contains(github.event.pull_request.labels.*.name, 'kind/changelog-not-required') || contains(github.event.pull_request.labels.*.name, 'Design') || contains(github.event.pull_request.labels.*.name, 'Website') || contains(github.event.pull_request.labels.*.name, 'Documentation'))}} diff --git a/.github/workflows/pr-ci-check.yml b/.github/workflows/pr-ci-check.yml index 23f7a12f1..0a394560a 100644 --- a/.github/workflows/pr-ci-check.yml +++ b/.github/workflows/pr-ci-check.yml @@ -8,7 +8,7 @@ jobs: fail-fast: false steps: - name: Check out the code - uses: actions/checkout@v4 + uses: actions/checkout@v5 - name: Set up Go uses: actions/setup-go@v5 with: diff --git a/.github/workflows/pr-codespell.yml b/.github/workflows/pr-codespell.yml index 900371ca2..9f8e44825 100644 --- a/.github/workflows/pr-codespell.yml +++ b/.github/workflows/pr-codespell.yml @@ -8,14 +8,14 @@ jobs: steps: - name: Check out the code - uses: actions/checkout@v4 + uses: actions/checkout@v5 - name: Codespell uses: codespell-project/actions-codespell@master with: # ignore the config/.../crd.go file as it's generated binary data that is edited elsewhere. 
skip: .git,*.png,*.jpg,*.woff,*.ttf,*.gif,*.ico,./config/crd/v1beta1/crds/crds.go,./config/crd/v1/crds/crds.go,./config/crd/v2alpha1/crds/crds.go,./go.sum,./LICENSE - ignore_words_list: iam,aks,ist,bridget,ue,shouldnot,atleast,notin,sme,optin + ignore_words_list: iam,aks,ist,bridget,ue,shouldnot,atleast,notin,sme,optin,sie check_filenames: true check_hidden: true diff --git a/.github/workflows/pr-containers.yml b/.github/workflows/pr-containers.yml index 345f24362..0f1823f8a 100644 --- a/.github/workflows/pr-containers.yml +++ b/.github/workflows/pr-containers.yml @@ -13,7 +13,7 @@ jobs: name: Build runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 name: Checkout - name: Set up QEMU diff --git a/.github/workflows/pr-goreleaser.yml b/.github/workflows/pr-goreleaser.yml index aed88ab14..2fdc5bc6e 100644 --- a/.github/workflows/pr-goreleaser.yml +++ b/.github/workflows/pr-goreleaser.yml @@ -14,7 +14,7 @@ jobs: name: Build runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 name: Checkout - name: Verify .goreleaser.yml and try a dryrun release. diff --git a/.github/workflows/pr-linter-check.yml b/.github/workflows/pr-linter-check.yml index f3b4397b4..997466ccf 100644 --- a/.github/workflows/pr-linter-check.yml +++ b/.github/workflows/pr-linter-check.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out the code - uses: actions/checkout@v4 + uses: actions/checkout@v5 - name: Set up Go uses: actions/setup-go@v5 with: diff --git a/.github/workflows/push-builder.yml b/.github/workflows/push-builder.yml index d4461bbdd..0663ffd2f 100644 --- a/.github/workflows/push-builder.yml +++ b/.github/workflows/push-builder.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: # The default value is "1" which fetches only a single commit. 
If we merge PR without squash or rebase, # there are at least two commits: the first one is the merge commit and the second one is the real commit diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index 212167e98..8dee8799a 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -15,7 +15,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out the code - uses: actions/checkout@v4 + uses: actions/checkout@v5 - name: Set up Go uses: actions/setup-go@v5 with: diff --git a/.github/workflows/rebase.yml b/.github/workflows/rebase.yml index 7a538a114..e7cd1fccb 100644 --- a/.github/workflows/rebase.yml +++ b/.github/workflows/rebase.yml @@ -9,7 +9,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout the latest code - uses: actions/checkout@v4 + uses: actions/checkout@v5 with: fetch-depth: 0 - name: Automatic Rebase diff --git a/changelogs/unreleased/8883-kaovilai b/changelogs/unreleased/8883-kaovilai new file mode 100644 index 000000000..63be04bf1 --- /dev/null +++ b/changelogs/unreleased/8883-kaovilai @@ -0,0 +1 @@ +Implement PriorityClass Support diff --git a/changelogs/unreleased/9145-reasonerjt b/changelogs/unreleased/9145-reasonerjt new file mode 100644 index 000000000..0cf694adf --- /dev/null +++ b/changelogs/unreleased/9145-reasonerjt @@ -0,0 +1 @@ +Add include/exclude policy to resources policy \ No newline at end of file diff --git a/changelogs/unreleased/9147-blackpiglet b/changelogs/unreleased/9147-blackpiglet new file mode 100644 index 000000000..dadb7d558 --- /dev/null +++ b/changelogs/unreleased/9147-blackpiglet @@ -0,0 +1 @@ +Remove the repository maintenance job parameters from velero server. diff --git a/changelogs/unreleased/9165-Lyndon-Li b/changelogs/unreleased/9165-Lyndon-Li new file mode 100644 index 000000000..e7a371263 --- /dev/null +++ b/changelogs/unreleased/9165-Lyndon-Li @@ -0,0 +1 @@ +Fix issue #9140, add os=windows:NoSchedule toleration for Windows pods \ No newline at end of file diff --git a/changelogs/unreleased/9168-priyansh17 b/changelogs/unreleased/9168-priyansh17 new file mode 100644 index 000000000..66b97075a --- /dev/null +++ b/changelogs/unreleased/9168-priyansh17 @@ -0,0 +1 @@ +Introduced context-based logger for backend implementations (Azure, GCS, S3, and Filesystem) \ No newline at end of file diff --git a/design/priority-class-name-support_design.md b/design/Implemented/priority-class-name-support_design.md similarity index 76% rename from design/priority-class-name-support_design.md rename to design/Implemented/priority-class-name-support_design.md index 2555b3a55..f47d6ece0 100644 --- a/design/priority-class-name-support_design.md +++ b/design/Implemented/priority-class-name-support_design.md @@ -1,28 +1,34 @@ # PriorityClass Support Design Proposal ## Abstract + This design document outlines the implementation of priority class name support for Velero components, including the Velero server deployment, node agent daemonset, and maintenance jobs. This feature allows users to specify a priority class name for Velero components, which can be used to influence the scheduling and eviction behavior of these components. ## Background + Kubernetes allows users to define priority classes, which can be used to influence the scheduling and eviction behavior of pods. Priority classes are defined as cluster-wide resources, and pods can reference them by name. When a pod is created, the priority admission controller uses the priority class name to populate the priority value for the pod. 
The scheduler then uses this priority value to determine the order in which pods are scheduled. Currently, Velero does not provide a way for users to specify a priority class name for its components. This can be problematic in clusters where resource contention is high, as Velero components may be evicted or not scheduled in a timely manner, potentially impacting backup and restore operations. ## Goals + - Add support for specifying priority class names for Velero components - Update the Velero CLI to accept priority class name parameters for different components - Update the Velero deployment, node agent daemonset, maintenance jobs, and data mover pods to use the specified priority class names ## Non Goals + - Creating or managing priority classes - Automatically determining the appropriate priority class for Velero components ## High-Level Design + The implementation will add new fields to the Velero options struct to store the priority class names for the server deployment and node agent daemonset. The Velero CLI will be updated to accept new flags for these components. For data mover pods and maintenance jobs, priority class names will be configured through existing ConfigMap mechanisms (`node-agent-configmap` for data movers and `repo-maintenance-job-configmap` for maintenance jobs). The Velero deployment, node agent daemonset, maintenance jobs, and data mover pods will be updated to use their respective priority class names. ## Detailed Design ### CLI Changes + New flags will be added to the `velero install` command to specify priority class names for different components: ```go @@ -44,6 +50,7 @@ flags.StringVar( Note: Priority class names for data mover pods and maintenance jobs will be configured through their respective ConfigMaps (`--node-agent-configmap` for data movers and `--repo-maintenance-job-configmap` for maintenance jobs). ### Velero Options Changes + The `VeleroOptions` struct in `pkg/install/resources.go` will be updated to include new fields for priority class names: ```go @@ -55,6 +62,7 @@ type VeleroOptions struct { ``` ### Deployment Changes + The `podTemplateConfig` struct in `pkg/install/deployment.go` will be updated to include a new field for the priority class name: ```go @@ -93,6 +101,7 @@ deployment := &appsv1api.Deployment{ ``` ### DaemonSet Changes + The `DaemonSet` function will use the priority class name passed via the podTemplateConfig (from the CLI flag): ```go @@ -112,6 +121,7 @@ daemonSet := &appsv1api.DaemonSet{ ``` ### Maintenance Job Changes + The `JobConfigs` struct in `pkg/repository/maintenance/maintenance.go` will be updated to include a field for the priority class name: ```go @@ -187,6 +197,7 @@ velero install --provider aws \ The ConfigMap can be updated after installation to change the priority class for future maintenance jobs. Note that only the "global" configuration is used for priority class - all maintenance jobs will use the same priority class regardless of which repository they are maintaining. 
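To make the "global-only" lookup concrete, the following is a minimal, illustrative Go sketch of how a maintenance-job helper could resolve the priority class from the repo-maintenance-job-configmap. It assumes the ConfigMap's data holds a single JSON document mapping config names (including `global`) to `JobConfigs`; the helper name `globalPriorityClassName` and the exact parsing shown here are assumptions for illustration, not the actual implementation.

```go
// Illustrative sketch, not the actual Velero code: resolve the "global"
// priority class from the repo-maintenance-job-configmap.
package maintenance

import (
	"context"
	"encoding/json"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
)

// JobConfigs mirrors the struct described above; only the new field is shown.
type JobConfigs struct {
	PriorityClassName string `json:"priorityClassName,omitempty"`
}

// globalPriorityClassName returns the priority class from the "global" entry,
// or "" so that maintenance jobs fall back to the default pod priority when
// the ConfigMap, the entry, or the field is missing or malformed.
func globalPriorityClassName(ctx context.Context, client kubernetes.Interface, namespace, configName string) string {
	cm, err := client.CoreV1().ConfigMaps(namespace).Get(ctx, configName, metav1.GetOptions{})
	if err != nil || len(cm.Data) == 0 {
		return ""
	}

	// Use the first (and expected only) value stored in the ConfigMap data.
	var raw string
	for _, v := range cm.Data {
		raw = v
		break
	}

	configs := map[string]JobConfigs{}
	if err := json.Unmarshal([]byte(raw), &configs); err != nil {
		return ""
	}
	return configs["global"].PriorityClassName
}
```

Falling back to an empty string on any failure keeps this path consistent with the data mover behavior described later: the job is still created and simply runs at the cluster's default priority.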
### Node Agent ConfigMap Changes + We'll update the `Configs` struct in `pkg/nodeagent/node_agent.go` to include a field for the priority class name in the node-agent-configmap: ```go @@ -284,14 +295,47 @@ A new function, `GetDataMoverPriorityClassName`, will be added to the `pkg/util/ // GetDataMoverPriorityClassName retrieves the priority class name for data mover pods from the node-agent-configmap func GetDataMoverPriorityClassName(ctx context.Context, namespace string, kubeClient kubernetes.Interface, configName string) (string, error) { - // Get from node-agent-configmap - configs, err := nodeagent.GetConfigs(ctx, namespace, kubeClient, configName) - if err == nil && configs != nil && configs.PriorityClassName != "" { - return configs.PriorityClassName, nil + // configData is a minimal struct to parse only the priority class name from the ConfigMap + type configData struct { + PriorityClassName string `json:"priorityClassName,omitempty"` } - - // Return empty string if not found in configmap - return "", nil + + // Get the ConfigMap + cm, err := kubeClient.CoreV1().ConfigMaps(namespace).Get(ctx, configName, metav1.GetOptions{}) + if err != nil { + if apierrors.IsNotFound(err) { + // ConfigMap not found is not an error, just return empty string + return "", nil + } + return "", errors.Wrapf(err, "error getting node agent config map %s", configName) + } + + if cm.Data == nil { + // No data in ConfigMap, return empty string + return "", nil + } + + // Extract the first value from the ConfigMap data + jsonString := "" + for _, v := range cm.Data { + jsonString = v + break // Use the first value found + } + + if jsonString == "" { + // No data to parse, return empty string + return "", nil + } + + // Parse the JSON to extract priority class name + var config configData + if err := json.Unmarshal([]byte(jsonString), &config); err != nil { + // Invalid JSON is not a critical error for priority class + // Just return empty string to use default behavior + return "", nil + } + + return config.PriorityClassName, nil } ``` @@ -307,11 +351,12 @@ To improve observability and help with troubleshooting, the implementation will // In pkg/util/kube/priority_class.go // ValidatePriorityClass checks if the specified priority class exists in the cluster -// Returns nil if the priority class exists or if priorityClassName is empty -// Returns a warning (not an error) if the priority class doesn't exist -func ValidatePriorityClass(ctx context.Context, kubeClient kubernetes.Interface, priorityClassName string, logger logrus.FieldLogger) { +// Returns true if the priority class exists or if priorityClassName is empty +// Returns false if the priority class doesn't exist or validation fails +// Logs warnings when the priority class doesn't exist +func ValidatePriorityClass(ctx context.Context, kubeClient kubernetes.Interface, priorityClassName string, logger logrus.FieldLogger) bool { if priorityClassName == "" { - return + return true } _, err := kubeClient.SchedulingV1().PriorityClasses().Get(ctx, priorityClassName, metav1.GetOptions{}) @@ -321,9 +366,10 @@ func ValidatePriorityClass(ctx context.Context, kubeClient kubernetes.Interface, } else { logger.WithError(err).Warnf("Failed to validate priority class %q", priorityClassName) } - } else { - logger.Infof("Validated priority class %q exists in cluster", priorityClassName) + return false } + logger.Infof("Validated priority class %q exists in cluster", priorityClassName) + return true } ``` @@ -352,6 +398,7 @@ if priorityClassName != "" { ``` These 
validation and logging features will help administrators: + - Identify configuration issues early (validation warnings) - Troubleshoot priority class application issues - Verify that priority classes are being applied as expected @@ -371,20 +418,30 @@ The `ValidatePriorityClass` function should be called at the following points: - Before creating maintenance jobs Example usage: + ```go // During velero install if o.ServerPriorityClassName != "" { - kube.ValidatePriorityClass(ctx, kubeClient, o.ServerPriorityClassName, logger.WithField("component", "server")) + _ = kube.ValidatePriorityClass(ctx, kubeClient, o.ServerPriorityClassName, logger.WithField("component", "server")) + // For install command, we continue even if validation fails (warnings are logged) } -// When reading from ConfigMap +// When reading from ConfigMap in node-agent server priorityClassName, err := kube.GetDataMoverPriorityClassName(ctx, namespace, kubeClient, configMapName) if err == nil && priorityClassName != "" { - kube.ValidatePriorityClass(ctx, kubeClient, priorityClassName, logger.WithField("component", "data-mover")) + // Validate the priority class exists in the cluster + if kube.ValidatePriorityClass(ctx, kubeClient, priorityClassName, logger.WithField("component", "data-mover")) { + dataMovePriorityClass = priorityClassName + logger.WithField("priorityClassName", priorityClassName).Info("Using priority class for data mover pods") + } else { + logger.WithField("priorityClassName", priorityClassName).Warn("Priority class not found in cluster, data mover pods will use default priority") + // Clear the priority class to prevent pod creation failures + priorityClassName = "" + } } ``` -Note: Since validation only logs warnings (not errors), it won't block operations if a priority class doesn't exist. This allows for scenarios where priority classes might be created after Velero installation. +Note: The validation function returns a boolean to allow callers to decide how to handle missing priority classes. For the install command, validation failures are ignored (only warnings are logged) to allow for scenarios where priority classes might be created after Velero installation. For runtime components like the node-agent server, the priority class is cleared if validation fails to prevent pod creation failures. ## Alternatives Considered @@ -402,6 +459,26 @@ There are no security considerations for this feature. This feature is compatible with all Kubernetes versions that support priority classes. The PodPriority feature became stable in Kubernetes 1.14. For more information, see the [Kubernetes documentation on Pod Priority and Preemption](https://kubernetes.io/docs/concepts/scheduling-eviction/pod-priority-preemption/). +## ConfigMap Update Strategy + +### Static ConfigMap Reading at Startup + +The node-agent server reads and parses the ConfigMap once during initialization and passes configurations (like `podResources`, `loadAffinity`, and `priorityClassName`) directly to controllers as parameters. This approach ensures: + +- Single ConfigMap read to minimize API calls +- Consistent configuration across all controllers +- Validation of priority classes at startup with fallback behavior +- No need for complex update mechanisms or watchers + +ConfigMap changes require a restart of the node-agent to take effect. + +### Implementation Approach + +1. **Data Mover Controllers**: Receive priority class as a string parameter from node-agent server at initialization +2. 
**Maintenance Job Controller**: Read fresh configuration from repo-maintenance-job-configmap at job creation time +3. ConfigMap changes require restart of components to take effect +4. Priority class validation happens at startup with automatic fallback to prevent failures + ## Implementation The implementation will involve the following steps: @@ -519,10 +596,10 @@ velero install \ When configuring priority classes for Velero components, consider the following hierarchy based on component criticality: -1. **Velero Server (Highest Priority)**: +1. **Velero Server (Highest Priority)**: - Example: `velero-critical` with value 100 - Rationale: The server must remain running to coordinate backup/restore operations - + 2. **Node Agent DaemonSet (Medium Priority)**: - Example: `velero-standard` with value 50 - Rationale: Node agents need to be available on nodes but are less critical than the server @@ -544,37 +621,47 @@ This approach has several advantages: The priority class name for data mover pods will be determined by checking the node-agent-configmap. This approach provides a centralized way to configure priority class names for all data mover pods. The same approach will be used for PVB (PodVolumeBackup) and PVR (PodVolumeRestore) pods, which will also retrieve their priority class name from the node-agent-configmap. -For PVB and PVR pods specifically, the controllers will need to be updated to retrieve the priority class name from the node-agent-configmap and pass it to the pod creation functions. For example, in the PodVolumeBackup controller: +For PVB and PVR pods specifically, the implementation follows this approach: + +1. **Controller Initialization**: Both PodVolumeBackup and PodVolumeRestore controllers are updated to accept a priority class name as a string parameter. The node-agent server reads the priority class from the node-agent-configmap once at startup: ```go -// In pkg/controller/pod_volume_backup_controller.go -priorityClassName, _ := kube.GetDataMoverPriorityClassName(ctx, namespace, kubeClient, configMapName) +// In node-agent server startup (pkg/cmd/cli/nodeagent/server.go) +dataMovePriorityClass := "" +if s.config.nodeAgentConfig != "" { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*30) + defer cancel() + priorityClass, err := kube.GetDataMoverPriorityClassName(ctx, s.namespace, s.kubeClient, s.config.nodeAgentConfig) + if err != nil { + s.logger.WithError(err).Warn("Failed to get priority class name from node-agent-configmap, using empty value") + } else if priorityClass != "" { + // Validate the priority class exists in the cluster + if kube.ValidatePriorityClass(ctx, s.kubeClient, priorityClass, s.logger.WithField("component", "data-mover")) { + dataMovePriorityClass = priorityClass + s.logger.WithField("priorityClassName", priorityClass).Info("Using priority class for data mover pods") + } else { + s.logger.WithField("priorityClassName", priorityClass).Warn("Priority class not found in cluster, data mover pods will use default priority") + } + } +} -// Add priorityClassName to the pod spec -pod := &corev1api.Pod{ +// Pass priority class to controllers +pvbReconciler := controller.NewPodVolumeBackupReconciler( + s.mgr.GetClient(), s.mgr, s.kubeClient, ..., dataMovePriorityClass) +pvrReconciler := controller.NewPodVolumeRestoreReconciler( + s.mgr.GetClient(), s.mgr, s.kubeClient, ..., dataMovePriorityClass) +``` + +2. 
**Controller Structure**: Controllers store the priority class name as a field: + +```go +type PodVolumeBackupReconciler struct { // ... existing fields ... - Spec: corev1api.PodSpec{ - // ... existing fields ... - PriorityClassName: priorityClassName, - }, + dataMovePriorityClass string } ``` -Similarly, in the PodVolumeRestore controller: - -```go -// In pkg/controller/pod_volume_restore_controller.go -priorityClassName, _ := kube.GetDataMoverPriorityClassName(ctx, namespace, kubeClient, configMapName) - -// Add priorityClassName to the pod spec -pod := &corev1api.Pod{ - // ... existing fields ... - Spec: corev1api.PodSpec{ - // ... existing fields ... - PriorityClassName: priorityClassName, - }, -} -``` +3. **Pod Creation**: The priority class is included in the pod spec when creating data mover pods. ### VGDP Micro-Service Considerations @@ -582,6 +669,26 @@ With the introduction of VGDP micro-services (as described in the VGDP micro-ser This ensures that all pods created by Velero for data movement operations (CSI snapshot data movement, PVB, and PVR) use a consistent approach for priority class name configuration through the node-agent-configmap. +### How Exposers Receive Configuration + +CSI Snapshot Exposer and Generic Restore Exposer do not directly watch or read ConfigMaps. Instead, they receive configuration through their parent controllers: + +1. **Controller Initialization**: Controllers receive the priority class name as a parameter during initialization from the node-agent server. + +2. **Configuration Propagation**: During reconciliation of resources: + - The controller calls `setupExposeParam()` which includes the `dataMovePriorityClass` value + - For CSI operations: `CSISnapshotExposeParam.PriorityClassName` is set + - For generic restore: `GenericRestoreExposeParam.PriorityClassName` is set + - The controller passes these parameters to the exposer's `Expose()` method + +3. **Pod Creation**: The exposer creates pods with the priority class name provided by the controller. + +This design keeps exposers stateless and ensures: +- Exposers remain simple and focused on pod creation +- All configuration flows through controllers consistently +- No complex state synchronization between components +- Configuration changes require component restart to take effect + ## Open Issues None. diff --git a/internal/resourcepolicies/resource_policies.go b/internal/resourcepolicies/resource_policies.go index 7d3930280..0837e63e2 100644 --- a/internal/resourcepolicies/resource_policies.go +++ b/internal/resourcepolicies/resource_policies.go @@ -13,6 +13,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ + package resourcepolicies import ( @@ -20,6 +21,8 @@ import ( "fmt" "strings" + "k8s.io/apimachinery/pkg/util/sets" + "github.com/pkg/errors" "github.com/sirupsen/logrus" corev1api "k8s.io/api/core/v1" @@ -49,24 +52,58 @@ type Action struct { Parameters map[string]any `yaml:"parameters,omitempty"` } -// volumePolicy defined policy to conditions to match Volumes and related action to handle matched Volumes +// IncludeExcludePolicy defined policy to include or exclude resources based on the names +type IncludeExcludePolicy struct { + // The following fields have the same semantics as those from the spec of backup. + // Refer to the comment in the velerov1api.BackupSpec for more details. 
+ IncludedClusterScopedResources []string `yaml:"includedClusterScopedResources"` + ExcludedClusterScopedResources []string `yaml:"excludedClusterScopedResources"` + IncludedNamespaceScopedResources []string `yaml:"includedNamespaceScopedResources"` + ExcludedNamespaceScopedResources []string `yaml:"excludedNamespaceScopedResources"` +} + +func (p *IncludeExcludePolicy) Validate() error { + if err := p.validateIncludeExclude(p.IncludedClusterScopedResources, p.ExcludedClusterScopedResources); err != nil { + return err + } + return p.validateIncludeExclude(p.IncludedNamespaceScopedResources, p.ExcludedNamespaceScopedResources) +} + +func (p *IncludeExcludePolicy) validateIncludeExclude(includesList, excludesList []string) error { + includes := sets.NewString(includesList...) + excludes := sets.NewString(excludesList...) + + if includes.Has("*") || excludes.Has("*") { + return fmt.Errorf("cannot use '*' in includes or excludes filters in the policy") + } + for _, itm := range excludes.List() { + if includes.Has(itm) { + return fmt.Errorf("excludes list cannot contain an item in the includes list: %s", itm) + } + } + return nil +} + +// VolumePolicy defined policy to conditions to match Volumes and related action to handle matched Volumes type VolumePolicy struct { // Conditions defined list of conditions to match Volumes Conditions map[string]any `yaml:"conditions"` Action Action `yaml:"action"` } -// resourcePolicies currently defined slice of volume policies to handle backup +// ResourcePolicies currently defined slice of volume policies to handle backup type ResourcePolicies struct { - Version string `yaml:"version"` - VolumePolicies []VolumePolicy `yaml:"volumePolicies"` + Version string `yaml:"version"` + VolumePolicies []VolumePolicy `yaml:"volumePolicies"` + IncludeExcludePolicy *IncludeExcludePolicy `yaml:"includeExcludePolicy"` // we may support other resource policies in the future, and they could be added separately // OtherResourcePolicies []OtherResourcePolicy } type Policies struct { - version string - volumePolicies []volPolicy + version string + volumePolicies []volPolicy + includeExcludePolicy *IncludeExcludePolicy // OtherPolicies } @@ -115,6 +152,7 @@ func (p *Policies) BuildPolicy(resPolicies *ResourcePolicies) error { // Other resource policies p.version = resPolicies.Version + p.includeExcludePolicy = resPolicies.IncludeExcludePolicy return nil } @@ -175,9 +213,20 @@ func (p *Policies) Validate() error { } } } + + if p.GetIncludeExcludePolicy() != nil { + if err := p.GetIncludeExcludePolicy().Validate(); err != nil { + return errors.WithStack(err) + } + } + return nil } +func (p *Policies) GetIncludeExcludePolicy() *IncludeExcludePolicy { + return p.includeExcludePolicy +} + func GetResourcePoliciesFromBackup( backup velerov1api.Backup, client crclient.Client, diff --git a/internal/resourcepolicies/volume_resources_validator_test.go b/internal/resourcepolicies/volume_resources_validator_test.go index cf795b3ae..f2812a786 100644 --- a/internal/resourcepolicies/volume_resources_validator_test.go +++ b/internal/resourcepolicies/volume_resources_validator_test.go @@ -453,6 +453,102 @@ func TestValidate(t *testing.T) { }, wantErr: true, }, + { + name: " '*' in the filters of include exclude policy - 1", + res: &ResourcePolicies{ + Version: "v1", + VolumePolicies: []VolumePolicy{ + { + Action: Action{Type: "skip"}, + Conditions: map[string]any{ + "pvcLabels": map[string]string{ + "environment": "production", + "app": "database", + }, + }, + }, + }, + IncludeExcludePolicy: 
&IncludeExcludePolicy{ + IncludedClusterScopedResources: []string{"*"}, + ExcludedClusterScopedResources: []string{"crds"}, + IncludedNamespaceScopedResources: []string{"pods"}, + ExcludedNamespaceScopedResources: []string{"secrets"}, + }, + }, + wantErr: true, + }, + { + name: " '*' in the filters of include exclude policy - 2", + res: &ResourcePolicies{ + Version: "v1", + VolumePolicies: []VolumePolicy{ + { + Action: Action{Type: "skip"}, + Conditions: map[string]any{ + "pvcLabels": map[string]string{ + "environment": "production", + "app": "database", + }, + }, + }, + }, + IncludeExcludePolicy: &IncludeExcludePolicy{ + IncludedClusterScopedResources: []string{"persistentvolumes"}, + ExcludedClusterScopedResources: []string{"crds"}, + IncludedNamespaceScopedResources: []string{"pods"}, + ExcludedNamespaceScopedResources: []string{"*"}, + }, + }, + wantErr: true, + }, + { + name: " dup item in both the include and exclude filters of include exclude policy", + res: &ResourcePolicies{ + Version: "v1", + VolumePolicies: []VolumePolicy{ + { + Action: Action{Type: "skip"}, + Conditions: map[string]any{ + "pvcLabels": map[string]string{ + "environment": "production", + "app": "database", + }, + }, + }, + }, + IncludeExcludePolicy: &IncludeExcludePolicy{ + IncludedClusterScopedResources: []string{"persistentvolumes"}, + ExcludedClusterScopedResources: []string{"crds"}, + IncludedNamespaceScopedResources: []string{"pods", "configmaps"}, + ExcludedNamespaceScopedResources: []string{"secrets", "pods"}, + }, + }, + wantErr: true, + }, + { + name: " valid volume policies and valid include/exclude policy", + res: &ResourcePolicies{ + Version: "v1", + VolumePolicies: []VolumePolicy{ + { + Action: Action{Type: "skip"}, + Conditions: map[string]any{ + "pvcLabels": map[string]string{ + "environment": "production", + "app": "database", + }, + }, + }, + }, + IncludeExcludePolicy: &IncludeExcludePolicy{ + IncludedClusterScopedResources: []string{"persistentvolumes"}, + ExcludedClusterScopedResources: []string{"crds"}, + IncludedNamespaceScopedResources: []string{"pods", "configmaps"}, + ExcludedNamespaceScopedResources: []string{"secrets"}, + }, + }, + wantErr: false, + }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { diff --git a/pkg/backup/backup.go b/pkg/backup/backup.go index 755eaea56..a11acd52a 100644 --- a/pkg/backup/backup.go +++ b/pkg/backup/backup.go @@ -43,7 +43,6 @@ import ( kbclient "sigs.k8s.io/controller-runtime/pkg/client" "github.com/vmware-tanzu/velero/internal/hook" - "github.com/vmware-tanzu/velero/internal/resourcepolicies" "github.com/vmware-tanzu/velero/internal/volume" "github.com/vmware-tanzu/velero/internal/volumehelper" velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" @@ -270,13 +269,17 @@ func (kb *kubernetesBackupper) BackupWithResolvers( backupRequest.Spec.IncludeClusterResources, *backupRequest.NamespaceIncludesExcludes) } else { - backupRequest.ResourceIncludesExcludes = collections.GetScopeResourceIncludesExcludes(kb.discoveryHelper, log, + srie := collections.GetScopeResourceIncludesExcludes(kb.discoveryHelper, log, backupRequest.Spec.IncludedNamespaceScopedResources, backupRequest.Spec.ExcludedNamespaceScopedResources, backupRequest.Spec.IncludedClusterScopedResources, backupRequest.Spec.ExcludedClusterScopedResources, *backupRequest.NamespaceIncludesExcludes, ) + if backupRequest.ResPolicies != nil { + srie.CombineWithPolicy(backupRequest.ResPolicies.GetIncludeExcludePolicy()) + } + backupRequest.ResourceIncludesExcludes = srie } 
log.Infof("Backing up all volumes using pod volume backup: %t", boolptr.IsSetToTrue(backupRequest.Backup.Spec.DefaultVolumesToFsBackup)) @@ -355,11 +358,6 @@ func (kb *kubernetesBackupper) BackupWithResolvers( } backupRequest.Status.Progress = &velerov1api.BackupProgress{TotalItems: len(items)} - var resourcePolicy *resourcepolicies.Policies - if backupRequest.ResPolicies != nil { - resourcePolicy = backupRequest.ResPolicies - } - itemBackupper := &itemBackupper{ backupRequest: backupRequest, tarWriter: tw, @@ -374,7 +372,7 @@ func (kb *kubernetesBackupper) BackupWithResolvers( }, hookTracker: hook.NewHookTracker(), volumeHelperImpl: volumehelper.NewVolumeHelperImpl( - resourcePolicy, + backupRequest.ResPolicies, backupRequest.Spec.SnapshotVolumes, log, kb.kbClient, diff --git a/pkg/cmd/cli/install/install.go b/pkg/cmd/cli/install/install.go index 2a6194f7b..7079ec79b 100644 --- a/pkg/cmd/cli/install/install.go +++ b/pkg/cmd/cli/install/install.go @@ -91,6 +91,8 @@ type Options struct { ItemBlockWorkerCount int NodeAgentDisableHostPath bool kubeletRootDir string + ServerPriorityClassName string + NodeAgentPriorityClassName string } // BindFlags adds command line values to the options struct. @@ -194,6 +196,18 @@ func (o *Options) BindFlags(flags *pflag.FlagSet) { o.ItemBlockWorkerCount, "Number of worker threads to process ItemBlocks. Default is one. Optional.", ) + flags.StringVar( + &o.ServerPriorityClassName, + "server-priority-class-name", + o.ServerPriorityClassName, + "Priority class name for the Velero server deployment. Optional.", + ) + flags.StringVar( + &o.NodeAgentPriorityClassName, + "node-agent-priority-class-name", + o.NodeAgentPriorityClassName, + "Priority class name for the node agent daemonset. Optional.", + ) } // NewInstallOptions instantiates a new, default InstallOptions struct. @@ -301,6 +315,8 @@ func (o *Options) AsVeleroOptions() (*install.VeleroOptions, error) { ItemBlockWorkerCount: o.ItemBlockWorkerCount, KubeletRootDir: o.kubeletRootDir, NodeAgentDisableHostPath: o.NodeAgentDisableHostPath, + ServerPriorityClassName: o.ServerPriorityClassName, + NodeAgentPriorityClassName: o.NodeAgentPriorityClassName, }, nil } @@ -389,6 +405,7 @@ func (o *Options) Run(c *cobra.Command, f client.Factory) error { if err != nil { return err } + errorMsg := fmt.Sprintf("\n\nError installing Velero. Use `kubectl logs deploy/velero -n %s` to check the deploy logs", o.Namespace) err = install.Install(dynamicFactory, kbClient, resources, os.Stdout) diff --git a/pkg/cmd/cli/install/install_test.go b/pkg/cmd/cli/install/install_test.go new file mode 100644 index 000000000..c5d147646 --- /dev/null +++ b/pkg/cmd/cli/install/install_test.go @@ -0,0 +1,93 @@ +/* +Copyright the Velero contributors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package install + +import ( + "testing" + + "github.com/spf13/pflag" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestPriorityClassNameFlag(t *testing.T) { + // Test that the flag is properly defined + o := NewInstallOptions() + flags := pflag.NewFlagSet("test", pflag.ContinueOnError) + o.BindFlags(flags) + + // Verify the server priority class flag exists + serverFlag := flags.Lookup("server-priority-class-name") + assert.NotNil(t, serverFlag, "server-priority-class-name flag should exist") + assert.Equal(t, "Priority class name for the Velero server deployment. Optional.", serverFlag.Usage) + + // Verify the node agent priority class flag exists + nodeAgentFlag := flags.Lookup("node-agent-priority-class-name") + assert.NotNil(t, nodeAgentFlag, "node-agent-priority-class-name flag should exist") + assert.Equal(t, "Priority class name for the node agent daemonset. Optional.", nodeAgentFlag.Usage) + + // Test with values for both server and node agent + testCases := []struct { + name string + serverPriorityClassName string + nodeAgentPriorityClassName string + expectedServerValue string + expectedNodeAgentValue string + }{ + { + name: "with both priority class names", + serverPriorityClassName: "high-priority", + nodeAgentPriorityClassName: "medium-priority", + expectedServerValue: "high-priority", + expectedNodeAgentValue: "medium-priority", + }, + { + name: "with only server priority class name", + serverPriorityClassName: "high-priority", + nodeAgentPriorityClassName: "", + expectedServerValue: "high-priority", + expectedNodeAgentValue: "", + }, + { + name: "with only node agent priority class name", + serverPriorityClassName: "", + nodeAgentPriorityClassName: "medium-priority", + expectedServerValue: "", + expectedNodeAgentValue: "medium-priority", + }, + { + name: "without priority class names", + serverPriorityClassName: "", + nodeAgentPriorityClassName: "", + expectedServerValue: "", + expectedNodeAgentValue: "", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + o := NewInstallOptions() + o.ServerPriorityClassName = tc.serverPriorityClassName + o.NodeAgentPriorityClassName = tc.nodeAgentPriorityClassName + + veleroOptions, err := o.AsVeleroOptions() + require.NoError(t, err) + assert.Equal(t, tc.expectedServerValue, veleroOptions.ServerPriorityClassName) + assert.Equal(t, tc.expectedNodeAgentValue, veleroOptions.NodeAgentPriorityClassName) + }) + } +} diff --git a/pkg/cmd/cli/nodeagent/server.go b/pkg/cmd/cli/nodeagent/server.go index dae91d91f..0ff5b2f1c 100644 --- a/pkg/cmd/cli/nodeagent/server.go +++ b/pkg/cmd/cli/nodeagent/server.go @@ -280,6 +280,21 @@ func (s *nodeAgentServer) run() { s.logger.Info("Starting controllers") + // Get priority class from dataPathConfigs if available + dataMovePriorityClass := "" + if s.dataPathConfigs != nil && s.dataPathConfigs.PriorityClassName != "" { + priorityClass := s.dataPathConfigs.PriorityClassName + // Validate the priority class exists in the cluster + ctx, cancel := context.WithTimeout(s.ctx, time.Second*30) + defer cancel() + if kube.ValidatePriorityClass(ctx, s.kubeClient, priorityClass, s.logger.WithField("component", "data-mover")) { + dataMovePriorityClass = priorityClass + s.logger.WithField("priorityClassName", priorityClass).Info("Using priority class for data mover pods") + } else { + s.logger.WithField("priorityClassName", priorityClass).Warn("Priority class not found in cluster, data mover pods will use default priority") + } + } + var 
loadAffinity []*kube.LoadAffinity if s.dataPathConfigs != nil && len(s.dataPathConfigs.LoadAffinity) > 0 { loadAffinity = s.dataPathConfigs.LoadAffinity @@ -311,12 +326,12 @@ func (s *nodeAgentServer) run() { } } - pvbReconciler := controller.NewPodVolumeBackupReconciler(s.mgr.GetClient(), s.mgr, s.kubeClient, s.dataPathMgr, s.vgdpCounter, s.nodeName, s.config.dataMoverPrepareTimeout, s.config.resourceTimeout, podResources, s.metrics, s.logger) + pvbReconciler := controller.NewPodVolumeBackupReconciler(s.mgr.GetClient(), s.mgr, s.kubeClient, s.dataPathMgr, s.vgdpCounter, s.nodeName, s.config.dataMoverPrepareTimeout, s.config.resourceTimeout, podResources, s.metrics, s.logger, dataMovePriorityClass) if err := pvbReconciler.SetupWithManager(s.mgr); err != nil { s.logger.Fatal(err, "unable to create controller", "controller", constant.ControllerPodVolumeBackup) } - pvrReconciler := controller.NewPodVolumeRestoreReconciler(s.mgr.GetClient(), s.mgr, s.kubeClient, s.dataPathMgr, s.vgdpCounter, s.nodeName, s.config.dataMoverPrepareTimeout, s.config.resourceTimeout, podResources, s.logger) + pvrReconciler := controller.NewPodVolumeRestoreReconciler(s.mgr.GetClient(), s.mgr, s.kubeClient, s.dataPathMgr, s.vgdpCounter, s.nodeName, s.config.dataMoverPrepareTimeout, s.config.resourceTimeout, podResources, s.logger, dataMovePriorityClass) if err := pvrReconciler.SetupWithManager(s.mgr); err != nil { s.logger.WithError(err).Fatal("Unable to create the pod volume restore controller") } @@ -340,6 +355,7 @@ func (s *nodeAgentServer) run() { s.config.dataMoverPrepareTimeout, s.logger, s.metrics, + dataMovePriorityClass, ) if err := dataUploadReconciler.SetupWithManager(s.mgr); err != nil { s.logger.WithError(err).Fatal("Unable to create the data upload controller") @@ -364,6 +380,7 @@ func (s *nodeAgentServer) run() { s.config.dataMoverPrepareTimeout, s.logger, s.metrics, + dataMovePriorityClass, ) if err := dataDownloadReconciler.SetupWithManager(s.mgr); err != nil { diff --git a/pkg/cmd/server/config/config.go b/pkg/cmd/server/config/config.go index d1577fc58..9d08a9715 100644 --- a/pkg/cmd/server/config/config.go +++ b/pkg/cmd/server/config/config.go @@ -16,7 +16,6 @@ import ( podvolumeconfigs "github.com/vmware-tanzu/velero/pkg/podvolume/configs" "github.com/vmware-tanzu/velero/pkg/types" "github.com/vmware-tanzu/velero/pkg/uploader" - "github.com/vmware-tanzu/velero/pkg/util/kube" "github.com/vmware-tanzu/velero/pkg/util/logging" ) @@ -47,12 +46,6 @@ const ( defaultMaxConcurrentK8SConnections = 30 defaultDisableInformerCache = false - DefaultKeepLatestMaintenanceJobs = 3 - DefaultMaintenanceJobCPURequest = "0" - DefaultMaintenanceJobCPULimit = "0" - DefaultMaintenanceJobMemRequest = "0" - DefaultMaintenanceJobMemLimit = "0" - DefaultItemBlockWorkerCount = 1 ) @@ -179,8 +172,6 @@ type Config struct { CredentialsDirectory string BackupRepoConfig string RepoMaintenanceJobConfig string - PodResources kube.PodResources - KeepLatestMaintenanceJobs int ItemBlockWorkerCount int } @@ -213,14 +204,7 @@ func GetDefaultConfig() *Config { DisableInformerCache: defaultDisableInformerCache, ScheduleSkipImmediately: false, CredentialsDirectory: credentials.DefaultStoreDirectory(), - PodResources: kube.PodResources{ - CPURequest: DefaultMaintenanceJobCPULimit, - CPULimit: DefaultMaintenanceJobCPURequest, - MemoryRequest: DefaultMaintenanceJobMemRequest, - MemoryLimit: DefaultMaintenanceJobMemLimit, - }, - KeepLatestMaintenanceJobs: DefaultKeepLatestMaintenanceJobs, - ItemBlockWorkerCount: DefaultItemBlockWorkerCount, + 
ItemBlockWorkerCount: DefaultItemBlockWorkerCount, } return config @@ -258,36 +242,6 @@ func (c *Config) BindFlags(flags *pflag.FlagSet) { flags.BoolVar(&c.ScheduleSkipImmediately, "schedule-skip-immediately", c.ScheduleSkipImmediately, "Skip the first scheduled backup immediately after creating a schedule. Default is false (don't skip).") flags.Var(&c.DefaultVolumeSnapshotLocations, "default-volume-snapshot-locations", "List of unique volume providers and default volume snapshot location (provider1:location-01,provider2:location-02,...)") - flags.IntVar( - &c.KeepLatestMaintenanceJobs, - "keep-latest-maintenance-jobs", - c.KeepLatestMaintenanceJobs, - "Number of latest maintenance jobs to keep each repository. Optional.", - ) - flags.StringVar( - &c.PodResources.CPURequest, - "maintenance-job-cpu-request", - c.PodResources.CPURequest, - "CPU request for maintenance job. Default is no limit.", - ) - flags.StringVar( - &c.PodResources.MemoryRequest, - "maintenance-job-mem-request", - c.PodResources.MemoryRequest, - "Memory request for maintenance job. Default is no limit.", - ) - flags.StringVar( - &c.PodResources.CPULimit, - "maintenance-job-cpu-limit", - c.PodResources.CPULimit, - "CPU limit for maintenance job. Default is no limit.", - ) - flags.StringVar( - &c.PodResources.MemoryLimit, - "maintenance-job-mem-limit", - c.PodResources.MemoryLimit, - "Memory limit for maintenance job. Default is no limit.", - ) flags.StringVar( &c.BackupRepoConfig, "backup-repository-configmap", diff --git a/pkg/cmd/server/config/config_test.go b/pkg/cmd/server/config/config_test.go index 28071d62e..ba17437f1 100644 --- a/pkg/cmd/server/config/config_test.go +++ b/pkg/cmd/server/config/config_test.go @@ -9,11 +9,11 @@ import ( func TestGetDefaultConfig(t *testing.T) { config := GetDefaultConfig() - assert.Equal(t, "0", config.PodResources.CPULimit) + assert.Equal(t, 1, config.ItemBlockWorkerCount) } func TestBindFlags(t *testing.T) { config := GetDefaultConfig() config.BindFlags(pflag.CommandLine) - assert.Equal(t, "0", config.PodResources.CPULimit) + assert.Equal(t, 1, config.ItemBlockWorkerCount) } diff --git a/pkg/cmd/server/server.go b/pkg/cmd/server/server.go index d89f88fdb..e7ade6d31 100644 --- a/pkg/cmd/server/server.go +++ b/pkg/cmd/server/server.go @@ -738,9 +738,7 @@ func (s *server) runControllers(defaultVolumeSnapshotLocations map[string]string s.repoManager, s.config.RepoMaintenanceFrequency, s.config.BackupRepoConfig, - s.config.KeepLatestMaintenanceJobs, s.config.RepoMaintenanceJobConfig, - s.config.PodResources, s.logLevel, s.config.LogFormat, ).SetupWithManager(s.mgr); err != nil { diff --git a/pkg/cmd/util/output/backup_describer_test.go b/pkg/cmd/util/output/backup_describer_test.go index 1d158f1ed..96e25466f 100644 --- a/pkg/cmd/util/output/backup_describer_test.go +++ b/pkg/cmd/util/output/backup_describer_test.go @@ -235,9 +235,10 @@ Hooks: Excluded: Resources: - Included: * - Excluded: - Cluster-scoped: auto + Included cluster-scoped: + Excluded cluster-scoped: + Included namespace-scoped: * + Excluded namespace-scoped: Label selector: @@ -292,9 +293,10 @@ OrderedResources: Excluded: Resources: - Included: * - Excluded: - Cluster-scoped: auto + Included cluster-scoped: + Excluded cluster-scoped: + Included namespace-scoped: * + Excluded namespace-scoped: Label selector: @@ -325,9 +327,10 @@ Hooks: Excluded: Resources: - Included: * - Excluded: - Cluster-scoped: auto + Included cluster-scoped: + Excluded cluster-scoped: + Included namespace-scoped: * + Excluded namespace-scoped: 
Label selector: diff --git a/pkg/cmd/util/output/schedule_describe_test.go b/pkg/cmd/util/output/schedule_describe_test.go index 4d002619b..12e8c5858 100644 --- a/pkg/cmd/util/output/schedule_describe_test.go +++ b/pkg/cmd/util/output/schedule_describe_test.go @@ -32,9 +32,10 @@ Backup Template: Excluded: Resources: - Included: * - Excluded: - Cluster-scoped: auto + Included cluster-scoped: + Excluded cluster-scoped: + Included namespace-scoped: * + Excluded namespace-scoped: Label selector: @@ -81,9 +82,10 @@ Backup Template: Excluded: Resources: - Included: * - Excluded: - Cluster-scoped: auto + Included cluster-scoped: + Excluded cluster-scoped: + Included namespace-scoped: * + Excluded namespace-scoped: Label selector: @@ -127,9 +129,10 @@ Backup Template: Excluded: Resources: - Included: * - Excluded: - Cluster-scoped: auto + Included cluster-scoped: + Excluded cluster-scoped: + Included namespace-scoped: * + Excluded namespace-scoped: Label selector: @@ -174,9 +177,10 @@ Backup Template: Excluded: Resources: - Included: * - Excluded: - Cluster-scoped: auto + Included cluster-scoped: + Excluded cluster-scoped: + Included namespace-scoped: * + Excluded namespace-scoped: Label selector: diff --git a/pkg/controller/backup_controller.go b/pkg/controller/backup_controller.go index 18b351e20..532a5f332 100644 --- a/pkg/controller/backup_controller.go +++ b/pkg/controller/backup_controller.go @@ -558,8 +558,11 @@ func (b *backupReconciler) prepareBackupRequest(backup *velerov1api.Backup, logg if err != nil { request.Status.ValidationErrors = append(request.Status.ValidationErrors, err.Error()) } + if resourcePolicies != nil && resourcePolicies.GetIncludeExcludePolicy() != nil && collections.UseOldResourceFilters(request.Spec) { + request.Status.ValidationErrors = append(request.Status.ValidationErrors, "include-resources, exclude-resources and include-cluster-resources are old filter parameters.\n"+ + "They cannot be used with include-exclude policies.") + } request.ResPolicies = resourcePolicies - return request } @@ -812,7 +815,6 @@ func (b *backupReconciler) runBackup(backup *pkgbackup.Request) error { fatalErrs = append(fatalErrs, errs...) 
} } - b.logger.WithField(constant.ControllerBackup, kubeutil.NamespaceAndName(backup)).Infof("Initial backup processing complete, moving to %s", backup.Status.Phase) // if we return a non-nil error, the calling function will update diff --git a/pkg/controller/backup_controller_test.go b/pkg/controller/backup_controller_test.go index eb20af907..715f5068a 100644 --- a/pkg/controller/backup_controller_test.go +++ b/pkg/controller/backup_controller_test.go @@ -702,10 +702,11 @@ func TestProcessBackupCompletions(t *testing.T) { }, }, Spec: velerov1api.BackupSpec{ - StorageLocation: defaultBackupLocation.Name, - DefaultVolumesToFsBackup: boolptr.True(), - SnapshotMoveData: boolptr.False(), - ExcludedResources: append(autoExcludeNamespaceScopedResources, autoExcludeClusterScopedResources...), + StorageLocation: defaultBackupLocation.Name, + DefaultVolumesToFsBackup: boolptr.True(), + SnapshotMoveData: boolptr.False(), + ExcludedClusterScopedResources: autoExcludeClusterScopedResources, + ExcludedNamespaceScopedResources: autoExcludeNamespaceScopedResources, }, Status: velerov1api.BackupStatus{ Phase: velerov1api.BackupPhaseFinalizing, @@ -740,10 +741,11 @@ func TestProcessBackupCompletions(t *testing.T) { }, }, Spec: velerov1api.BackupSpec{ - StorageLocation: "alt-loc", - DefaultVolumesToFsBackup: boolptr.False(), - SnapshotMoveData: boolptr.False(), - ExcludedResources: append(autoExcludeNamespaceScopedResources, autoExcludeClusterScopedResources...), + StorageLocation: "alt-loc", + DefaultVolumesToFsBackup: boolptr.False(), + SnapshotMoveData: boolptr.False(), + ExcludedClusterScopedResources: autoExcludeClusterScopedResources, + ExcludedNamespaceScopedResources: autoExcludeNamespaceScopedResources, }, Status: velerov1api.BackupStatus{ Phase: velerov1api.BackupPhaseFinalizing, @@ -782,10 +784,11 @@ func TestProcessBackupCompletions(t *testing.T) { }, }, Spec: velerov1api.BackupSpec{ - StorageLocation: "read-write", - DefaultVolumesToFsBackup: boolptr.True(), - SnapshotMoveData: boolptr.False(), - ExcludedResources: append(autoExcludeNamespaceScopedResources, autoExcludeClusterScopedResources...), + StorageLocation: "read-write", + DefaultVolumesToFsBackup: boolptr.True(), + SnapshotMoveData: boolptr.False(), + ExcludedClusterScopedResources: autoExcludeClusterScopedResources, + ExcludedNamespaceScopedResources: autoExcludeNamespaceScopedResources, }, Status: velerov1api.BackupStatus{ Phase: velerov1api.BackupPhaseFinalizing, @@ -820,11 +823,12 @@ func TestProcessBackupCompletions(t *testing.T) { }, }, Spec: velerov1api.BackupSpec{ - TTL: metav1.Duration{Duration: 10 * time.Minute}, - StorageLocation: defaultBackupLocation.Name, - DefaultVolumesToFsBackup: boolptr.False(), - SnapshotMoveData: boolptr.False(), - ExcludedResources: append(autoExcludeNamespaceScopedResources, autoExcludeClusterScopedResources...), + TTL: metav1.Duration{Duration: 10 * time.Minute}, + StorageLocation: defaultBackupLocation.Name, + DefaultVolumesToFsBackup: boolptr.False(), + SnapshotMoveData: boolptr.False(), + ExcludedClusterScopedResources: autoExcludeClusterScopedResources, + ExcludedNamespaceScopedResources: autoExcludeNamespaceScopedResources, }, Status: velerov1api.BackupStatus{ Phase: velerov1api.BackupPhaseFinalizing, @@ -860,10 +864,11 @@ func TestProcessBackupCompletions(t *testing.T) { }, }, Spec: velerov1api.BackupSpec{ - StorageLocation: defaultBackupLocation.Name, - DefaultVolumesToFsBackup: boolptr.True(), - SnapshotMoveData: boolptr.False(), - ExcludedResources: 
append(autoExcludeNamespaceScopedResources, autoExcludeClusterScopedResources...), + StorageLocation: defaultBackupLocation.Name, + DefaultVolumesToFsBackup: boolptr.True(), + SnapshotMoveData: boolptr.False(), + ExcludedClusterScopedResources: autoExcludeClusterScopedResources, + ExcludedNamespaceScopedResources: autoExcludeNamespaceScopedResources, }, Status: velerov1api.BackupStatus{ Phase: velerov1api.BackupPhaseFinalizing, @@ -900,10 +905,11 @@ func TestProcessBackupCompletions(t *testing.T) { }, }, Spec: velerov1api.BackupSpec{ - StorageLocation: defaultBackupLocation.Name, - DefaultVolumesToFsBackup: boolptr.False(), - SnapshotMoveData: boolptr.False(), - ExcludedResources: append(autoExcludeNamespaceScopedResources, autoExcludeClusterScopedResources...), + StorageLocation: defaultBackupLocation.Name, + DefaultVolumesToFsBackup: boolptr.False(), + SnapshotMoveData: boolptr.False(), + ExcludedClusterScopedResources: autoExcludeClusterScopedResources, + ExcludedNamespaceScopedResources: autoExcludeNamespaceScopedResources, }, Status: velerov1api.BackupStatus{ Phase: velerov1api.BackupPhaseFinalizing, @@ -940,10 +946,11 @@ func TestProcessBackupCompletions(t *testing.T) { }, }, Spec: velerov1api.BackupSpec{ - StorageLocation: defaultBackupLocation.Name, - DefaultVolumesToFsBackup: boolptr.True(), - SnapshotMoveData: boolptr.False(), - ExcludedResources: append(autoExcludeNamespaceScopedResources, autoExcludeClusterScopedResources...), + StorageLocation: defaultBackupLocation.Name, + DefaultVolumesToFsBackup: boolptr.True(), + SnapshotMoveData: boolptr.False(), + ExcludedClusterScopedResources: autoExcludeClusterScopedResources, + ExcludedNamespaceScopedResources: autoExcludeNamespaceScopedResources, }, Status: velerov1api.BackupStatus{ Phase: velerov1api.BackupPhaseFinalizing, @@ -980,10 +987,11 @@ func TestProcessBackupCompletions(t *testing.T) { }, }, Spec: velerov1api.BackupSpec{ - StorageLocation: defaultBackupLocation.Name, - DefaultVolumesToFsBackup: boolptr.True(), - SnapshotMoveData: boolptr.False(), - ExcludedResources: append(autoExcludeNamespaceScopedResources, autoExcludeClusterScopedResources...), + StorageLocation: defaultBackupLocation.Name, + DefaultVolumesToFsBackup: boolptr.True(), + SnapshotMoveData: boolptr.False(), + ExcludedClusterScopedResources: autoExcludeClusterScopedResources, + ExcludedNamespaceScopedResources: autoExcludeNamespaceScopedResources, }, Status: velerov1api.BackupStatus{ Phase: velerov1api.BackupPhaseFinalizing, @@ -1020,10 +1028,11 @@ func TestProcessBackupCompletions(t *testing.T) { }, }, Spec: velerov1api.BackupSpec{ - StorageLocation: defaultBackupLocation.Name, - DefaultVolumesToFsBackup: boolptr.False(), - SnapshotMoveData: boolptr.False(), - ExcludedResources: append(autoExcludeNamespaceScopedResources, autoExcludeClusterScopedResources...), + StorageLocation: defaultBackupLocation.Name, + DefaultVolumesToFsBackup: boolptr.False(), + SnapshotMoveData: boolptr.False(), + ExcludedClusterScopedResources: autoExcludeClusterScopedResources, + ExcludedNamespaceScopedResources: autoExcludeNamespaceScopedResources, }, Status: velerov1api.BackupStatus{ Phase: velerov1api.BackupPhaseFinalizing, @@ -1061,10 +1070,11 @@ func TestProcessBackupCompletions(t *testing.T) { }, }, Spec: velerov1api.BackupSpec{ - StorageLocation: defaultBackupLocation.Name, - DefaultVolumesToFsBackup: boolptr.True(), - SnapshotMoveData: boolptr.False(), - ExcludedResources: append(autoExcludeNamespaceScopedResources, autoExcludeClusterScopedResources...), + StorageLocation: 
defaultBackupLocation.Name, + DefaultVolumesToFsBackup: boolptr.True(), + SnapshotMoveData: boolptr.False(), + ExcludedClusterScopedResources: autoExcludeClusterScopedResources, + ExcludedNamespaceScopedResources: autoExcludeNamespaceScopedResources, }, Status: velerov1api.BackupStatus{ Phase: velerov1api.BackupPhaseFailed, @@ -1102,10 +1112,11 @@ func TestProcessBackupCompletions(t *testing.T) { }, }, Spec: velerov1api.BackupSpec{ - StorageLocation: defaultBackupLocation.Name, - DefaultVolumesToFsBackup: boolptr.True(), - SnapshotMoveData: boolptr.False(), - ExcludedResources: append(autoExcludeNamespaceScopedResources, autoExcludeClusterScopedResources...), + StorageLocation: defaultBackupLocation.Name, + DefaultVolumesToFsBackup: boolptr.True(), + SnapshotMoveData: boolptr.False(), + ExcludedClusterScopedResources: autoExcludeClusterScopedResources, + ExcludedNamespaceScopedResources: autoExcludeNamespaceScopedResources, }, Status: velerov1api.BackupStatus{ Phase: velerov1api.BackupPhaseFailed, @@ -1143,10 +1154,11 @@ func TestProcessBackupCompletions(t *testing.T) { }, }, Spec: velerov1api.BackupSpec{ - StorageLocation: defaultBackupLocation.Name, - DefaultVolumesToFsBackup: boolptr.False(), - SnapshotMoveData: boolptr.True(), - ExcludedResources: append(autoExcludeNamespaceScopedResources, autoExcludeClusterScopedResources...), + StorageLocation: defaultBackupLocation.Name, + DefaultVolumesToFsBackup: boolptr.False(), + SnapshotMoveData: boolptr.True(), + ExcludedClusterScopedResources: autoExcludeClusterScopedResources, + ExcludedNamespaceScopedResources: autoExcludeNamespaceScopedResources, }, Status: velerov1api.BackupStatus{ Phase: velerov1api.BackupPhaseFinalizing, @@ -1185,10 +1197,11 @@ func TestProcessBackupCompletions(t *testing.T) { }, }, Spec: velerov1api.BackupSpec{ - StorageLocation: defaultBackupLocation.Name, - DefaultVolumesToFsBackup: boolptr.False(), - SnapshotMoveData: boolptr.False(), - ExcludedResources: append(autoExcludeNamespaceScopedResources, autoExcludeClusterScopedResources...), + StorageLocation: defaultBackupLocation.Name, + DefaultVolumesToFsBackup: boolptr.False(), + SnapshotMoveData: boolptr.False(), + ExcludedClusterScopedResources: autoExcludeClusterScopedResources, + ExcludedNamespaceScopedResources: autoExcludeNamespaceScopedResources, }, Status: velerov1api.BackupStatus{ Phase: velerov1api.BackupPhaseFinalizing, @@ -1227,10 +1240,11 @@ func TestProcessBackupCompletions(t *testing.T) { }, }, Spec: velerov1api.BackupSpec{ - StorageLocation: defaultBackupLocation.Name, - DefaultVolumesToFsBackup: boolptr.False(), - SnapshotMoveData: boolptr.False(), - ExcludedResources: append(autoExcludeNamespaceScopedResources, autoExcludeClusterScopedResources...), + StorageLocation: defaultBackupLocation.Name, + DefaultVolumesToFsBackup: boolptr.False(), + SnapshotMoveData: boolptr.False(), + ExcludedClusterScopedResources: autoExcludeClusterScopedResources, + ExcludedNamespaceScopedResources: autoExcludeNamespaceScopedResources, }, Status: velerov1api.BackupStatus{ Phase: velerov1api.BackupPhaseFinalizing, @@ -1269,10 +1283,11 @@ func TestProcessBackupCompletions(t *testing.T) { }, }, Spec: velerov1api.BackupSpec{ - StorageLocation: defaultBackupLocation.Name, - DefaultVolumesToFsBackup: boolptr.False(), - SnapshotMoveData: boolptr.True(), - ExcludedResources: append(autoExcludeNamespaceScopedResources, autoExcludeClusterScopedResources...), + StorageLocation: defaultBackupLocation.Name, + DefaultVolumesToFsBackup: boolptr.False(), + SnapshotMoveData: 
boolptr.True(), + ExcludedClusterScopedResources: autoExcludeClusterScopedResources, + ExcludedNamespaceScopedResources: autoExcludeNamespaceScopedResources, }, Status: velerov1api.BackupStatus{ Phase: velerov1api.BackupPhaseFinalizing, @@ -1312,10 +1327,11 @@ func TestProcessBackupCompletions(t *testing.T) { }, }, Spec: velerov1api.BackupSpec{ - StorageLocation: defaultBackupLocation.Name, - DefaultVolumesToFsBackup: boolptr.False(), - SnapshotMoveData: boolptr.False(), - ExcludedResources: append(autoExcludeNamespaceScopedResources, autoExcludeClusterScopedResources...), + StorageLocation: defaultBackupLocation.Name, + DefaultVolumesToFsBackup: boolptr.False(), + SnapshotMoveData: boolptr.False(), + ExcludedClusterScopedResources: autoExcludeClusterScopedResources, + ExcludedNamespaceScopedResources: autoExcludeNamespaceScopedResources, }, Status: velerov1api.BackupStatus{ Phase: velerov1api.BackupPhaseFinalizing, @@ -1354,10 +1370,11 @@ func TestProcessBackupCompletions(t *testing.T) { }, }, Spec: velerov1api.BackupSpec{ - StorageLocation: defaultBackupLocation.Name, - DefaultVolumesToFsBackup: boolptr.False(), - SnapshotMoveData: boolptr.True(), - ExcludedResources: append(autoExcludeNamespaceScopedResources, autoExcludeClusterScopedResources...), + StorageLocation: defaultBackupLocation.Name, + DefaultVolumesToFsBackup: boolptr.False(), + SnapshotMoveData: boolptr.True(), + ExcludedClusterScopedResources: autoExcludeClusterScopedResources, + ExcludedNamespaceScopedResources: autoExcludeNamespaceScopedResources, }, Status: velerov1api.BackupStatus{ Phase: velerov1api.BackupPhaseFinalizing, diff --git a/pkg/controller/backup_repository_controller.go b/pkg/controller/backup_repository_controller.go index 52ccef6ae..fe6f71aa2 100644 --- a/pkg/controller/backup_repository_controller.go +++ b/pkg/controller/backup_repository_controller.go @@ -57,17 +57,15 @@ const ( type BackupRepoReconciler struct { client.Client - namespace string - logger logrus.FieldLogger - clock clocks.WithTickerAndDelayedExecution - maintenanceFrequency time.Duration - backupRepoConfig string - repositoryManager repomanager.Manager - keepLatestMaintenanceJobs int - repoMaintenanceConfig string - maintenanceJobResources kube.PodResources - logLevel logrus.Level - logFormat *logging.FormatFlag + namespace string + logger logrus.FieldLogger + clock clocks.WithTickerAndDelayedExecution + maintenanceFrequency time.Duration + backupRepoConfig string + repositoryManager repomanager.Manager + repoMaintenanceConfig string + logLevel logrus.Level + logFormat *logging.FormatFlag } func NewBackupRepoReconciler( @@ -77,9 +75,7 @@ func NewBackupRepoReconciler( repositoryManager repomanager.Manager, maintenanceFrequency time.Duration, backupRepoConfig string, - keepLatestMaintenanceJobs int, repoMaintenanceConfig string, - maintenanceJobResources kube.PodResources, logLevel logrus.Level, logFormat *logging.FormatFlag, ) *BackupRepoReconciler { @@ -91,9 +87,7 @@ func NewBackupRepoReconciler( maintenanceFrequency, backupRepoConfig, repositoryManager, - keepLatestMaintenanceJobs, repoMaintenanceConfig, - maintenanceJobResources, logLevel, logFormat, } @@ -275,15 +269,13 @@ func (r *BackupRepoReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{}, errors.Wrap(err, "error check and run repo maintenance jobs") } - // Get the configured number of maintenance jobs to keep from ConfigMap, fallback to CLI parameter - keepJobs := r.keepLatestMaintenanceJobs - if configuredKeep, err := 
maintenance.GetKeepLatestMaintenanceJobs(ctx, r.Client, log, r.namespace, r.repoMaintenanceConfig, backupRepo); err != nil { + // Get the configured number of maintenance jobs to keep from ConfigMap + keepJobs, err := maintenance.GetKeepLatestMaintenanceJobs(ctx, r.Client, log, r.namespace, r.repoMaintenanceConfig, backupRepo) + if err != nil { log.WithError(err).Warn("Failed to get keepLatestMaintenanceJobs from ConfigMap, using CLI parameter value") - } else if configuredKeep > 0 { - keepJobs = configuredKeep } - if err := maintenance.DeleteOldJobs(r.Client, req.Name, keepJobs); err != nil { + if err := maintenance.DeleteOldJobs(r.Client, req.Name, keepJobs, log); err != nil { log.WithError(err).Warn("Failed to delete old maintenance jobs") } } @@ -496,7 +488,7 @@ func (r *BackupRepoReconciler) runMaintenanceIfDue(ctx context.Context, req *vel log.Info("Running maintenance on backup repository") - job, err := funcStartMaintenanceJob(r.Client, ctx, req, r.repoMaintenanceConfig, r.maintenanceJobResources, r.logLevel, r.logFormat, log) + job, err := funcStartMaintenanceJob(r.Client, ctx, req, r.repoMaintenanceConfig, r.logLevel, r.logFormat, log) if err != nil { log.WithError(err).Warn("Starting repo maintenance failed") return r.patchBackupRepository(ctx, req, func(rr *velerov1api.BackupRepository) { diff --git a/pkg/controller/backup_repository_controller_test.go b/pkg/controller/backup_repository_controller_test.go index 5b019ae58..b8de27539 100644 --- a/pkg/controller/backup_repository_controller_test.go +++ b/pkg/controller/backup_repository_controller_test.go @@ -39,7 +39,6 @@ import ( repomokes "github.com/vmware-tanzu/velero/pkg/repository/mocks" repotypes "github.com/vmware-tanzu/velero/pkg/repository/types" velerotest "github.com/vmware-tanzu/velero/pkg/test" - "github.com/vmware-tanzu/velero/pkg/util/kube" "github.com/vmware-tanzu/velero/pkg/util/logging" "sigs.k8s.io/controller-runtime/pkg/client" @@ -63,9 +62,7 @@ func mockBackupRepoReconciler(t *testing.T, mockOn string, arg any, ret ...any) mgr, testMaintenanceFrequency, "fake-repo-config", - 3, "", - kube.PodResources{}, logrus.InfoLevel, nil, ) @@ -176,11 +173,11 @@ func TestCheckNotReadyRepo(t *testing.T) { }) } -func startMaintenanceJobFail(client.Client, context.Context, *velerov1api.BackupRepository, string, kube.PodResources, logrus.Level, *logging.FormatFlag, logrus.FieldLogger) (string, error) { +func startMaintenanceJobFail(client.Client, context.Context, *velerov1api.BackupRepository, string, logrus.Level, *logging.FormatFlag, logrus.FieldLogger) (string, error) { return "", errors.New("fake-start-error") } -func startMaintenanceJobSucceed(client.Client, context.Context, *velerov1api.BackupRepository, string, kube.PodResources, logrus.Level, *logging.FormatFlag, logrus.FieldLogger) (string, error) { +func startMaintenanceJobSucceed(client.Client, context.Context, *velerov1api.BackupRepository, string, logrus.Level, *logging.FormatFlag, logrus.FieldLogger) (string, error) { return "fake-job-name", nil } @@ -243,7 +240,7 @@ func TestRunMaintenanceIfDue(t *testing.T) { tests := []struct { name string repo *velerov1api.BackupRepository - startJobFunc func(client.Client, context.Context, *velerov1api.BackupRepository, string, kube.PodResources, logrus.Level, *logging.FormatFlag, logrus.FieldLogger) (string, error) + startJobFunc func(client.Client, context.Context, *velerov1api.BackupRepository, string, logrus.Level, *logging.FormatFlag, logrus.FieldLogger) (string, error) waitJobFunc func(client.Client, 
context.Context, string, string, logrus.FieldLogger) (velerov1api.BackupRepositoryMaintenanceStatus, error) expectedMaintenanceTime time.Time expectedHistory []velerov1api.BackupRepositoryMaintenanceStatus @@ -584,9 +581,7 @@ func TestGetRepositoryMaintenanceFrequency(t *testing.T) { &mgr, test.userDefinedFreq, "", - 3, "", - kube.PodResources{}, logrus.InfoLevel, nil, ) @@ -718,11 +713,10 @@ func TestNeedInvalidBackupRepo(t *testing.T) { nil, time.Duration(0), "", - 3, "", - kube.PodResources{}, logrus.InfoLevel, - nil) + nil, + ) need := reconciler.needInvalidBackupRepo(test.oldBSL, test.newBSL) assert.Equal(t, test.expect, need) @@ -1474,96 +1468,10 @@ func TestGetLastMaintenanceTimeFromHistory(t *testing.T) { } } -// This test verify the BackupRepository controller will keep no more jobs -// than the number of test case's keptJobNumber. -func TestDeleteOldMaintenanceJob(t *testing.T) { - now := time.Now().Round(time.Second) - - tests := []struct { - name string - repo *velerov1api.BackupRepository - keptJobNumber int // The BackupRepository controller's keepLatestMaintenanceJobs parameter - expectNil bool - maintenanceJobs []batchv1api.Job - bsl *velerov1api.BackupStorageLocation - }{ - { - name: "test maintenance job cleaning when repo is ready", - repo: &velerov1api.BackupRepository{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: velerov1api.DefaultNamespace, - Name: "repo", - }, - Spec: velerov1api.BackupRepositorySpec{ - MaintenanceFrequency: metav1.Duration{Duration: testMaintenanceFrequency}, - BackupStorageLocation: "default", - }, - Status: velerov1api.BackupRepositoryStatus{ - LastMaintenanceTime: &metav1.Time{Time: time.Now()}, - RecentMaintenance: []velerov1api.BackupRepositoryMaintenanceStatus{ - { - StartTimestamp: &metav1.Time{Time: now.Add(-time.Minute)}, - CompleteTimestamp: &metav1.Time{Time: now}, - Result: velerov1api.BackupRepositoryMaintenanceSucceeded, - }, - }, Phase: velerov1api.BackupRepositoryPhaseReady, - }, - }, - keptJobNumber: 1, - expectNil: true, - maintenanceJobs: []batchv1api.Job{ - *builder.ForJob("velero", "job-01").ObjectMeta(builder.WithLabels(repomaintenance.RepositoryNameLabel, "repo")).Succeeded(1).Result(), - *builder.ForJob("velero", "job-02").ObjectMeta(builder.WithLabels(repomaintenance.RepositoryNameLabel, "repo")).Succeeded(1).Result(), - }, - bsl: builder.ForBackupStorageLocation("velero", "default").Result(), - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - crClient := velerotest.NewFakeControllerRuntimeClient(t, test.repo, test.bsl) - for _, job := range test.maintenanceJobs { - require.NoError(t, crClient.Create(t.Context(), &job)) - } - - repoLocker := repository.NewRepoLocker() - mgr := repomanager.NewManager("", crClient, repoLocker, nil, nil, nil) - - reconciler := NewBackupRepoReconciler( - velerov1api.DefaultNamespace, - velerotest.NewLogger(), - crClient, - mgr, - time.Duration(0), - "", - test.keptJobNumber, - "", - kube.PodResources{}, - logrus.InfoLevel, - nil, - ) - - _, err := reconciler.Reconcile(t.Context(), ctrl.Request{NamespacedName: types.NamespacedName{Namespace: test.repo.Namespace, Name: "repo"}}) - if test.expectNil { - require.NoError(t, err) - } else { - require.Error(t, err) - } - - if len(test.maintenanceJobs) > 0 { - jobList := new(batchv1api.JobList) - require.NoError(t, reconciler.Client.List(t.Context(), jobList, &client.ListOptions{Namespace: "velero"})) - assert.Len(t, jobList.Items, 1) - } - }) - } -} - func TestDeleteOldMaintenanceJobWithConfigMap(t *testing.T) { tests 
:= []struct { name string repo *velerov1api.BackupRepository - serverKeepJobs int expectedKeptJobs int maintenanceJobs []batchv1api.Job bsl *velerov1api.BackupStorageLocation @@ -1586,7 +1494,6 @@ func TestDeleteOldMaintenanceJobWithConfigMap(t *testing.T) { Phase: velerov1api.BackupRepositoryPhaseReady, }, }, - serverKeepJobs: 3, expectedKeptJobs: 5, maintenanceJobs: []batchv1api.Job{ *builder.ForJob("velero", "job-01").ObjectMeta(builder.WithLabels(repomaintenance.RepositoryNameLabel, "repo")).Succeeded(1).Result(), @@ -1624,7 +1531,6 @@ func TestDeleteOldMaintenanceJobWithConfigMap(t *testing.T) { Phase: velerov1api.BackupRepositoryPhaseReady, }, }, - serverKeepJobs: 3, expectedKeptJobs: 2, maintenanceJobs: []batchv1api.Job{ *builder.ForJob("velero", "job-01").ObjectMeta(builder.WithLabels(repomaintenance.RepositoryNameLabel, "repo")).Succeeded(1).Result(), @@ -1643,34 +1549,6 @@ func TestDeleteOldMaintenanceJobWithConfigMap(t *testing.T) { }, }, }, - { - name: "test fallback to CLI parameter when no ConfigMap", - repo: &velerov1api.BackupRepository{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: velerov1api.DefaultNamespace, - Name: "repo", - }, - Spec: velerov1api.BackupRepositorySpec{ - MaintenanceFrequency: metav1.Duration{Duration: testMaintenanceFrequency}, - BackupStorageLocation: "default", - VolumeNamespace: "test-ns", - RepositoryType: "restic", - }, - Status: velerov1api.BackupRepositoryStatus{ - Phase: velerov1api.BackupRepositoryPhaseReady, - }, - }, - serverKeepJobs: 2, - expectedKeptJobs: 2, - maintenanceJobs: []batchv1api.Job{ - *builder.ForJob("velero", "job-01").ObjectMeta(builder.WithLabels(repomaintenance.RepositoryNameLabel, "repo")).Succeeded(1).Result(), - *builder.ForJob("velero", "job-02").ObjectMeta(builder.WithLabels(repomaintenance.RepositoryNameLabel, "repo")).Succeeded(1).Result(), - *builder.ForJob("velero", "job-03").ObjectMeta(builder.WithLabels(repomaintenance.RepositoryNameLabel, "repo")).Succeeded(1).Result(), - *builder.ForJob("velero", "job-04").ObjectMeta(builder.WithLabels(repomaintenance.RepositoryNameLabel, "repo")).Succeeded(1).Result(), - }, - bsl: builder.ForBackupStorageLocation("velero", "default").Result(), - repoMaintenanceJob: nil, // No ConfigMap - }, } for _, test := range tests { @@ -1700,9 +1578,7 @@ func TestDeleteOldMaintenanceJobWithConfigMap(t *testing.T) { mgr, time.Duration(0), "", - test.serverKeepJobs, repoMaintenanceConfigName, - kube.PodResources{}, logrus.InfoLevel, nil, ) @@ -1759,9 +1635,7 @@ func TestInitializeRepoWithRepositoryTypes(t *testing.T) { mgr, testMaintenanceFrequency, "", - 3, "", - kube.PodResources{}, logrus.InfoLevel, nil, ) @@ -1812,9 +1686,7 @@ func TestInitializeRepoWithRepositoryTypes(t *testing.T) { mgr, testMaintenanceFrequency, "", - 3, "", - kube.PodResources{}, logrus.InfoLevel, nil, ) @@ -1864,9 +1736,7 @@ func TestInitializeRepoWithRepositoryTypes(t *testing.T) { mgr, testMaintenanceFrequency, "", - 3, "", - kube.PodResources{}, logrus.InfoLevel, nil, ) diff --git a/pkg/controller/data_download_controller.go b/pkg/controller/data_download_controller.go index c4b6f63ba..a8a2fc633 100644 --- a/pkg/controller/data_download_controller.go +++ b/pkg/controller/data_download_controller.go @@ -71,6 +71,7 @@ type DataDownloadReconciler struct { preparingTimeout time.Duration metrics *metrics.ServerMetrics cancelledDataDownload map[string]time.Time + dataMovePriorityClass string } func NewDataDownloadReconciler( @@ -86,6 +87,7 @@ func NewDataDownloadReconciler( preparingTimeout time.Duration, logger 
logrus.FieldLogger, metrics *metrics.ServerMetrics, + dataMovePriorityClass string, ) *DataDownloadReconciler { return &DataDownloadReconciler{ client: client, @@ -103,6 +105,7 @@ func NewDataDownloadReconciler( preparingTimeout: preparingTimeout, metrics: metrics, cancelledDataDownload: make(map[string]time.Time), + dataMovePriorityClass: dataMovePriorityClass, } } @@ -890,6 +893,7 @@ func (r *DataDownloadReconciler) setupExposeParam(dd *velerov2alpha1api.DataDown NodeOS: nodeOS, RestorePVCConfig: r.restorePVCConfig, LoadAffinity: r.loadAffinity, + PriorityClassName: r.dataMovePriorityClass, }, nil } diff --git a/pkg/controller/data_download_controller_test.go b/pkg/controller/data_download_controller_test.go index 4dc17ea9f..048a1027c 100644 --- a/pkg/controller/data_download_controller_test.go +++ b/pkg/controller/data_download_controller_test.go @@ -130,19 +130,7 @@ func initDataDownloadReconcilerWithError(t *testing.T, objects []any, needError dataPathMgr := datapath.NewManager(1) - return NewDataDownloadReconciler( - &fakeClient, - nil, - fakeKubeClient, - dataPathMgr, - nil, - nil, - nodeagent.RestorePVC{}, - corev1api.ResourceRequirements{}, - "test-node", - time.Minute*5, - velerotest.NewLogger(), - metrics.NewServerMetrics()), nil + return NewDataDownloadReconciler(&fakeClient, nil, fakeKubeClient, dataPathMgr, nil, nil, nodeagent.RestorePVC{}, corev1api.ResourceRequirements{}, "test-node", time.Minute*5, velerotest.NewLogger(), metrics.NewServerMetrics(), ""), nil } func TestDataDownloadReconcile(t *testing.T) { diff --git a/pkg/controller/data_upload_controller.go b/pkg/controller/data_upload_controller.go index e5adecd63..502385c14 100644 --- a/pkg/controller/data_upload_controller.go +++ b/pkg/controller/data_upload_controller.go @@ -65,22 +65,23 @@ const ( // DataUploadReconciler reconciles a DataUpload object type DataUploadReconciler struct { - client client.Client - kubeClient kubernetes.Interface - csiSnapshotClient snapshotter.SnapshotV1Interface - mgr manager.Manager - Clock clocks.WithTickerAndDelayedExecution - nodeName string - logger logrus.FieldLogger - snapshotExposerList map[velerov2alpha1api.SnapshotType]exposer.SnapshotExposer - dataPathMgr *datapath.Manager - vgdpCounter *exposer.VgdpCounter - loadAffinity []*kube.LoadAffinity - backupPVCConfig map[string]nodeagent.BackupPVC - podResources corev1api.ResourceRequirements - preparingTimeout time.Duration - metrics *metrics.ServerMetrics - cancelledDataUpload map[string]time.Time + client client.Client + kubeClient kubernetes.Interface + csiSnapshotClient snapshotter.SnapshotV1Interface + mgr manager.Manager + Clock clocks.WithTickerAndDelayedExecution + nodeName string + logger logrus.FieldLogger + snapshotExposerList map[velerov2alpha1api.SnapshotType]exposer.SnapshotExposer + dataPathMgr *datapath.Manager + vgdpCounter *exposer.VgdpCounter + loadAffinity []*kube.LoadAffinity + backupPVCConfig map[string]nodeagent.BackupPVC + podResources corev1api.ResourceRequirements + preparingTimeout time.Duration + metrics *metrics.ServerMetrics + cancelledDataUpload map[string]time.Time + dataMovePriorityClass string } func NewDataUploadReconciler( @@ -98,6 +99,7 @@ func NewDataUploadReconciler( preparingTimeout time.Duration, log logrus.FieldLogger, metrics *metrics.ServerMetrics, + dataMovePriorityClass string, ) *DataUploadReconciler { return &DataUploadReconciler{ client: client, @@ -114,14 +116,15 @@ func NewDataUploadReconciler( log, ), }, - dataPathMgr: dataPathMgr, - vgdpCounter: counter, - loadAffinity: 
loadAffinity, - backupPVCConfig: backupPVCConfig, - podResources: podResources, - preparingTimeout: preparingTimeout, - metrics: metrics, - cancelledDataUpload: make(map[string]time.Time), + dataPathMgr: dataPathMgr, + vgdpCounter: counter, + loadAffinity: loadAffinity, + backupPVCConfig: backupPVCConfig, + podResources: podResources, + preparingTimeout: preparingTimeout, + metrics: metrics, + cancelledDataUpload: make(map[string]time.Time), + dataMovePriorityClass: dataMovePriorityClass, } } @@ -971,6 +974,7 @@ func (r *DataUploadReconciler) setupExposeParam(du *velerov2alpha1api.DataUpload BackupPVCConfig: r.backupPVCConfig, Resources: r.podResources, NodeOS: nodeOS, + PriorityClassName: r.dataMovePriorityClass, }, nil } diff --git a/pkg/controller/data_upload_controller_test.go b/pkg/controller/data_upload_controller_test.go index db4c29c1e..157ccd8d6 100644 --- a/pkg/controller/data_upload_controller_test.go +++ b/pkg/controller/data_upload_controller_test.go @@ -249,6 +249,7 @@ func initDataUploaderReconcilerWithError(needError ...error) (*DataUploadReconci time.Minute*5, velerotest.NewLogger(), metrics.NewServerMetrics(), + "", // dataMovePriorityClass ), nil } diff --git a/pkg/controller/pod_volume_backup_controller.go b/pkg/controller/pod_volume_backup_controller.go index f6eddc14b..3a446379f 100644 --- a/pkg/controller/pod_volume_backup_controller.go +++ b/pkg/controller/pod_volume_backup_controller.go @@ -60,41 +60,43 @@ const ( // NewPodVolumeBackupReconciler creates the PodVolumeBackupReconciler instance func NewPodVolumeBackupReconciler(client client.Client, mgr manager.Manager, kubeClient kubernetes.Interface, dataPathMgr *datapath.Manager, counter *exposer.VgdpCounter, nodeName string, preparingTimeout time.Duration, resourceTimeout time.Duration, podResources corev1api.ResourceRequirements, - metrics *metrics.ServerMetrics, logger logrus.FieldLogger) *PodVolumeBackupReconciler { + metrics *metrics.ServerMetrics, logger logrus.FieldLogger, dataMovePriorityClass string) *PodVolumeBackupReconciler { return &PodVolumeBackupReconciler{ - client: client, - mgr: mgr, - kubeClient: kubeClient, - logger: logger.WithField("controller", "PodVolumeBackup"), - nodeName: nodeName, - clock: &clocks.RealClock{}, - metrics: metrics, - podResources: podResources, - dataPathMgr: dataPathMgr, - vgdpCounter: counter, - preparingTimeout: preparingTimeout, - resourceTimeout: resourceTimeout, - exposer: exposer.NewPodVolumeExposer(kubeClient, logger), - cancelledPVB: make(map[string]time.Time), + client: client, + mgr: mgr, + kubeClient: kubeClient, + logger: logger.WithField("controller", "PodVolumeBackup"), + nodeName: nodeName, + clock: &clocks.RealClock{}, + metrics: metrics, + podResources: podResources, + dataPathMgr: dataPathMgr, + vgdpCounter: counter, + preparingTimeout: preparingTimeout, + resourceTimeout: resourceTimeout, + exposer: exposer.NewPodVolumeExposer(kubeClient, logger), + cancelledPVB: make(map[string]time.Time), + dataMovePriorityClass: dataMovePriorityClass, } } // PodVolumeBackupReconciler reconciles a PodVolumeBackup object type PodVolumeBackupReconciler struct { - client client.Client - mgr manager.Manager - kubeClient kubernetes.Interface - clock clocks.WithTickerAndDelayedExecution - exposer exposer.PodVolumeExposer - metrics *metrics.ServerMetrics - nodeName string - logger logrus.FieldLogger - podResources corev1api.ResourceRequirements - dataPathMgr *datapath.Manager - vgdpCounter *exposer.VgdpCounter - preparingTimeout time.Duration - resourceTimeout time.Duration 
- cancelledPVB map[string]time.Time + client client.Client + mgr manager.Manager + kubeClient kubernetes.Interface + clock clocks.WithTickerAndDelayedExecution + exposer exposer.PodVolumeExposer + metrics *metrics.ServerMetrics + nodeName string + logger logrus.FieldLogger + podResources corev1api.ResourceRequirements + dataPathMgr *datapath.Manager + vgdpCounter *exposer.VgdpCounter + preparingTimeout time.Duration + resourceTimeout time.Duration + cancelledPVB map[string]time.Time + dataMovePriorityClass string } // +kubebuilder:rbac:groups=velero.io,resources=podvolumebackups,verbs=get;list;watch;create;update;patch;delete @@ -833,6 +835,8 @@ func (r *PodVolumeBackupReconciler) setupExposeParam(pvb *velerov1api.PodVolumeB HostingPodTolerations: hostingPodTolerations, OperationTimeout: r.resourceTimeout, Resources: r.podResources, + // Priority class name for the data mover pod, retrieved from node-agent-configmap + PriorityClassName: r.dataMovePriorityClass, } } diff --git a/pkg/controller/pod_volume_backup_controller_test.go b/pkg/controller/pod_volume_backup_controller_test.go index f58a7d1a8..51e75edb2 100644 --- a/pkg/controller/pod_volume_backup_controller_test.go +++ b/pkg/controller/pod_volume_backup_controller_test.go @@ -151,6 +151,7 @@ func initPVBReconcilerWithError(needError ...error) (*PodVolumeBackupReconciler, corev1api.ResourceRequirements{}, metrics.NewServerMetrics(), velerotest.NewLogger(), + "", // dataMovePriorityClass ), nil } diff --git a/pkg/controller/pod_volume_restore_controller.go b/pkg/controller/pod_volume_restore_controller.go index 69ff782b6..ce0d312a0 100644 --- a/pkg/controller/pod_volume_restore_controller.go +++ b/pkg/controller/pod_volume_restore_controller.go @@ -56,38 +56,40 @@ import ( func NewPodVolumeRestoreReconciler(client client.Client, mgr manager.Manager, kubeClient kubernetes.Interface, dataPathMgr *datapath.Manager, counter *exposer.VgdpCounter, nodeName string, preparingTimeout time.Duration, resourceTimeout time.Duration, podResources corev1api.ResourceRequirements, - logger logrus.FieldLogger) *PodVolumeRestoreReconciler { + logger logrus.FieldLogger, dataMovePriorityClass string) *PodVolumeRestoreReconciler { return &PodVolumeRestoreReconciler{ - client: client, - mgr: mgr, - kubeClient: kubeClient, - logger: logger.WithField("controller", "PodVolumeRestore"), - nodeName: nodeName, - clock: &clocks.RealClock{}, - podResources: podResources, - dataPathMgr: dataPathMgr, - vgdpCounter: counter, - preparingTimeout: preparingTimeout, - resourceTimeout: resourceTimeout, - exposer: exposer.NewPodVolumeExposer(kubeClient, logger), - cancelledPVR: make(map[string]time.Time), + client: client, + mgr: mgr, + kubeClient: kubeClient, + logger: logger.WithField("controller", "PodVolumeRestore"), + nodeName: nodeName, + clock: &clocks.RealClock{}, + podResources: podResources, + dataPathMgr: dataPathMgr, + vgdpCounter: counter, + preparingTimeout: preparingTimeout, + resourceTimeout: resourceTimeout, + exposer: exposer.NewPodVolumeExposer(kubeClient, logger), + cancelledPVR: make(map[string]time.Time), + dataMovePriorityClass: dataMovePriorityClass, } } type PodVolumeRestoreReconciler struct { - client client.Client - mgr manager.Manager - kubeClient kubernetes.Interface - logger logrus.FieldLogger - nodeName string - clock clocks.WithTickerAndDelayedExecution - podResources corev1api.ResourceRequirements - exposer exposer.PodVolumeExposer - dataPathMgr *datapath.Manager - vgdpCounter *exposer.VgdpCounter - preparingTimeout time.Duration - 
resourceTimeout time.Duration - cancelledPVR map[string]time.Time + client client.Client + mgr manager.Manager + kubeClient kubernetes.Interface + logger logrus.FieldLogger + nodeName string + clock clocks.WithTickerAndDelayedExecution + podResources corev1api.ResourceRequirements + exposer exposer.PodVolumeExposer + dataPathMgr *datapath.Manager + vgdpCounter *exposer.VgdpCounter + preparingTimeout time.Duration + resourceTimeout time.Duration + cancelledPVR map[string]time.Time + dataMovePriorityClass string } // +kubebuilder:rbac:groups=velero.io,resources=podvolumerestores,verbs=get;list;watch;create;update;patch;delete @@ -892,6 +894,8 @@ func (r *PodVolumeRestoreReconciler) setupExposeParam(pvr *velerov1api.PodVolume HostingPodTolerations: hostingPodTolerations, OperationTimeout: r.resourceTimeout, Resources: r.podResources, + // Priority class name for the data mover pod, retrieved from node-agent-configmap + PriorityClassName: r.dataMovePriorityClass, } } diff --git a/pkg/controller/pod_volume_restore_controller_test.go b/pkg/controller/pod_volume_restore_controller_test.go index ee29d2b6c..409672c32 100644 --- a/pkg/controller/pod_volume_restore_controller_test.go +++ b/pkg/controller/pod_volume_restore_controller_test.go @@ -617,7 +617,7 @@ func initPodVolumeRestoreReconcilerWithError(objects []runtime.Object, cliObj [] dataPathMgr := datapath.NewManager(1) - return NewPodVolumeRestoreReconciler(fakeClient, nil, fakeKubeClient, dataPathMgr, nil, "test-node", time.Minute*5, time.Minute, corev1api.ResourceRequirements{}, velerotest.NewLogger()), nil + return NewPodVolumeRestoreReconciler(fakeClient, nil, fakeKubeClient, dataPathMgr, nil, "test-node", time.Minute*5, time.Minute, corev1api.ResourceRequirements{}, velerotest.NewLogger(), ""), nil } func TestPodVolumeRestoreReconcile(t *testing.T) { diff --git a/pkg/exposer/csi_snapshot.go b/pkg/exposer/csi_snapshot.go index 0e514fe6c..e404d5dcf 100644 --- a/pkg/exposer/csi_snapshot.go +++ b/pkg/exposer/csi_snapshot.go @@ -82,6 +82,9 @@ type CSISnapshotExposeParam struct { // NodeOS specifies the OS of node that the source volume is attaching NodeOS string + + // PriorityClassName is the priority class name for the data mover pod + PriorityClassName string } // CSISnapshotExposeWaitParam define the input param for WaitExposed of CSI snapshots @@ -226,6 +229,7 @@ func (e *csiSnapshotExposer) Expose(ctx context.Context, ownerObject corev1api.O backupPVCReadOnly, spcNoRelabeling, csiExposeParam.NodeOS, + csiExposeParam.PriorityClassName, ) if err != nil { return errors.Wrap(err, "error to create backup pod") @@ -552,6 +556,7 @@ func (e *csiSnapshotExposer) createBackupPod( backupPVCReadOnly bool, spcNoRelabeling bool, nodeOS string, + priorityClassName string, ) (*corev1api.Pod, error) { podName := ownerObject.Name @@ -563,6 +568,11 @@ func (e *csiSnapshotExposer) createBackupPod( return nil, errors.Wrap(err, "error to get inherited pod info from node-agent") } + // Log the priority class if it's set + if priorityClassName != "" { + e.log.Debugf("Setting priority class %q for data mover pod %s", priorityClassName, podName) + } + var gracePeriod int64 volumeMounts, volumeDevices, volumePath := kube.MakePodPVCAttachment(volumeName, backupPVC.Spec.VolumeMode, backupPVCReadOnly) volumeMounts = append(volumeMounts, podInfo.volumeMounts...) 
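To summarize the priorityClassName plumbing in the hunks above: the value read from the node-agent ConfigMap is carried by the reconcilers as dataMovePriorityClass, copied into the exposer parameters, and finally written onto the data mover pod spec. Below is a minimal Go sketch of that last step; exposeParam and buildDataMoverPodSpec are illustrative names, not the actual Velero types, and only the Pod.Spec.PriorityClassName assignment mirrors the diff.

package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
)

// exposeParam stands in for the exposer parameter structs that gained a
// PriorityClassName field in this change (illustrative only).
type exposeParam struct {
	PriorityClassName string
}

// buildDataMoverPodSpec shows where the value lands: Pod.Spec.PriorityClassName.
// An empty string leaves the pod without an explicit priority class.
func buildDataMoverPodSpec(p exposeParam) corev1.PodSpec {
	return corev1.PodSpec{
		Containers: []corev1.Container{
			{Name: "velero-data-mover", Image: "velero/velero:latest"},
		},
		PriorityClassName: p.PriorityClassName,
	}
}

func main() {
	spec := buildDataMoverPodSpec(exposeParam{PriorityClassName: "backup-critical"})
	fmt.Println(spec.PriorityClassName)
}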
@@ -616,12 +626,20 @@ func (e *csiSnapshotExposer) createBackupPod( nodeSelector[kube.NodeOSLabel] = kube.NodeOSWindows podOS.Name = kube.NodeOSWindows - toleration = append(toleration, corev1api.Toleration{ - Key: "os", - Operator: "Equal", - Effect: "NoSchedule", - Value: "windows", - }) + toleration = append(toleration, []corev1api.Toleration{ + { + Key: "os", + Operator: "Equal", + Effect: "NoSchedule", + Value: "windows", + }, + { + Key: "os", + Operator: "Equal", + Effect: "NoExecute", + Value: "windows", + }, + }...) } else { userID := int64(0) securityCtx = &corev1api.PodSecurityContext{ @@ -693,6 +711,7 @@ func (e *csiSnapshotExposer) createBackupPod( Resources: resources, }, }, + PriorityClassName: priorityClassName, ServiceAccountName: podInfo.serviceAccount, TerminationGracePeriodSeconds: &gracePeriod, Volumes: volumes, diff --git a/pkg/exposer/csi_snapshot_priority_test.go b/pkg/exposer/csi_snapshot_priority_test.go new file mode 100644 index 000000000..236d15acb --- /dev/null +++ b/pkg/exposer/csi_snapshot_priority_test.go @@ -0,0 +1,246 @@ +/* +Copyright the Velero contributors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package exposer + +import ( + "testing" + "time" + + snapshotFake "github.com/kubernetes-csi/external-snapshotter/client/v8/clientset/versioned/fake" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + appsv1api "k8s.io/api/apps/v1" + corev1api "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes/fake" + + velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" + velerotest "github.com/vmware-tanzu/velero/pkg/test" + "github.com/vmware-tanzu/velero/pkg/util/kube" +) + +func TestCreateBackupPodWithPriorityClass(t *testing.T) { + testCases := []struct { + name string + nodeAgentConfigMapData string + expectedPriorityClass string + description string + }{ + { + name: "with priority class in config map", + nodeAgentConfigMapData: `{ + "priorityClassName": "high-priority" + }`, + expectedPriorityClass: "high-priority", + description: "Should set priority class from node-agent-configmap", + }, + { + name: "without priority class in config map", + nodeAgentConfigMapData: `{ + "loadAffinity": [] + }`, + expectedPriorityClass: "", + description: "Should have empty priority class when not specified", + }, + { + name: "empty config map", + nodeAgentConfigMapData: `{}`, + expectedPriorityClass: "", + description: "Should handle empty config map gracefully", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + ctx := t.Context() + + // Create fake Kubernetes client + kubeClient := fake.NewSimpleClientset() + + // Create node-agent daemonset (required for getInheritedPodInfo) + daemonSet := &appsv1api.DaemonSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node-agent", + Namespace: velerov1api.DefaultNamespace, + }, + Spec: appsv1api.DaemonSetSpec{ + Template: corev1api.PodTemplateSpec{ + Spec: corev1api.PodSpec{ + Containers: []corev1api.Container{ + { + Name: 
"node-agent", + Image: "velero/velero:latest", + }, + }, + }, + }, + }, + } + _, err := kubeClient.AppsV1().DaemonSets(velerov1api.DefaultNamespace).Create(ctx, daemonSet, metav1.CreateOptions{}) + require.NoError(t, err) + + // Create node-agent config map + configMap := &corev1api.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node-agent-config", + Namespace: velerov1api.DefaultNamespace, + }, + Data: map[string]string{ + "config": tc.nodeAgentConfigMapData, + }, + } + _, err = kubeClient.CoreV1().ConfigMaps(velerov1api.DefaultNamespace).Create(ctx, configMap, metav1.CreateOptions{}) + require.NoError(t, err) + + // Create owner object for the backup pod + ownerObject := corev1api.ObjectReference{ + APIVersion: velerov1api.SchemeGroupVersion.String(), + Kind: "DataUpload", + Name: "test-dataupload", + Namespace: velerov1api.DefaultNamespace, + UID: "test-uid", + } + + // Create a backup PVC + backupPVC := &corev1api.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-backup-pvc", + Namespace: velerov1api.DefaultNamespace, + }, + Spec: corev1api.PersistentVolumeClaimSpec{ + AccessModes: []corev1api.PersistentVolumeAccessMode{ + corev1api.ReadWriteOnce, + }, + }, + } + + // Create fake snapshot client + fakeSnapshotClient := snapshotFake.NewSimpleClientset() + + // Create CSI snapshot exposer + exposer := &csiSnapshotExposer{ + kubeClient: kubeClient, + csiSnapshotClient: fakeSnapshotClient.SnapshotV1(), + log: velerotest.NewLogger(), + } + + // Call createBackupPod + pod, err := exposer.createBackupPod( + ctx, + ownerObject, + backupPVC, + time.Minute*5, + nil, // labels + nil, // annotations + nil, // tolerations + nil, // affinity + corev1api.ResourceRequirements{}, + false, // backupPVCReadOnly + false, // spcNoRelabeling + kube.NodeOSLinux, + tc.expectedPriorityClass, + ) + + require.NoError(t, err, tc.description) + assert.NotNil(t, pod) + assert.Equal(t, tc.expectedPriorityClass, pod.Spec.PriorityClassName, tc.description) + }) + } +} + +func TestCreateBackupPodWithMissingConfigMap(t *testing.T) { + ctx := t.Context() + + // Create fake Kubernetes client without config map + kubeClient := fake.NewSimpleClientset() + + // Create node-agent daemonset (required for getInheritedPodInfo) + daemonSet := &appsv1api.DaemonSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node-agent", + Namespace: velerov1api.DefaultNamespace, + }, + Spec: appsv1api.DaemonSetSpec{ + Template: corev1api.PodTemplateSpec{ + Spec: corev1api.PodSpec{ + Containers: []corev1api.Container{ + { + Name: "node-agent", + Image: "velero/velero:latest", + }, + }, + }, + }, + }, + } + _, err := kubeClient.AppsV1().DaemonSets(velerov1api.DefaultNamespace).Create(ctx, daemonSet, metav1.CreateOptions{}) + require.NoError(t, err) + + // Create owner object for the backup pod + ownerObject := corev1api.ObjectReference{ + APIVersion: velerov1api.SchemeGroupVersion.String(), + Kind: "DataUpload", + Name: "test-dataupload", + Namespace: velerov1api.DefaultNamespace, + UID: "test-uid", + } + + // Create a backup PVC + backupPVC := &corev1api.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-backup-pvc", + Namespace: velerov1api.DefaultNamespace, + }, + Spec: corev1api.PersistentVolumeClaimSpec{ + AccessModes: []corev1api.PersistentVolumeAccessMode{ + corev1api.ReadWriteOnce, + }, + }, + } + + // Create fake snapshot client + fakeSnapshotClient := snapshotFake.NewSimpleClientset() + + // Create CSI snapshot exposer + exposer := &csiSnapshotExposer{ + kubeClient: kubeClient, + 
csiSnapshotClient: fakeSnapshotClient.SnapshotV1(), + log: velerotest.NewLogger(), + } + + // Call createBackupPod + pod, err := exposer.createBackupPod( + ctx, + ownerObject, + backupPVC, + time.Minute*5, + nil, // labels + nil, // annotations + nil, // tolerations + nil, // affinity + corev1api.ResourceRequirements{}, + false, // backupPVCReadOnly + false, // spcNoRelabeling + kube.NodeOSLinux, + "", // empty priority class since config map is missing + ) + + // Should succeed even when config map is missing + require.NoError(t, err, "Should succeed even when config map is missing") + assert.NotNil(t, pod) + assert.Empty(t, pod.Spec.PriorityClassName, "Should have empty priority class when config map is missing") +} diff --git a/pkg/exposer/generic_restore.go b/pkg/exposer/generic_restore.go index f794ea2ed..c8ac7fcce 100644 --- a/pkg/exposer/generic_restore.go +++ b/pkg/exposer/generic_restore.go @@ -69,6 +69,9 @@ type GenericRestoreExposeParam struct { // LoadAffinity specifies the node affinity of the backup pod LoadAffinity []*kube.LoadAffinity + + // PriorityClassName is the priority class name for the data mover pod + PriorityClassName string } // GenericRestoreExposer is the interfaces for a generic restore exposer @@ -156,6 +159,7 @@ func (e *genericRestoreExposer) Expose(ctx context.Context, ownerObject corev1ap param.Resources, param.NodeOS, affinity, + param.PriorityClassName, ) if err != nil { return errors.Wrapf(err, "error to create restore pod") @@ -422,6 +426,7 @@ func (e *genericRestoreExposer) createRestorePod( resources corev1api.ResourceRequirements, nodeOS string, affinity *kube.LoadAffinity, + priorityClassName string, ) (*corev1api.Pod, error) { restorePodName := ownerObject.Name restorePVCName := ownerObject.Name @@ -443,6 +448,11 @@ func (e *genericRestoreExposer) createRestorePod( return nil, errors.Wrap(err, "error to get inherited pod info from node-agent") } + // Log the priority class if it's set + if priorityClassName != "" { + e.log.Debugf("Setting priority class %q for data mover pod %s", priorityClassName, restorePodName) + } + var gracePeriod int64 volumeMounts, volumeDevices, volumePath := kube.MakePodPVCAttachment(volumeName, targetPVC.Spec.VolumeMode, false) volumeMounts = append(volumeMounts, podInfo.volumeMounts...) @@ -491,12 +501,20 @@ func (e *genericRestoreExposer) createRestorePod( nodeSelector[kube.NodeOSLabel] = kube.NodeOSWindows podOS.Name = kube.NodeOSWindows - toleration = append(toleration, corev1api.Toleration{ - Key: "os", - Operator: "Equal", - Effect: "NoSchedule", - Value: "windows", - }) + toleration = append(toleration, []corev1api.Toleration{ + { + Key: "os", + Operator: "Equal", + Effect: "NoSchedule", + Value: "windows", + }, + { + Key: "os", + Operator: "Equal", + Effect: "NoExecute", + Value: "windows", + }, + }...) } else { userID := int64(0) securityCtx = &corev1api.PodSecurityContext{ @@ -556,6 +574,7 @@ func (e *genericRestoreExposer) createRestorePod( Resources: resources, }, }, + PriorityClassName: priorityClassName, ServiceAccountName: podInfo.serviceAccount, TerminationGracePeriodSeconds: &gracePeriod, Volumes: volumes, diff --git a/pkg/exposer/generic_restore_priority_test.go b/pkg/exposer/generic_restore_priority_test.go new file mode 100644 index 000000000..b67652b93 --- /dev/null +++ b/pkg/exposer/generic_restore_priority_test.go @@ -0,0 +1,236 @@ +/* +Copyright the Velero contributors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package exposer + +import ( + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + appsv1api "k8s.io/api/apps/v1" + corev1api "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes/fake" + + velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" + velerotest "github.com/vmware-tanzu/velero/pkg/test" + "github.com/vmware-tanzu/velero/pkg/util/kube" +) + +// TestCreateRestorePodWithPriorityClass verifies that the priority class name is properly set in the restore pod +func TestCreateRestorePodWithPriorityClass(t *testing.T) { + testCases := []struct { + name string + nodeAgentConfigMapData string + expectedPriorityClass string + description string + }{ + { + name: "with priority class in config map", + nodeAgentConfigMapData: `{ + "priorityClassName": "low-priority" + }`, + expectedPriorityClass: "low-priority", + description: "Should set priority class from node-agent-configmap", + }, + { + name: "without priority class in config map", + nodeAgentConfigMapData: `{ + "loadAffinity": [] + }`, + expectedPriorityClass: "", + description: "Should have empty priority class when not specified", + }, + { + name: "empty config map", + nodeAgentConfigMapData: `{}`, + expectedPriorityClass: "", + description: "Should handle empty config map gracefully", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + ctx := t.Context() + + // Create fake Kubernetes client + kubeClient := fake.NewSimpleClientset() + + // Create node-agent daemonset (required for getInheritedPodInfo) + daemonSet := &appsv1api.DaemonSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node-agent", + Namespace: velerov1api.DefaultNamespace, + }, + Spec: appsv1api.DaemonSetSpec{ + Template: corev1api.PodTemplateSpec{ + Spec: corev1api.PodSpec{ + Containers: []corev1api.Container{ + { + Name: "node-agent", + Image: "velero/velero:latest", + }, + }, + }, + }, + }, + } + _, err := kubeClient.AppsV1().DaemonSets(velerov1api.DefaultNamespace).Create(ctx, daemonSet, metav1.CreateOptions{}) + require.NoError(t, err) + + // Create node-agent config map + configMap := &corev1api.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node-agent-config", + Namespace: velerov1api.DefaultNamespace, + }, + Data: map[string]string{ + "config": tc.nodeAgentConfigMapData, + }, + } + _, err = kubeClient.CoreV1().ConfigMaps(velerov1api.DefaultNamespace).Create(ctx, configMap, metav1.CreateOptions{}) + require.NoError(t, err) + + // Create owner object for the restore pod + ownerObject := corev1api.ObjectReference{ + APIVersion: velerov1api.SchemeGroupVersion.String(), + Kind: "DataDownload", + Name: "test-datadownload", + Namespace: velerov1api.DefaultNamespace, + UID: "test-uid", + } + + // Create a target PVC + targetPVC := &corev1api.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-target-pvc", + Namespace: velerov1api.DefaultNamespace, + }, + Spec: 
corev1api.PersistentVolumeClaimSpec{ + AccessModes: []corev1api.PersistentVolumeAccessMode{ + corev1api.ReadWriteOnce, + }, + }, + } + + // Create generic restore exposer + exposer := &genericRestoreExposer{ + kubeClient: kubeClient, + log: velerotest.NewLogger(), + } + + // Call createRestorePod + pod, err := exposer.createRestorePod( + ctx, + ownerObject, + targetPVC, + time.Minute*5, + nil, // labels + nil, // annotations + nil, // tolerations + "", // selectedNode + corev1api.ResourceRequirements{}, + kube.NodeOSLinux, + nil, // affinity + tc.expectedPriorityClass, + ) + + require.NoError(t, err, tc.description) + assert.NotNil(t, pod) + assert.Equal(t, tc.expectedPriorityClass, pod.Spec.PriorityClassName, tc.description) + }) + } +} + +func TestCreateRestorePodWithMissingConfigMap(t *testing.T) { + ctx := t.Context() + + // Create fake Kubernetes client without config map + kubeClient := fake.NewSimpleClientset() + + // Create node-agent daemonset (required for getInheritedPodInfo) + daemonSet := &appsv1api.DaemonSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node-agent", + Namespace: velerov1api.DefaultNamespace, + }, + Spec: appsv1api.DaemonSetSpec{ + Template: corev1api.PodTemplateSpec{ + Spec: corev1api.PodSpec{ + Containers: []corev1api.Container{ + { + Name: "node-agent", + Image: "velero/velero:latest", + }, + }, + }, + }, + }, + } + _, err := kubeClient.AppsV1().DaemonSets(velerov1api.DefaultNamespace).Create(ctx, daemonSet, metav1.CreateOptions{}) + require.NoError(t, err) + + // Create owner object for the restore pod + ownerObject := corev1api.ObjectReference{ + APIVersion: velerov1api.SchemeGroupVersion.String(), + Kind: "DataDownload", + Name: "test-datadownload", + Namespace: velerov1api.DefaultNamespace, + UID: "test-uid", + } + + // Create a target PVC + targetPVC := &corev1api.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-target-pvc", + Namespace: velerov1api.DefaultNamespace, + }, + Spec: corev1api.PersistentVolumeClaimSpec{ + AccessModes: []corev1api.PersistentVolumeAccessMode{ + corev1api.ReadWriteOnce, + }, + }, + } + + // Create generic restore exposer + exposer := &genericRestoreExposer{ + kubeClient: kubeClient, + log: velerotest.NewLogger(), + } + + // Call createRestorePod + pod, err := exposer.createRestorePod( + ctx, + ownerObject, + targetPVC, + time.Minute*5, + nil, // labels + nil, // annotations + nil, // tolerations + "", // selectedNode + corev1api.ResourceRequirements{}, + kube.NodeOSLinux, + nil, // affinity + "", // empty priority class since config map is missing + ) + + // Should succeed even when config map is missing + require.NoError(t, err, "Should succeed even when config map is missing") + assert.NotNil(t, pod) + assert.Empty(t, pod.Spec.PriorityClassName, "Should have empty priority class when config map is missing") +} diff --git a/pkg/exposer/generic_restore_test.go b/pkg/exposer/generic_restore_test.go index 657d75347..b5679889b 100644 --- a/pkg/exposer/generic_restore_test.go +++ b/pkg/exposer/generic_restore_test.go @@ -914,6 +914,7 @@ func TestCreateRestorePod(t *testing.T) { corev1api.ResourceRequirements{}, test.nodeOS, test.affinity, + "", // priority class name ) require.NoError(t, err) diff --git a/pkg/exposer/pod_volume.go b/pkg/exposer/pod_volume.go index bdde55175..ea1fb2d1f 100644 --- a/pkg/exposer/pod_volume.go +++ b/pkg/exposer/pod_volume.go @@ -70,6 +70,9 @@ type PodVolumeExposeParam struct { // Type specifies the type of the expose, either backup or erstore Type string + + // PriorityClassName is 
the priority class name for the data mover pod + PriorityClassName string } // PodVolumeExposer is the interfaces for a pod volume exposer @@ -150,7 +153,7 @@ func (e *podVolumeExposer) Expose(ctx context.Context, ownerObject corev1api.Obj curLog.WithField("path", path).Infof("Host path is retrieved for pod %s, volume %s", param.ClientPodName, param.ClientPodVolume) - hostingPod, err := e.createHostingPod(ctx, ownerObject, param.Type, path.ByPath, param.OperationTimeout, param.HostingPodLabels, param.HostingPodAnnotations, param.HostingPodTolerations, pod.Spec.NodeName, param.Resources, nodeOS) + hostingPod, err := e.createHostingPod(ctx, ownerObject, param.Type, path.ByPath, param.OperationTimeout, param.HostingPodLabels, param.HostingPodAnnotations, param.HostingPodTolerations, pod.Spec.NodeName, param.Resources, nodeOS, param.PriorityClassName) if err != nil { return errors.Wrapf(err, "error to create hosting pod") } @@ -266,7 +269,7 @@ func (e *podVolumeExposer) CleanUp(ctx context.Context, ownerObject corev1api.Ob } func (e *podVolumeExposer) createHostingPod(ctx context.Context, ownerObject corev1api.ObjectReference, exposeType string, hostPath string, - operationTimeout time.Duration, label map[string]string, annotation map[string]string, toleration []corev1api.Toleration, selectedNode string, resources corev1api.ResourceRequirements, nodeOS string) (*corev1api.Pod, error) { + operationTimeout time.Duration, label map[string]string, annotation map[string]string, toleration []corev1api.Toleration, selectedNode string, resources corev1api.ResourceRequirements, nodeOS string, priorityClassName string) (*corev1api.Pod, error) { hostingPodName := ownerObject.Name containerName := string(ownerObject.UID) @@ -278,6 +281,11 @@ func (e *podVolumeExposer) createHostingPod(ctx context.Context, ownerObject cor return nil, errors.Wrap(err, "error to get inherited pod info from node-agent") } + // Log the priority class if it's set + if priorityClassName != "" { + e.log.Debugf("Setting priority class %q for data mover pod %s", priorityClassName, hostingPodName) + } + var gracePeriod int64 mountPropagation := corev1api.MountPropagationHostToContainer volumeMounts := []corev1api.VolumeMount{{ @@ -332,12 +340,20 @@ func (e *podVolumeExposer) createHostingPod(ctx context.Context, ownerObject cor nodeSelector[kube.NodeOSLabel] = kube.NodeOSWindows podOS.Name = kube.NodeOSWindows - toleration = append(toleration, corev1api.Toleration{ - Key: "os", - Operator: "Equal", - Effect: "NoSchedule", - Value: "windows", - }) + toleration = append(toleration, []corev1api.Toleration{ + { + Key: "os", + Operator: "Equal", + Effect: "NoSchedule", + Value: "windows", + }, + { + Key: "os", + Operator: "Equal", + Effect: "NoExecute", + Value: "windows", + }, + }...) 
} else { userID := int64(0) securityCtx = &corev1api.PodSecurityContext{ @@ -380,6 +396,7 @@ func (e *podVolumeExposer) createHostingPod(ctx context.Context, ownerObject cor Resources: resources, }, }, + PriorityClassName: priorityClassName, ServiceAccountName: podInfo.serviceAccount, TerminationGracePeriodSeconds: &gracePeriod, Volumes: volumes, diff --git a/pkg/install/daemonset.go b/pkg/install/daemonset.go index d875e9350..98f30cc69 100644 --- a/pkg/install/daemonset.go +++ b/pkg/install/daemonset.go @@ -179,6 +179,7 @@ func DaemonSet(namespace string, opts ...podTemplateOption) *appsv1api.DaemonSet Resources: c.resources, }, }, + PriorityClassName: c.priorityClassName, }, }, }, @@ -243,6 +244,12 @@ func DaemonSet(namespace string, opts ...podTemplateOption) *appsv1api.DaemonSet Effect: "NoSchedule", Value: "windows", }, + { + Key: "os", + Operator: "Equal", + Effect: "NoExecute", + Value: "windows", + }, } } else { daemonSet.Spec.Template.Spec.NodeSelector = map[string]string{ diff --git a/pkg/install/daemonset_test.go b/pkg/install/daemonset_test.go index 8ad00fb3b..77a1ebae8 100644 --- a/pkg/install/daemonset_test.go +++ b/pkg/install/daemonset_test.go @@ -81,3 +81,37 @@ func TestDaemonSet(t *testing.T) { assert.Equal(t, (*corev1api.PodSecurityContext)(nil), ds.Spec.Template.Spec.SecurityContext) assert.Equal(t, (*corev1api.SecurityContext)(nil), ds.Spec.Template.Spec.Containers[0].SecurityContext) } + +func TestDaemonSetWithPriorityClassName(t *testing.T) { + testCases := []struct { + name string + priorityClassName string + expectedValue string + }{ + { + name: "with priority class name", + priorityClassName: "high-priority", + expectedValue: "high-priority", + }, + { + name: "without priority class name", + priorityClassName: "", + expectedValue: "", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + // Create a daemonset with the priority class name option + var opts []podTemplateOption + if tc.priorityClassName != "" { + opts = append(opts, WithPriorityClassName(tc.priorityClassName)) + } + + daemonset := DaemonSet("velero", opts...) 
+ + // Verify the priority class name is set correctly + assert.Equal(t, tc.expectedValue, daemonset.Spec.Template.Spec.PriorityClassName) + }) + } +} diff --git a/pkg/install/deployment.go b/pkg/install/deployment.go index 538958789..91acce344 100644 --- a/pkg/install/deployment.go +++ b/pkg/install/deployment.go @@ -62,6 +62,7 @@ type podTemplateConfig struct { forWindows bool kubeletRootDir string nodeAgentDisableHostPath bool + priorityClassName string } func WithImage(image string) podTemplateOption { @@ -223,6 +224,12 @@ func WithItemBlockWorkerCount(itemBlockWorkerCount int) podTemplateOption { } } +func WithPriorityClassName(priorityClassName string) podTemplateOption { + return func(c *podTemplateConfig) { + c.priorityClassName = priorityClassName + } +} + func WithForWindows() podTemplateOption { return func(c *podTemplateConfig) { c.forWindows = true @@ -407,6 +414,7 @@ func Deployment(namespace string, opts ...podTemplateOption) *appsv1api.Deployme }, }, }, + PriorityClassName: c.priorityClassName, }, }, }, diff --git a/pkg/install/deployment_test.go b/pkg/install/deployment_test.go index ac53e6033..b8aeaa9dd 100644 --- a/pkg/install/deployment_test.go +++ b/pkg/install/deployment_test.go @@ -103,3 +103,37 @@ func TestDeployment(t *testing.T) { assert.Equal(t, "linux", deploy.Spec.Template.Spec.NodeSelector["kubernetes.io/os"]) assert.Equal(t, "linux", string(deploy.Spec.Template.Spec.OS.Name)) } + +func TestDeploymentWithPriorityClassName(t *testing.T) { + testCases := []struct { + name string + priorityClassName string + expectedValue string + }{ + { + name: "with priority class name", + priorityClassName: "high-priority", + expectedValue: "high-priority", + }, + { + name: "without priority class name", + priorityClassName: "", + expectedValue: "", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + // Create a deployment with the priority class name option + var opts []podTemplateOption + if tc.priorityClassName != "" { + opts = append(opts, WithPriorityClassName(tc.priorityClassName)) + } + + deployment := Deployment("velero", opts...) + + // Verify the priority class name is set correctly + assert.Equal(t, tc.expectedValue, deployment.Spec.Template.Spec.PriorityClassName) + }) + } +} diff --git a/pkg/install/resources.go b/pkg/install/resources.go index 1888a4852..344fc599f 100644 --- a/pkg/install/resources.go +++ b/pkg/install/resources.go @@ -273,6 +273,8 @@ type VeleroOptions struct { ItemBlockWorkerCount int KubeletRootDir string NodeAgentDisableHostPath bool + ServerPriorityClassName string + NodeAgentPriorityClassName string } func AllCRDs() *unstructured.UnstructuredList { @@ -362,6 +364,10 @@ func AllResources(o *VeleroOptions) *unstructured.UnstructuredList { WithItemBlockWorkerCount(o.ItemBlockWorkerCount), } + if o.ServerPriorityClassName != "" { + deployOpts = append(deployOpts, WithPriorityClassName(o.ServerPriorityClassName)) + } + if len(o.Features) > 0 { deployOpts = append(deployOpts, WithFeatures(o.Features)) } @@ -424,6 +430,10 @@ func AllResources(o *VeleroOptions) *unstructured.UnstructuredList { dsOpts = append(dsOpts, WithKubeletRootDir(o.KubeletRootDir)) } + if o.NodeAgentPriorityClassName != "" { + dsOpts = append(dsOpts, WithPriorityClassName(o.NodeAgentPriorityClassName)) + } + if o.UseNodeAgent { ds := DaemonSet(o.Namespace, dsOpts...) 
if err := appendUnstructured(resources, ds); err != nil { diff --git a/pkg/install/resources_test.go b/pkg/install/resources_test.go index 6118c04bb..bafa3a684 100644 --- a/pkg/install/resources_test.go +++ b/pkg/install/resources_test.go @@ -117,3 +117,132 @@ func TestAllResources(t *testing.T) { assert.Len(t, ds, 2) } + +func TestAllResourcesWithPriorityClassName(t *testing.T) { + testCases := []struct { + name string + serverPriorityClassName string + nodeAgentPriorityClassName string + useNodeAgent bool + }{ + { + name: "with same priority class for server and node agent", + serverPriorityClassName: "high-priority", + nodeAgentPriorityClassName: "high-priority", + useNodeAgent: true, + }, + { + name: "with different priority classes for server and node agent", + serverPriorityClassName: "high-priority", + nodeAgentPriorityClassName: "medium-priority", + useNodeAgent: true, + }, + { + name: "with only server priority class", + serverPriorityClassName: "high-priority", + nodeAgentPriorityClassName: "", + useNodeAgent: true, + }, + { + name: "with only node agent priority class", + serverPriorityClassName: "", + nodeAgentPriorityClassName: "medium-priority", + useNodeAgent: true, + }, + { + name: "with priority class name without node agent", + serverPriorityClassName: "high-priority", + nodeAgentPriorityClassName: "medium-priority", + useNodeAgent: false, + }, + { + name: "without priority class name with node agent", + serverPriorityClassName: "", + nodeAgentPriorityClassName: "", + useNodeAgent: true, + }, + { + name: "without priority class name without node agent", + serverPriorityClassName: "", + nodeAgentPriorityClassName: "", + useNodeAgent: false, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + // Create VeleroOptions with the priority class names + options := &VeleroOptions{ + Namespace: "velero", + UseNodeAgent: tc.useNodeAgent, + ServerPriorityClassName: tc.serverPriorityClassName, + NodeAgentPriorityClassName: tc.nodeAgentPriorityClassName, + } + + // Generate all resources + resources := AllResources(options) + + // Find the deployment and verify priority class name + deploymentFound := false + daemonsetFound := false + + for i := range resources.Items { + item := resources.Items[i] + + // Check deployment + if item.GetKind() == "Deployment" && item.GetName() == "velero" { + deploymentFound = true + + // Extract priority class name from the unstructured object + priorityClassName, found, err := unstructured.NestedString( + item.Object, + "spec", "template", "spec", "priorityClassName", + ) + + require.NoError(t, err) + if tc.serverPriorityClassName != "" { + assert.True(t, found, "Server priorityClassName should be set") + assert.Equal(t, tc.serverPriorityClassName, priorityClassName, + "Server deployment should have the correct priority class") + } else { + // If no priority class name was provided, it might not be set at all + if found { + assert.Empty(t, priorityClassName) + } + } + } + + // Check daemonset if node agent is enabled + if tc.useNodeAgent && item.GetKind() == "DaemonSet" && item.GetName() == "node-agent" { + daemonsetFound = true + + // Extract priority class name from the unstructured object + priorityClassName, found, err := unstructured.NestedString( + item.Object, + "spec", "template", "spec", "priorityClassName", + ) + + require.NoError(t, err) + if tc.nodeAgentPriorityClassName != "" { + assert.True(t, found, "Node agent priorityClassName should be set") + assert.Equal(t, tc.nodeAgentPriorityClassName, 
priorityClassName, + "Node agent daemonset should have the correct priority class") + } else { + // If no priority class name was provided, it might not be set at all + if found { + assert.Empty(t, priorityClassName) + } + } + } + } + + // Verify we found the deployment + assert.True(t, deploymentFound, "Deployment should be present in resources") + + // Verify we found the daemonset if node agent is enabled + if tc.useNodeAgent { + assert.True(t, daemonsetFound, "DaemonSet should be present when UseNodeAgent is true") + } + }) + } +} diff --git a/pkg/nodeagent/node_agent.go b/pkg/nodeagent/node_agent.go index 7ca9f602e..d83a9eba3 100644 --- a/pkg/nodeagent/node_agent.go +++ b/pkg/nodeagent/node_agent.go @@ -112,6 +112,9 @@ type Configs struct { // PodResources is the resource config for various types of pods launched by node-agent, i.e., data mover pods. PodResources *kube.PodResources `json:"podResources,omitempty"` + + // PriorityClassName is the priority class name for data mover pods created by the node agent + PriorityClassName string `json:"priorityClassName,omitempty"` } func IsRunningOnLinux(ctx context.Context, kubeClient kubernetes.Interface, namespace string) error { diff --git a/pkg/nodeagent/node_agent_test.go b/pkg/nodeagent/node_agent_test.go index 292c14a61..aaf851d6e 100644 --- a/pkg/nodeagent/node_agent_test.go +++ b/pkg/nodeagent/node_agent_test.go @@ -246,6 +246,8 @@ func TestGetConfigs(t *testing.T) { cmWithInvalidDataFormat := builder.ForConfigMap("fake-ns", "node-agent-config").Data("fake-key", "wrong").Result() cmWithoutCocurrentData := builder.ForConfigMap("fake-ns", "node-agent-config").Data("fake-key", "{\"someothers\":{\"someother\": 10}}").Result() cmWithValidData := builder.ForConfigMap("fake-ns", "node-agent-config").Data("fake-key", "{\"loadConcurrency\":{\"globalConfig\": 5}}").Result() + cmWithPriorityClass := builder.ForConfigMap("fake-ns", "node-agent-config").Data("fake-key", "{\"priorityClassName\": \"high-priority\"}").Result() + cmWithPriorityClassAndOther := builder.ForConfigMap("fake-ns", "node-agent-config").Data("fake-key", "{\"priorityClassName\": \"low-priority\", \"loadConcurrency\":{\"globalConfig\": 3}}").Result() tests := []struct { name string @@ -305,6 +307,29 @@ func TestGetConfigs(t *testing.T) { }, }, }, + { + name: "configmap with priority class name", + namespace: "fake-ns", + kubeClientObj: []runtime.Object{ + cmWithPriorityClass, + }, + expectResult: &Configs{ + PriorityClassName: "high-priority", + }, + }, + { + name: "configmap with priority class and other configs", + namespace: "fake-ns", + kubeClientObj: []runtime.Object{ + cmWithPriorityClassAndOther, + }, + expectResult: &Configs{ + PriorityClassName: "low-priority", + LoadConcurrency: &LoadConcurrency{ + GlobalConfig: 3, + }, + }, + }, } for _, test := range tests { @@ -321,10 +346,16 @@ func TestGetConfigs(t *testing.T) { if test.expectResult == nil { assert.Nil(t, result) - } else if test.expectResult.LoadConcurrency == nil { - assert.Nil(t, result.LoadConcurrency) } else { - assert.Equal(t, *test.expectResult.LoadConcurrency, *result.LoadConcurrency) + // Check PriorityClassName + assert.Equal(t, test.expectResult.PriorityClassName, result.PriorityClassName) + + // Check LoadConcurrency + if test.expectResult.LoadConcurrency == nil { + assert.Nil(t, result.LoadConcurrency) + } else { + assert.Equal(t, *test.expectResult.LoadConcurrency, *result.LoadConcurrency) + } } } else { assert.EqualError(t, err, test.expectErr) diff --git 
a/pkg/repository/maintenance/maintenance.go b/pkg/repository/maintenance/maintenance.go index c887e8813..9bc61c058 100644 --- a/pkg/repository/maintenance/maintenance.go +++ b/pkg/repository/maintenance/maintenance.go @@ -50,6 +50,12 @@ const ( RepositoryNameLabel = "velero.io/repo-name" GlobalKeyForRepoMaintenanceJobCM = "global" TerminationLogIndicator = "Repo maintenance error: " + + DefaultKeepLatestMaintenanceJobs = 3 + DefaultMaintenanceJobCPURequest = "0" + DefaultMaintenanceJobCPULimit = "0" + DefaultMaintenanceJobMemRequest = "0" + DefaultMaintenanceJobMemLimit = "0" ) type JobConfigs struct { @@ -61,6 +67,10 @@ type JobConfigs struct { // KeepLatestMaintenanceJobs is the number of latest maintenance jobs to keep for the repository. KeepLatestMaintenanceJobs *int `json:"keepLatestMaintenanceJobs,omitempty"` + + // PriorityClassName is the priority class name for the maintenance job pod + // Note: This is only read from the global configuration, not per-repository + PriorityClassName string `json:"priorityClassName,omitempty"` } func GenerateJobName(repo string) string { @@ -75,7 +85,8 @@ func GenerateJobName(repo string) string { } // DeleteOldJobs deletes old maintenance jobs and keeps the latest N jobs -func DeleteOldJobs(cli client.Client, repo string, keep int) error { +func DeleteOldJobs(cli client.Client, repo string, keep int, logger logrus.FieldLogger) error { + logger.Infof("Start to delete old maintenance jobs. %d jobs will be kept.", keep) // Get the maintenance job list by label jobList := &batchv1api.JobList{} err := cli.List(context.TODO(), jobList, client.MatchingLabels(map[string]string{RepositoryNameLabel: repo})) @@ -278,13 +289,21 @@ func getJobConfig( if result.KeepLatestMaintenanceJobs == nil && globalResult.KeepLatestMaintenanceJobs != nil { result.KeepLatestMaintenanceJobs = globalResult.KeepLatestMaintenanceJobs } + + // Priority class is only read from global config, not per-repository + if globalResult.PriorityClassName != "" { + result.PriorityClassName = globalResult.PriorityClassName + } } + logger.Debugf("Didn't find content for repository %s in cm %s", repo.Name, repoMaintenanceJobConfig) + return result, nil } // GetKeepLatestMaintenanceJobs returns the configured number of maintenance jobs to keep from the JobConfigs. -// If not configured in the ConfigMap, it returns 0 to indicate using the fallback value. +// Because the CLI configured Job kept number is deprecated, +// if not configured in the ConfigMap, it returns default value to indicate using the fallback value. 
func GetKeepLatestMaintenanceJobs( ctx context.Context, client client.Client, @@ -294,19 +313,19 @@ func GetKeepLatestMaintenanceJobs( repo *velerov1api.BackupRepository, ) (int, error) { if repoMaintenanceJobConfig == "" { - return 0, nil + return DefaultKeepLatestMaintenanceJobs, nil } config, err := getJobConfig(ctx, client, logger, veleroNamespace, repoMaintenanceJobConfig, repo) if err != nil { - return 0, err + return DefaultKeepLatestMaintenanceJobs, err } if config != nil && config.KeepLatestMaintenanceJobs != nil { return *config.KeepLatestMaintenanceJobs, nil } - return 0, nil + return DefaultKeepLatestMaintenanceJobs, nil } // WaitJobComplete waits the completion of the specified maintenance job and return the BackupRepositoryMaintenanceStatus @@ -393,8 +412,15 @@ func WaitAllJobsComplete(ctx context.Context, cli client.Client, repo *velerov1a } // StartNewJob creates a new maintenance job -func StartNewJob(cli client.Client, ctx context.Context, repo *velerov1api.BackupRepository, repoMaintenanceJobConfig string, - podResources kube.PodResources, logLevel logrus.Level, logFormat *logging.FormatFlag, logger logrus.FieldLogger) (string, error) { +func StartNewJob( + cli client.Client, + ctx context.Context, + repo *velerov1api.BackupRepository, + repoMaintenanceJobConfig string, + logLevel logrus.Level, + logFormat *logging.FormatFlag, + logger logrus.FieldLogger, +) (string, error) { bsl := &velerov1api.BackupStorageLocation{} if err := cli.Get(ctx, client.ObjectKey{Namespace: repo.Namespace, Name: repo.Spec.BackupStorageLocation}, bsl); err != nil { return "", errors.WithStack(err) @@ -424,14 +450,14 @@ func StartNewJob(cli client.Client, ctx context.Context, repo *velerov1api.Backu log.Info("Starting maintenance repo") - maintenanceJob, err := buildJob(cli, ctx, repo, bsl.Name, jobConfig, podResources, logLevel, logFormat) + maintenanceJob, err := buildJob(cli, ctx, repo, bsl.Name, jobConfig, logLevel, logFormat, log) if err != nil { return "", errors.Wrap(err, "error to build maintenance job") } log = log.WithField("job", fmt.Sprintf("%s/%s", maintenanceJob.Namespace, maintenanceJob.Name)) - if err := cli.Create(context.TODO(), maintenanceJob); err != nil { + if err := cli.Create(ctx, maintenanceJob); err != nil { return "", errors.Wrap(err, "error to create maintenance job") } @@ -440,15 +466,35 @@ func StartNewJob(cli client.Client, ctx context.Context, repo *velerov1api.Backu return maintenanceJob.Name, nil } +func getPriorityClassName(ctx context.Context, cli client.Client, config *JobConfigs, logger logrus.FieldLogger) string { + // Use the priority class name from the global job configuration if available + // Note: Priority class is only read from global config, not per-repository + if config != nil && config.PriorityClassName != "" { + // Validate that the priority class exists in the cluster + if err := kube.ValidatePriorityClassWithClient(ctx, cli, config.PriorityClassName); err != nil { + if apierrors.IsNotFound(err) { + logger.Warnf("Priority class %q not found in cluster. 
Job creation may fail if the priority class doesn't exist when jobs are scheduled.", config.PriorityClassName) + } else { + logger.WithError(err).Warnf("Failed to validate priority class %q", config.PriorityClassName) + } + // Still return the priority class name to let Kubernetes handle the error + return config.PriorityClassName + } + logger.Infof("Validated priority class %q exists in cluster", config.PriorityClassName) + return config.PriorityClassName + } + return "" +} + func buildJob( cli client.Client, ctx context.Context, repo *velerov1api.BackupRepository, bslName string, config *JobConfigs, - podResources kube.PodResources, logLevel logrus.Level, logFormat *logging.FormatFlag, + logger logrus.FieldLogger, ) (*batchv1api.Job, error) { // Get the Velero server deployment deployment := &appsv1api.Deployment{} @@ -484,10 +530,10 @@ func buildJob( image := veleroutil.GetVeleroServerImage(deployment) // Set resource limits and requests - cpuRequest := podResources.CPURequest - memRequest := podResources.MemoryRequest - cpuLimit := podResources.CPULimit - memLimit := podResources.MemoryLimit + cpuRequest := DefaultMaintenanceJobCPURequest + memRequest := DefaultMaintenanceJobMemRequest + cpuLimit := DefaultMaintenanceJobCPULimit + memLimit := DefaultMaintenanceJobMemLimit if config != nil && config.PodResources != nil { cpuRequest = config.PodResources.CPURequest memRequest = config.PodResources.MemoryRequest @@ -559,6 +605,7 @@ func buildJob( TerminationMessagePolicy: corev1api.TerminationMessageFallbackToLogsOnError, }, }, + PriorityClassName: getPriorityClassName(ctx, cli, config, logger), RestartPolicy: corev1api.RestartPolicyNever, SecurityContext: podSecurityContext, Volumes: volumes, diff --git a/pkg/repository/maintenance/maintenance_test.go b/pkg/repository/maintenance/maintenance_test.go index d1ffb0f7c..f0a8568be 100644 --- a/pkg/repository/maintenance/maintenance_test.go +++ b/pkg/repository/maintenance/maintenance_test.go @@ -29,6 +29,7 @@ import ( "github.com/stretchr/testify/require" batchv1api "k8s.io/api/batch/v1" corev1api "k8s.io/api/core/v1" + schedulingv1 "k8s.io/api/scheduling/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" @@ -117,7 +118,7 @@ func TestDeleteOldJobs(t *testing.T) { cli := fake.NewClientBuilder().WithScheme(scheme).WithObjects(objs...).Build() // Call the function - err := DeleteOldJobs(cli, repo, keep) + err := DeleteOldJobs(cli, repo, keep, velerotest.NewLogger()) require.NoError(t, err) // Get the remaining jobs @@ -387,6 +388,7 @@ func TestGetResultFromJob(t *testing.T) { } func TestGetJobConfig(t *testing.T) { + keepLatestMaintenanceJobs := 1 ctx := t.Context() logger := logrus.New() veleroNamespace := "velero" @@ -504,11 +506,12 @@ func TestGetJobConfig(t *testing.T) { Name: repoMaintenanceJobConfig, }, Data: map[string]string{ - GlobalKeyForRepoMaintenanceJobCM: "{\"podResources\":{\"cpuRequest\":\"50m\",\"cpuLimit\":\"100m\",\"memoryRequest\":\"50Mi\",\"memoryLimit\":\"100Mi\"},\"loadAffinity\":[{\"nodeSelector\":{\"matchExpressions\":[{\"key\":\"cloud.google.com/machine-family\",\"operator\":\"In\",\"values\":[\"n2\"]}]}}]}", + GlobalKeyForRepoMaintenanceJobCM: "{\"keepLatestMaintenanceJobs\":1,\"podResources\":{\"cpuRequest\":\"50m\",\"cpuLimit\":\"100m\",\"memoryRequest\":\"50Mi\",\"memoryLimit\":\"100Mi\"},\"loadAffinity\":[{\"nodeSelector\":{\"matchExpressions\":[{\"key\":\"cloud.google.com/machine-family\",\"operator\":\"In\",\"values\":[\"n2\"]}]}}]}", 
"test-default-kopia": "{\"podResources\":{\"cpuRequest\":\"100m\",\"cpuLimit\":\"200m\",\"memoryRequest\":\"100Mi\",\"memoryLimit\":\"200Mi\"},\"loadAffinity\":[{\"nodeSelector\":{\"matchExpressions\":[{\"key\":\"cloud.google.com/machine-family\",\"operator\":\"In\",\"values\":[\"e2\"]}]}}]}", }, }, expectedConfig: &JobConfigs{ + KeepLatestMaintenanceJobs: &keepLatestMaintenanceJobs, PodResources: &kube.PodResources{ CPURequest: "100m", CPULimit: "200m", @@ -1098,9 +1101,9 @@ func TestBuildJob(t *testing.T) { param.BackupRepo, param.BackupLocation.Name, tc.m, - *tc.m.PodResources, tc.logLevel, tc.logFormat, + logrus.New(), ) // Check the error @@ -1177,7 +1180,7 @@ func TestGetKeepLatestMaintenanceJobs(t *testing.T) { repoMaintenanceJobConfig: "", configMap: nil, repo: mockBackupRepo(), - expectedValue: 0, + expectedValue: 3, expectError: false, }, { @@ -1185,7 +1188,7 @@ func TestGetKeepLatestMaintenanceJobs(t *testing.T) { repoMaintenanceJobConfig: "non-existent-config", configMap: nil, repo: mockBackupRepo(), - expectedValue: 0, + expectedValue: 3, expectError: false, }, { @@ -1234,7 +1237,7 @@ func TestGetKeepLatestMaintenanceJobs(t *testing.T) { }, }, repo: mockBackupRepo(), - expectedValue: 0, + expectedValue: 3, expectError: false, }, { @@ -1250,7 +1253,7 @@ func TestGetKeepLatestMaintenanceJobs(t *testing.T) { }, }, repo: mockBackupRepo(), - expectedValue: 0, + expectedValue: 3, expectError: true, }, } @@ -1300,3 +1303,181 @@ func mockBackupRepo() *velerov1api.BackupRepository { }, } } + +func TestGetPriorityClassName(t *testing.T) { + testCases := []struct { + name string + config *JobConfigs + priorityClassExists bool + expectedValue string + expectedLogContains string + expectedLogLevel string + }{ + { + name: "empty priority class name should return empty string", + config: &JobConfigs{PriorityClassName: ""}, + expectedValue: "", + expectedLogContains: "", + }, + { + name: "nil config should return empty string", + config: nil, + expectedValue: "", + expectedLogContains: "", + }, + { + name: "existing priority class should log info and return name", + config: &JobConfigs{PriorityClassName: "high-priority"}, + priorityClassExists: true, + expectedValue: "high-priority", + expectedLogContains: "Validated priority class \\\"high-priority\\\" exists in cluster", + expectedLogLevel: "info", + }, + { + name: "non-existing priority class should log warning and still return name", + config: &JobConfigs{PriorityClassName: "missing-priority"}, + priorityClassExists: false, + expectedValue: "missing-priority", + expectedLogContains: "Priority class \\\"missing-priority\\\" not found in cluster", + expectedLogLevel: "warning", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + // Create a new scheme and add necessary API types + localScheme := runtime.NewScheme() + err := schedulingv1.AddToScheme(localScheme) + require.NoError(t, err) + + // Create fake client builder + clientBuilder := fake.NewClientBuilder().WithScheme(localScheme) + + // Add priority class if it should exist + if tc.priorityClassExists { + priorityClass := &schedulingv1.PriorityClass{ + ObjectMeta: metav1.ObjectMeta{ + Name: tc.config.PriorityClassName, + }, + Value: 1000, + } + clientBuilder = clientBuilder.WithObjects(priorityClass) + } + + client := clientBuilder.Build() + + // Capture logs + var logBuffer strings.Builder + logger := logrus.New() + logger.SetOutput(&logBuffer) + logger.SetLevel(logrus.InfoLevel) + + // Call the function + result := getPriorityClassName(t.Context(), 
client, tc.config, logger) + + // Verify the result + assert.Equal(t, tc.expectedValue, result) + + // Verify log output + logOutput := logBuffer.String() + if tc.expectedLogContains != "" { + assert.Contains(t, logOutput, tc.expectedLogContains) + } + + // Verify log level + if tc.expectedLogLevel == "warning" { + assert.Contains(t, logOutput, "level=warning") + } else if tc.expectedLogLevel == "info" { + assert.Contains(t, logOutput, "level=info") + } + }) + } +} + +func TestBuildJobWithPriorityClassName(t *testing.T) { + testCases := []struct { + name string + priorityClassName string + expectedValue string + }{ + { + name: "with priority class name", + priorityClassName: "high-priority", + expectedValue: "high-priority", + }, + { + name: "without priority class name", + priorityClassName: "", + expectedValue: "", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + // Create a new scheme and add necessary API types + localScheme := runtime.NewScheme() + err := velerov1api.AddToScheme(localScheme) + require.NoError(t, err) + err = appsv1api.AddToScheme(localScheme) + require.NoError(t, err) + err = batchv1api.AddToScheme(localScheme) + require.NoError(t, err) + err = schedulingv1.AddToScheme(localScheme) + require.NoError(t, err) + // Create a fake client + client := fake.NewClientBuilder().WithScheme(localScheme).Build() + + // Create a deployment with the specified priority class name + deployment := &appsv1api.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "velero", + Namespace: "velero", + }, + Spec: appsv1api.DeploymentSpec{ + Template: corev1api.PodTemplateSpec{ + Spec: corev1api.PodSpec{ + Containers: []corev1api.Container{ + { + Name: "velero", + Image: "velero/velero:latest", + }, + }, + PriorityClassName: tc.priorityClassName, + }, + }, + }, + } + + // Create a backup repository + repo := &velerov1api.BackupRepository{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-repo", + Namespace: "velero", + }, + Spec: velerov1api.BackupRepositorySpec{ + VolumeNamespace: "velero", + BackupStorageLocation: "default", + }, + } + + // Create the deployment in the fake client + err = client.Create(t.Context(), deployment) + require.NoError(t, err) + + // Create minimal job configs and resources + jobConfig := &JobConfigs{ + PriorityClassName: tc.priorityClassName, + } + logLevel := logrus.InfoLevel + logFormat := logging.NewFormatFlag() + logFormat.Set("text") + + // Call buildJob + job, err := buildJob(client, t.Context(), repo, "default", jobConfig, logLevel, logFormat, logrus.New()) + require.NoError(t, err) + + // Verify the priority class name is set correctly + assert.Equal(t, tc.expectedValue, job.Spec.Template.Spec.PriorityClassName) + }) + } +} diff --git a/pkg/repository/udmrepo/kopialib/backend/azure.go b/pkg/repository/udmrepo/kopialib/backend/azure.go index ead95406c..79bf1bbc2 100644 --- a/pkg/repository/udmrepo/kopialib/backend/azure.go +++ b/pkg/repository/udmrepo/kopialib/backend/azure.go @@ -25,6 +25,7 @@ import ( "github.com/vmware-tanzu/velero/pkg/repository/udmrepo" "github.com/vmware-tanzu/velero/pkg/repository/udmrepo/kopialib/backend/azure" + "github.com/vmware-tanzu/velero/pkg/repository/udmrepo/kopialib/backend/logging" ) type AzureBackend struct { @@ -38,12 +39,11 @@ func (c *AzureBackend) Setup(ctx context.Context, flags map[string]string, logge c.option = azure.Option{ Config: flags, Limits: setupLimits(ctx, flags), - Logger: logger, } return nil } func (c *AzureBackend) Connect(ctx context.Context, isCreate bool, logger 
logrus.FieldLogger) (blob.Storage, error) { - c.option.Logger = logger + ctx = logging.WithLogger(ctx, logger) return azure.NewStorage(ctx, &c.option, false) } diff --git a/pkg/repository/udmrepo/kopialib/backend/azure/azure_storage_wrapper.go b/pkg/repository/udmrepo/kopialib/backend/azure/azure_storage_wrapper.go index 967de3dea..5e2443a42 100644 --- a/pkg/repository/udmrepo/kopialib/backend/azure/azure_storage_wrapper.go +++ b/pkg/repository/udmrepo/kopialib/backend/azure/azure_storage_wrapper.go @@ -19,13 +19,12 @@ package azure import ( "context" - "github.com/sirupsen/logrus" - "github.com/kopia/kopia/repo/blob" "github.com/kopia/kopia/repo/blob/azure" "github.com/kopia/kopia/repo/blob/throttling" "github.com/vmware-tanzu/velero/pkg/repository/udmrepo" + "github.com/vmware-tanzu/velero/pkg/repository/udmrepo/kopialib/backend/logging" azureutil "github.com/vmware-tanzu/velero/pkg/util/azure" ) @@ -34,13 +33,12 @@ const ( ) func init() { - blob.AddSupportedStorage(storageType, Option{Logger: logrus.New()}, NewStorage) + blob.AddSupportedStorage(storageType, Option{}, NewStorage) } type Option struct { Config map[string]string `json:"config" kopia:"sensitive"` Limits throttling.Limits - Logger logrus.FieldLogger } type Storage struct { @@ -58,7 +56,10 @@ func (s *Storage) ConnectionInfo() blob.ConnectionInfo { func NewStorage(ctx context.Context, option *Option, isCreate bool) (blob.Storage, error) { cfg := option.Config - client, _, err := azureutil.NewStorageClient(option.Logger, cfg) + // Get logger from context + logger := logging.LoggerFromContext(ctx) + + client, _, err := azureutil.NewStorageClient(logger, cfg) if err != nil { return nil, err } @@ -73,6 +74,8 @@ func NewStorage(ctx context.Context, option *Option, isCreate bool) (blob.Storag return nil, err } + logger.Info("Successfully created Azure storage backend") + return &Storage{ Option: option, Storage: azStorage, diff --git a/pkg/repository/udmrepo/kopialib/backend/file_system.go b/pkg/repository/udmrepo/kopialib/backend/file_system.go index 98f272c46..075099cf8 100644 --- a/pkg/repository/udmrepo/kopialib/backend/file_system.go +++ b/pkg/repository/udmrepo/kopialib/backend/file_system.go @@ -27,6 +27,7 @@ import ( "github.com/pkg/errors" "github.com/vmware-tanzu/velero/pkg/repository/udmrepo" + "github.com/vmware-tanzu/velero/pkg/repository/udmrepo/kopialib/backend/logging" ) type FsBackend struct { @@ -50,6 +51,8 @@ func (c *FsBackend) Setup(ctx context.Context, flags map[string]string, logger l c.options.FileMode = defaultFileMode c.options.DirectoryMode = defaultDirMode + ctx = logging.WithLogger(ctx, logger) + c.options.Limits = setupLimits(ctx, flags) return nil @@ -59,6 +62,7 @@ func (c *FsBackend) Connect(ctx context.Context, isCreate bool, logger logrus.Fi if !filepath.IsAbs(c.options.Path) { return nil, errors.Errorf("filesystem repository path is not absolute, path: %s", c.options.Path) } + ctx = logging.WithLogger(ctx, logger) return filesystem.New(ctx, &c.options, isCreate) } diff --git a/pkg/repository/udmrepo/kopialib/backend/gcs.go b/pkg/repository/udmrepo/kopialib/backend/gcs.go index 243f054dc..2c46609a4 100644 --- a/pkg/repository/udmrepo/kopialib/backend/gcs.go +++ b/pkg/repository/udmrepo/kopialib/backend/gcs.go @@ -25,6 +25,7 @@ import ( "github.com/kopia/kopia/repo/blob/gcs" "github.com/vmware-tanzu/velero/pkg/repository/udmrepo" + "github.com/vmware-tanzu/velero/pkg/repository/udmrepo/kopialib/backend/logging" ) type GCSBackend struct { @@ -46,11 +47,14 @@ func (c *GCSBackend) Setup(ctx 
context.Context, flags map[string]string, logger c.options.Prefix = optionalHaveString(udmrepo.StoreOptionPrefix, flags) c.options.ReadOnly = optionalHaveBool(ctx, udmrepo.StoreOptionGcsReadonly, flags) + ctx = logging.WithLogger(ctx, logger) + c.options.Limits = setupLimits(ctx, flags) return nil } func (c *GCSBackend) Connect(ctx context.Context, isCreate bool, logger logrus.FieldLogger) (blob.Storage, error) { + ctx = logging.WithLogger(ctx, logger) return gcs.New(ctx, &c.options, false) } diff --git a/pkg/repository/udmrepo/kopialib/backend/logging/context.go b/pkg/repository/udmrepo/kopialib/backend/logging/context.go new file mode 100644 index 000000000..4ac63d302 --- /dev/null +++ b/pkg/repository/udmrepo/kopialib/backend/logging/context.go @@ -0,0 +1,38 @@ +/* +Copyright the Velero contributors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package logging + +import ( + "context" + + "github.com/sirupsen/logrus" +) + +type ctxKeyLogger struct{} + +// WithLogger returns a new context with the provided logger. +func WithLogger(ctx context.Context, logger logrus.FieldLogger) context.Context { + return context.WithValue(ctx, ctxKeyLogger{}, logger) +} + +// LoggerFromContext retrieves the logger from the context, or returns a default logger if none found. +func LoggerFromContext(ctx context.Context) logrus.FieldLogger { + if logger, ok := ctx.Value(ctxKeyLogger{}).(logrus.FieldLogger); ok && logger != nil { + return logger + } + return logrus.New() +} diff --git a/pkg/repository/udmrepo/kopialib/backend/s3.go b/pkg/repository/udmrepo/kopialib/backend/s3.go index 2f7f7d93c..202c3c449 100644 --- a/pkg/repository/udmrepo/kopialib/backend/s3.go +++ b/pkg/repository/udmrepo/kopialib/backend/s3.go @@ -25,6 +25,7 @@ import ( "github.com/kopia/kopia/repo/blob/s3" "github.com/vmware-tanzu/velero/pkg/repository/udmrepo" + "github.com/vmware-tanzu/velero/pkg/repository/udmrepo/kopialib/backend/logging" ) type S3Backend struct { @@ -48,11 +49,14 @@ func (c *S3Backend) Setup(ctx context.Context, flags map[string]string, logger l c.options.SessionToken = optionalHaveString(udmrepo.StoreOptionS3Token, flags) c.options.RootCA = optionalHaveBase64(ctx, udmrepo.StoreOptionCACert, flags) + ctx = logging.WithLogger(ctx, logger) + c.options.Limits = setupLimits(ctx, flags) return nil } func (c *S3Backend) Connect(ctx context.Context, isCreate bool, logger logrus.FieldLogger) (blob.Storage, error) { + ctx = logging.WithLogger(ctx, logger) return s3.New(ctx, &c.options, false) } diff --git a/pkg/util/collections/includes_excludes.go b/pkg/util/collections/includes_excludes.go index 1cff16f3e..19ef926f0 100644 --- a/pkg/util/collections/includes_excludes.go +++ b/pkg/util/collections/includes_excludes.go @@ -19,6 +19,8 @@ package collections import ( "strings" + "github.com/vmware-tanzu/velero/internal/resourcepolicies" + "github.com/gobwas/glob" "github.com/pkg/errors" "github.com/sirupsen/logrus" @@ -105,14 +107,63 @@ func (ie *IncludesExcludes) ShouldInclude(s string) bool { return ie.includes.Len() == 0 || 
ie.includes.Has("*") || ie.includes.match(s) } +// IncludesString returns a string containing all of the includes, separated by commas, or * if the +// list is empty. +func (ie *IncludesExcludes) IncludesString() string { + return asString(ie.GetIncludes(), "*") +} + +// ExcludesString returns a string containing all of the excludes, separated by commas, or if the +// list is empty. +func (ie *IncludesExcludes) ExcludesString() string { + return asString(ie.GetExcludes(), "") +} + +// IncludeEverything returns true if the includes list is empty or '*' +// and the excludes list is empty, or false otherwise. +func (ie *IncludesExcludes) IncludeEverything() bool { + return ie.excludes.Len() == 0 && (ie.includes.Len() == 0 || (ie.includes.Len() == 1 && ie.includes.Has("*"))) +} + +// GetResourceIncludesExcludes takes the lists of resources to include and exclude, uses the +// discovery helper to resolve them to fully-qualified group-resource names, and returns an +// IncludesExcludes list. +func GetResourceIncludesExcludes(helper discovery.Helper, includes, excludes []string) *IncludesExcludes { + resources := generateIncludesExcludes( + includes, + excludes, + func(item string) string { + gvr, _, err := helper.ResourceFor(schema.ParseGroupResource(item).WithVersion("")) + if err != nil { + // If we can't resolve it, return it as-is. This prevents the generated + // includes-excludes list from including *everything*, if none of the includes + // can be resolved. ref. https://github.com/vmware-tanzu/velero/issues/2461 + return item + } + + gr := gvr.GroupResource() + return gr.String() + }, + ) + + return resources +} + +func asString(in []string, empty string) string { + if len(in) == 0 { + return empty + } + return strings.Join(in, ", ") +} + // IncludesExcludesInterface is used as polymorphic IncludesExcludes for Global and scope // resources Include/Exclude. type IncludesExcludesInterface interface { - // Check whether the type name passed in by parameter should be included. + // ShouldInclude checks whether the type name passed in by parameter should be included. // typeName should be k8s.io/apimachinery/pkg/runtime/schema GroupResource's String() result. ShouldInclude(typeName string) bool - // Check whether the type name passed in by parameter should be excluded. + // ShouldExclude checks whether the type name passed in by parameter should be excluded. // typeName should be k8s.io/apimachinery/pkg/runtime/schema GroupResource's String() result. 
ShouldExclude(typeName string) bool } @@ -188,6 +239,20 @@ func (ie *GlobalIncludesExcludes) ShouldExclude(typeName string) bool { return false } +func GetGlobalResourceIncludesExcludes(helper discovery.Helper, logger logrus.FieldLogger, includes, excludes []string, includeClusterResources *bool, nsIncludesExcludes IncludesExcludes) *GlobalIncludesExcludes { + ret := &GlobalIncludesExcludes{ + resourceFilter: *GetResourceIncludesExcludes(helper, includes, excludes), + includeClusterResources: includeClusterResources, + namespaceFilter: nsIncludesExcludes, + helper: helper, + logger: logger, + } + + logger.Infof("Including resources: %s", ret.resourceFilter.IncludesString()) + logger.Infof("Excluding resources: %s", ret.resourceFilter.ExcludesString()) + return ret +} + type ScopeIncludesExcludes struct { namespaceScopedResourceFilter IncludesExcludes // namespace-scoped resource filter clusterScopedResourceFilter IncludesExcludes // cluster-scoped resource filter @@ -259,29 +324,64 @@ func (ie *ScopeIncludesExcludes) ShouldExclude(typeName string) bool { return false } -// IncludesString returns a string containing all of the includes, separated by commas, or * if the -// list is empty. -func (ie *IncludesExcludes) IncludesString() string { - return asString(ie.GetIncludes(), "*") -} - -// ExcludesString returns a string containing all of the excludes, separated by commas, or if the -// list is empty. -func (ie *IncludesExcludes) ExcludesString() string { - return asString(ie.GetExcludes(), "") -} - -func asString(in []string, empty string) string { - if len(in) == 0 { - return empty +func (ie *ScopeIncludesExcludes) CombineWithPolicy(policy *resourcepolicies.IncludeExcludePolicy) { + if policy == nil { + return } - return strings.Join(in, ", ") -} - -// IncludeEverything returns true if the includes list is empty or '*' -// and the excludes list is empty, or false otherwise. -func (ie *IncludesExcludes) IncludeEverything() bool { - return ie.excludes.Len() == 0 && (ie.includes.Len() == 0 || (ie.includes.Len() == 1 && ie.includes.Has("*"))) + mapFunc := scopeResourceMapFunc(ie.helper) + for _, item := range policy.ExcludedNamespaceScopedResources { + resolvedItem := mapFunc(item, true) + if resolvedItem == "" { + continue + } + // The existing includeExcludes in the struct has higher priority, therefore, we should only add the item to the filter + // when the struct does not include this item and this item is not yet in the excludes filter. + if !ie.namespaceScopedResourceFilter.includes.match(resolvedItem) && + !ie.namespaceScopedResourceFilter.excludes.match(resolvedItem) { + ie.namespaceScopedResourceFilter.Excludes(resolvedItem) + } + } + for _, item := range policy.IncludedNamespaceScopedResources { + resolvedItem := mapFunc(item, true) + if resolvedItem == "" { + continue + } + // The existing includeExcludes in the struct has higher priority, therefore, we should only add the item to the filter + // when the struct does not exclude this item and this item is not yet in the includes filter. 
+ if !ie.namespaceScopedResourceFilter.includes.match(resolvedItem) && + !ie.namespaceScopedResourceFilter.excludes.match(resolvedItem) { + ie.namespaceScopedResourceFilter.Includes(resolvedItem) + } + } + for _, item := range policy.ExcludedClusterScopedResources { + resolvedItem := mapFunc(item, false) + if resolvedItem == "" { + continue + } + if !ie.clusterScopedResourceFilter.includes.match(resolvedItem) && + !ie.clusterScopedResourceFilter.excludes.match(resolvedItem) { + // The existing includeExcludes in the struct has higher priority, therefore, we should only add the item to the filter + // when the struct does not exclude this item and this item is not yet in the includes filter. + ie.clusterScopedResourceFilter.Excludes(resolvedItem) + } + } + for _, item := range policy.IncludedClusterScopedResources { + resolvedItem := mapFunc(item, false) + if resolvedItem == "" { + continue + } + if !ie.clusterScopedResourceFilter.includes.match(resolvedItem) && + !ie.clusterScopedResourceFilter.excludes.match(resolvedItem) { + // The existing includeExcludes in the struct has higher priority, therefore, we should only add the item to the filter + // when the struct does not exclude this item and this item is not yet in the includes filter. + ie.clusterScopedResourceFilter.Includes(resolvedItem) + } + } + ie.logger.Infof("Scoped resource includes/excludes after combining with resource policy") + ie.logger.Infof("Including namespace-scoped resources: %s", ie.namespaceScopedResourceFilter.IncludesString()) + ie.logger.Infof("Excluding namespace-scoped resources: %s", ie.namespaceScopedResourceFilter.ExcludesString()) + ie.logger.Infof("Including cluster-scoped resources: %s", ie.clusterScopedResourceFilter.GetIncludes()) + ie.logger.Infof("Excluding cluster-scoped resources: %s", ie.clusterScopedResourceFilter.ExcludesString()) } func newScopeIncludesExcludes(nsIncludesExcludes IncludesExcludes, helper discovery.Helper, logger logrus.FieldLogger) *ScopeIncludesExcludes { @@ -302,6 +402,43 @@ func newScopeIncludesExcludes(nsIncludesExcludes IncludesExcludes, helper discov return ret } +// GetScopeResourceIncludesExcludes function is similar with GetResourceIncludesExcludes, +// but it's used for scoped Includes/Excludes, and can handle both cluster-scoped and namespace-scoped resources. 
+func GetScopeResourceIncludesExcludes(helper discovery.Helper, logger logrus.FieldLogger, namespaceIncludes, namespaceExcludes, clusterIncludes, clusterExcludes []string, nsIncludesExcludes IncludesExcludes) *ScopeIncludesExcludes { + ret := generateScopedIncludesExcludes( + namespaceIncludes, + namespaceExcludes, + clusterIncludes, + clusterExcludes, + scopeResourceMapFunc(helper), + nsIncludesExcludes, + helper, + logger, + ) + logger.Infof("Scoped resource includes/excludes after initialization") + logger.Infof("Including namespace-scoped resources: %s", ret.namespaceScopedResourceFilter.IncludesString()) + logger.Infof("Excluding namespace-scoped resources: %s", ret.namespaceScopedResourceFilter.ExcludesString()) + logger.Infof("Including cluster-scoped resources: %s", ret.clusterScopedResourceFilter.GetIncludes()) + logger.Infof("Excluding cluster-scoped resources: %s", ret.clusterScopedResourceFilter.ExcludesString()) + + return ret +} + +func scopeResourceMapFunc(helper discovery.Helper) func(string, bool) string { + return func(item string, namespaced bool) string { + gvr, resource, err := helper.ResourceFor(schema.ParseGroupResource(item).WithVersion("")) + if err != nil { + return item + } + if resource.Namespaced != namespaced { + return "" + } + + gr := gvr.GroupResource() + return gr.String() + } +} + // ValidateIncludesExcludes checks provided lists of included and excluded // items to ensure they are a valid set of IncludesExcludes data. func ValidateIncludesExcludes(includesList, excludesList []string) []error { @@ -470,97 +607,16 @@ func generateFilter(filter globStringSet, resources []string, mapFunc func(strin } } -// GetResourceIncludesExcludes takes the lists of resources to include and exclude, uses the -// discovery helper to resolve them to fully-qualified group-resource names, and returns an -// IncludesExcludes list. -func GetResourceIncludesExcludes(helper discovery.Helper, includes, excludes []string) *IncludesExcludes { - resources := generateIncludesExcludes( - includes, - excludes, - func(item string) string { - gvr, _, err := helper.ResourceFor(schema.ParseGroupResource(item).WithVersion("")) - if err != nil { - // If we can't resolve it, return it as-is. This prevents the generated - // includes-excludes list from including *everything*, if none of the includes - // can be resolved. ref. https://github.com/vmware-tanzu/velero/issues/2461 - return item - } - - gr := gvr.GroupResource() - return gr.String() - }, - ) - - return resources -} - -func GetGlobalResourceIncludesExcludes(helper discovery.Helper, logger logrus.FieldLogger, includes, excludes []string, includeClusterResources *bool, nsIncludesExcludes IncludesExcludes) *GlobalIncludesExcludes { - ret := &GlobalIncludesExcludes{ - resourceFilter: *GetResourceIncludesExcludes(helper, includes, excludes), - includeClusterResources: includeClusterResources, - namespaceFilter: nsIncludesExcludes, - helper: helper, - logger: logger, - } - - logger.Infof("Including resources: %s", ret.resourceFilter.IncludesString()) - logger.Infof("Excluding resources: %s", ret.resourceFilter.ExcludesString()) - return ret -} - -// GetScopeResourceIncludesExcludes function is similar with GetResourceIncludesExcludes, -// but it's used for scoped Includes/Excludes, and can handle both cluster-scoped and namespace-scoped resources. 
-func GetScopeResourceIncludesExcludes(helper discovery.Helper, logger logrus.FieldLogger, namespaceIncludes, namespaceExcludes, clusterIncludes, clusterExcludes []string, nsIncludesExcludes IncludesExcludes) *ScopeIncludesExcludes { - ret := generateScopedIncludesExcludes( - namespaceIncludes, - namespaceExcludes, - clusterIncludes, - clusterExcludes, - func(item string, namespaced bool) string { - gvr, resource, err := helper.ResourceFor(schema.ParseGroupResource(item).WithVersion("")) - if err != nil { - return item - } - if resource.Namespaced != namespaced { - return "" - } - - gr := gvr.GroupResource() - return gr.String() - }, - nsIncludesExcludes, - helper, - logger, - ) - logger.Infof("Including namespace-scoped resources: %s", ret.namespaceScopedResourceFilter.IncludesString()) - logger.Infof("Excluding namespace-scoped resources: %s", ret.namespaceScopedResourceFilter.ExcludesString()) - logger.Infof("Including cluster-scoped resources: %s", ret.clusterScopedResourceFilter.GetIncludes()) - logger.Infof("Excluding cluster-scoped resources: %s", ret.clusterScopedResourceFilter.ExcludesString()) - - return ret -} - // UseOldResourceFilters checks whether to use old resource filters (IncludeClusterResources, // IncludedResources and ExcludedResources), depending the backup's filters setting. // New filters are IncludedClusterScopedResources, ExcludedClusterScopedResources, // IncludedNamespaceScopedResources and ExcludedNamespaceScopedResources. +// If all resource filters are none, it is treated as using new parameter filters. func UseOldResourceFilters(backupSpec velerov1api.BackupSpec) bool { - // If all resource filters are none, it is treated as using old parameter filters. - if backupSpec.IncludeClusterResources == nil && - len(backupSpec.IncludedResources) == 0 && - len(backupSpec.ExcludedResources) == 0 && - len(backupSpec.IncludedClusterScopedResources) == 0 && - len(backupSpec.ExcludedClusterScopedResources) == 0 && - len(backupSpec.IncludedNamespaceScopedResources) == 0 && - len(backupSpec.ExcludedNamespaceScopedResources) == 0 { - return true - } - if backupSpec.IncludeClusterResources != nil || len(backupSpec.IncludedResources) > 0 || len(backupSpec.ExcludedResources) > 0 { return true } - return false } diff --git a/pkg/util/collections/includes_excludes_test.go b/pkg/util/collections/includes_excludes_test.go index a3466066c..34e5f9987 100644 --- a/pkg/util/collections/includes_excludes_test.go +++ b/pkg/util/collections/includes_excludes_test.go @@ -19,6 +19,8 @@ package collections import ( "testing" + "github.com/vmware-tanzu/velero/internal/resourcepolicies" + "github.com/pkg/errors" "github.com/sirupsen/logrus" "github.com/stretchr/testify/assert" @@ -741,6 +743,100 @@ func TestGetScopedResourceIncludesExcludes(t *testing.T) { } } +func TestScopeIncludesExcludes_CombineWithPolicy(t *testing.T) { + apiResources := []*test.APIResource{test.Deployments(), test.Pods(), test.ConfigMaps(), test.Secrets(), test.PVs(), test.CRDs(), test.ServiceAccounts()} + tests := []struct { + name string + namespaceScopedIncludes []string + namespaceScopedExcludes []string + clusterScopedIncludes []string + clusterScopedExcludes []string + policy *resourcepolicies.IncludeExcludePolicy + verify func(sie ScopeIncludesExcludes) bool + }{ + { + name: "When policy is nil, the original includes excludes filters should not change", + namespaceScopedIncludes: []string{"deployments", "pods"}, + namespaceScopedExcludes: []string{"configmaps"}, + clusterScopedIncludes: 
[]string{"persistentvolumes"}, + clusterScopedExcludes: []string{"crds"}, + policy: nil, + verify: func(sie ScopeIncludesExcludes) bool { + return sie.clusterScopedResourceFilter.ShouldInclude("persistentvolumes") && + !sie.clusterScopedResourceFilter.ShouldInclude("crds") && + sie.namespaceScopedResourceFilter.ShouldInclude("deployments") && + !sie.namespaceScopedResourceFilter.ShouldInclude("configmaps") + }, + }, + { + name: "policy includes excludes should be merged to the original includes excludes when there's no conflict", + namespaceScopedIncludes: []string{"pods"}, + namespaceScopedExcludes: []string{"configmaps"}, + clusterScopedIncludes: []string{}, + clusterScopedExcludes: []string{"crds"}, + policy: &resourcepolicies.IncludeExcludePolicy{ + IncludedNamespaceScopedResources: []string{"deployments"}, + ExcludedNamespaceScopedResources: []string{"secrets"}, + IncludedClusterScopedResources: []string{"persistentvolumes"}, + ExcludedClusterScopedResources: []string{}, + }, + verify: func(sie ScopeIncludesExcludes) bool { + return sie.clusterScopedResourceFilter.ShouldInclude("persistentvolumes") && + !sie.clusterScopedResourceFilter.ShouldInclude("crds") && + sie.namespaceScopedResourceFilter.ShouldInclude("deployments") && + !sie.namespaceScopedResourceFilter.ShouldInclude("configmaps") && + !sie.namespaceScopedResourceFilter.ShouldInclude("secrets") + }, + }, + { + name: "when there are conflicts, the existing includes excludes filters have higher priorities", + namespaceScopedIncludes: []string{"pods", "deployments"}, + namespaceScopedExcludes: []string{"configmaps"}, + clusterScopedIncludes: []string{"crds"}, + clusterScopedExcludes: []string{"persistentvolumes"}, + policy: &resourcepolicies.IncludeExcludePolicy{ + IncludedNamespaceScopedResources: []string{"configmaps"}, + ExcludedNamespaceScopedResources: []string{"pods", "secrets"}, + IncludedClusterScopedResources: []string{"persistentvolumes"}, + ExcludedClusterScopedResources: []string{"crds"}, + }, + verify: func(sie ScopeIncludesExcludes) bool { + return sie.clusterScopedResourceFilter.ShouldInclude("crds") && + !sie.clusterScopedResourceFilter.ShouldInclude("persistentvolumes") && + sie.namespaceScopedResourceFilter.ShouldInclude("pods") && + !sie.namespaceScopedResourceFilter.ShouldInclude("configmaps") && + !sie.namespaceScopedResourceFilter.ShouldInclude("secrets") + }, + }, + { + name: "verify the case when there's '*' in the original include filter", + namespaceScopedIncludes: []string{"*"}, + namespaceScopedExcludes: []string{}, + clusterScopedIncludes: []string{}, + clusterScopedExcludes: []string{}, + policy: &resourcepolicies.IncludeExcludePolicy{ + IncludedNamespaceScopedResources: []string{"deployments", "pods"}, + ExcludedNamespaceScopedResources: []string{"configmaps", "secrets"}, + IncludedClusterScopedResources: []string{}, + ExcludedClusterScopedResources: []string{}, + }, + verify: func(sie ScopeIncludesExcludes) bool { + return sie.namespaceScopedResourceFilter.ShouldInclude("configmaps") && + sie.namespaceScopedResourceFilter.ShouldInclude("secrets") + }, + }, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + logger := logrus.StandardLogger() + discoveryHelper := setupDiscoveryClientWithResources(apiResources) + sie := GetScopeResourceIncludesExcludes(discoveryHelper, logger, tc.namespaceScopedIncludes, tc.namespaceScopedExcludes, tc.clusterScopedIncludes, tc.clusterScopedExcludes, *NewIncludesExcludes()) + sie.CombineWithPolicy(tc.policy) + assert.True(t, tc.verify(*sie)) + }) 
+ } +} + func TestUseOldResourceFilters(t *testing.T) { tests := []struct { name string @@ -748,9 +844,9 @@ func TestUseOldResourceFilters(t *testing.T) { useOldResourceFilters bool }{ { - name: "backup with no filters should use old filters", + name: "backup with no filters should use new filters", backup: *defaultBackup().Result(), - useOldResourceFilters: true, + useOldResourceFilters: false, }, { name: "backup with only old filters should use old filters", diff --git a/pkg/util/kube/priority_class.go b/pkg/util/kube/priority_class.go new file mode 100644 index 000000000..3f2ddc256 --- /dev/null +++ b/pkg/util/kube/priority_class.go @@ -0,0 +1,64 @@ +/* +Copyright the Velero contributors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package kube + +import ( + "context" + + "github.com/sirupsen/logrus" + schedulingv1 "k8s.io/api/scheduling/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/kubernetes" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// ValidatePriorityClass checks if the specified priority class exists in the cluster +// Returns true if the priority class exists or if priorityClassName is empty +// Returns false if the priority class doesn't exist or validation fails +// Logs warnings when the priority class doesn't exist +func ValidatePriorityClass(ctx context.Context, kubeClient kubernetes.Interface, priorityClassName string, logger logrus.FieldLogger) bool { + if priorityClassName == "" { + return true + } + + _, err := kubeClient.SchedulingV1().PriorityClasses().Get(ctx, priorityClassName, metav1.GetOptions{}) + if err != nil { + if apierrors.IsNotFound(err) { + logger.Warnf("Priority class %q not found in cluster. Pod creation may fail if the priority class doesn't exist when pods are scheduled.", priorityClassName) + } else { + logger.WithError(err).Warnf("Failed to validate priority class %q", priorityClassName) + } + return false + } + logger.Infof("Validated priority class %q exists in cluster", priorityClassName) + return true +} + +// ValidatePriorityClassWithClient checks if the specified priority class exists in the cluster using controller-runtime client +// Returns nil if the priority class exists or if priorityClassName is empty +// Returns error if the priority class doesn't exist or validation fails +func ValidatePriorityClassWithClient(ctx context.Context, cli client.Client, priorityClassName string) error { + if priorityClassName == "" { + return nil + } + + priorityClass := &schedulingv1.PriorityClass{} + err := cli.Get(ctx, types.NamespacedName{Name: priorityClassName}, priorityClass) + return err +} diff --git a/pkg/util/kube/priority_class_test.go b/pkg/util/kube/priority_class_test.go new file mode 100644 index 000000000..9d6a52599 --- /dev/null +++ b/pkg/util/kube/priority_class_test.go @@ -0,0 +1,128 @@ +/* +Copyright the Velero contributors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package kube + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/assert" + schedulingv1 "k8s.io/api/scheduling/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/kubernetes/fake" + k8stesting "k8s.io/client-go/testing" + + velerotesting "github.com/vmware-tanzu/velero/pkg/test" +) + +func TestValidatePriorityClass(t *testing.T) { + tests := []struct { + name string + priorityClassName string + existingPCs []runtime.Object + clientReactors []k8stesting.ReactionFunc + expectedLogs []string + expectedLogLevel string + expectedResult bool + }{ + { + name: "empty priority class name should return without logging", + priorityClassName: "", + existingPCs: nil, + expectedLogs: nil, + expectedResult: true, + }, + { + name: "existing priority class should log info message", + priorityClassName: "high-priority", + existingPCs: []runtime.Object{ + &schedulingv1.PriorityClass{ + ObjectMeta: metav1.ObjectMeta{ + Name: "high-priority", + }, + Value: 100, + }, + }, + expectedLogs: []string{"Validated priority class \\\"high-priority\\\" exists in cluster"}, + expectedLogLevel: "info", + expectedResult: true, + }, + { + name: "non-existing priority class should log warning", + priorityClassName: "does-not-exist", + existingPCs: nil, + expectedLogs: []string{"Priority class \\\"does-not-exist\\\" not found in cluster. Pod creation may fail if the priority class doesn't exist when pods are scheduled."}, + expectedLogLevel: "warning", + expectedResult: false, + }, + { + name: "API error should log warning with error", + priorityClassName: "test-priority", + existingPCs: nil, + clientReactors: []k8stesting.ReactionFunc{ + func(action k8stesting.Action) (handled bool, ret runtime.Object, err error) { + if action.GetVerb() == "get" && action.GetResource().Resource == "priorityclasses" { + return true, nil, fmt.Errorf("API server error") + } + return false, nil, nil + }, + }, + expectedLogs: []string{"Failed to validate priority class \\\"test-priority\\\""}, + expectedLogLevel: "warning", + expectedResult: false, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + // Create fake client with existing priority classes + kubeClient := fake.NewSimpleClientset(test.existingPCs...) 
+ + // Add any custom reactors + for _, reactor := range test.clientReactors { + kubeClient.PrependReactor("*", "*", reactor) + } + + // Create test logger with buffer + buffer := []string{} + logger := velerotesting.NewMultipleLogger(&buffer) + + // Call the function + result := ValidatePriorityClass(t.Context(), kubeClient, test.priorityClassName, logger) + + // Check result + assert.Equal(t, test.expectedResult, result, "ValidatePriorityClass returned unexpected result") + + // Check logs + if test.expectedLogs == nil { + assert.Empty(t, buffer) + } else { + assert.Len(t, buffer, len(test.expectedLogs)) + + for i, expectedLog := range test.expectedLogs { + assert.Contains(t, buffer[i], expectedLog) + if test.expectedLogLevel == "info" { + assert.Contains(t, buffer[i], "level=info") + } else if test.expectedLogLevel == "warning" { + assert.Contains(t, buffer[i], "level=warning") + } + } + } + }) + } +} diff --git a/site/content/docs/main/basic-install.md b/site/content/docs/main/basic-install.md index b84afcdde..79c432092 100644 --- a/site/content/docs/main/basic-install.md +++ b/site/content/docs/main/basic-install.md @@ -4,7 +4,7 @@ layout: docs --- Use this doc to get a basic installation of Velero. -Refer [this document](customize-installation.md) to customize your installation. +Refer [this document](customize-installation.md) to customize your installation, including setting priority classes for Velero components. ## Prerequisites diff --git a/site/content/docs/main/csi-snapshot-data-movement.md b/site/content/docs/main/csi-snapshot-data-movement.md index 304cf32f6..930fc1be5 100644 --- a/site/content/docs/main/csi-snapshot-data-movement.md +++ b/site/content/docs/main/csi-snapshot-data-movement.md @@ -18,7 +18,11 @@ On the other hand, there are quite some cases that CSI snapshot is not available CSI Snapshot Data Movement supports both built-in data mover and customized data movers. For the details of how Velero works with customized data movers, check the [Volume Snapshot Data Movement design][1]. Velero provides a built-in data mover which uses Velero built-in uploaders (at present the available uploader is Kopia uploader) to read the snapshot data and write to the Unified Repository (by default implemented by Kopia repository). -Velero built-in data mover restores both volume data and metadata, so the data mover pods need to run as root user. +Velero built-in data mover restores both volume data and metadata, so the data mover pods need to run as root user. + +### Priority Class Configuration + +For Velero built-in data mover, data mover pods launched during CSI snapshot data movement will use the priority class name configured in the node-agent configmap. The node-agent daemonset itself gets its priority class from the `--node-agent-priority-class-name` flag during Velero installation. This can help ensure proper scheduling behavior in resource-constrained environments. For more details on configuring data mover pod resources, see [Data Movement Pod Resource Configuration][11]. ## Setup CSI Snapshot Data Movement @@ -364,7 +368,7 @@ At present, Velero doesn't allow to set `ReadOnlyRootFileSystem` parameter to da Both the uploader and repository consume remarkable CPU/memory during the backup/restore, especially for massive small files or large backup size cases. For Velero built-in data mover, Velero uses [BestEffort as the QoS][13] for data mover pods (so no CPU/memory request/limit is set), so that backups/restores wouldn't fail due to resource throttling in any cases. 
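+
+The CPU/memory consumption described above can be bounded through the `podResources` block of the node-agent configmap. A minimal illustration follows; the values are placeholders rather than recommendations, and the full set of supported keys is documented in [Customize Data Mover Pod Resource Limits][11]:
+
+```json
+{
+  "podResources": {
+    "cpuRequest": "500m",
+    "cpuLimit": "1000m",
+    "memoryRequest": "512Mi",
+    "memoryLimit": "1Gi"
+  }
+}
+```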
-If you want to constraint the CPU/memory usage, you need to [Customize Data Mover Pod Resource Limits][11]. The CPU/memory consumption is always related to the scale of data to be backed up/restored, refer to [Performance Guidance][12] for more details, so it is highly recommended that you perform your own testing to find the best resource limits for your data. +If you want to constraint the CPU/memory usage, you need to [Customize Data Mover Pod Resource Limits][11]. The CPU/memory consumption is always related to the scale of data to be backed up/restored, refer to [Performance Guidance][12] for more details, so it is highly recommended that you perform your own testing to find the best resource limits for your data. During the restore, the repository may also cache data/metadata so as to reduce the network footprint and speed up the restore. The repository uses its own policy to store and clean up the cache. For Kopia repository, the cache is stored in the data mover pod's root file system. Velero allows you to configure a limit of the cache size so that the data mover pod won't be evicted due to running out of the ephemeral storage. For more details, check [Backup Repository Configuration][17]. diff --git a/site/content/docs/main/customize-installation.md b/site/content/docs/main/customize-installation.md index e6a8797dc..a7cf965a6 100644 --- a/site/content/docs/main/customize-installation.md +++ b/site/content/docs/main/customize-installation.md @@ -96,6 +96,53 @@ Note that if you specify `--colorized=true` as a CLI option it will override the config file setting. +## Set priority class names for Velero components + +You can set priority class names for different Velero components during installation. This allows you to influence the scheduling and eviction behavior of Velero pods, which can be useful in clusters where resource contention is high. + +### Priority class configuration options: + +1. **Velero server deployment**: Use the `--server-priority-class-name` flag +2. **Node agent daemonset**: Use the `--node-agent-priority-class-name` flag +3. **Data mover pods**: Configure through the node-agent configmap (see below) +4. **Maintenance jobs**: Configure through the repository maintenance job configmap (see below) + +```bash +velero install \ + --server-priority-class-name= \ + --node-agent-priority-class-name= +``` + +### Configuring priority classes for data mover pods and maintenance jobs + +For data mover pods and maintenance jobs, priority classes are configured through ConfigMaps that must be created before installation: + +**Data mover pods** (via node-agent configmap): +```bash +kubectl create configmap node-agent-config -n velero --from-file=config.json=/dev/stdin < ``` +### Priority Class + +Data mover pods will use the priorityClassName configured in the node-agent configmap. The priorityClassName for data mover pods is configured through the node-agent configmap (specified via the `--node-agent-configmap` flag), while the node-agent daemonset itself uses the priority class set by the `--node-agent-priority-class-name` flag during Velero installation. 
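+
+To confirm which priority class a running data mover pod actually received, inspect the pod spec directly. The command below is only an illustration and assumes the data mover pods run in the `velero` namespace:
+
+```bash
+# List pods in the velero namespace together with the priority class recorded in each pod spec
+kubectl get pods -n velero -o custom-columns=NAME:.metadata.name,PRIORITY_CLASS:.spec.priorityClassName
+```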
+ +#### When to Use Priority Classes + +**Higher Priority Classes** (e.g., `system-cluster-critical`, `system-node-critical`, or custom high-priority): +- When you have dedicated nodes for backup operations +- When backup/restore operations are time-critical +- When you want to ensure data mover pods are scheduled even during high cluster utilization +- For disaster recovery scenarios where restore speed is critical + +**Lower Priority Classes** (e.g., `low-priority` or negative values): +- When you want to protect production workload performance +- When backup operations can be delayed during peak hours +- When cluster resources are limited and production workloads take precedence +- For non-critical backup operations that can tolerate delays + +#### Consequences of Priority Class Settings + +**High Priority**: +- ✅ Data mover pods are more likely to be scheduled quickly +- ✅ Less likely to be preempted by other workloads +- ❌ May cause resource pressure on production workloads +- ❌ Could lead to production pod evictions in extreme cases + +**Low Priority**: +- ✅ Production workloads are protected from resource competition +- ✅ Cluster stability is maintained during backup operations +- ❌ Backup/restore operations may take longer to start +- ❌ Data mover pods may be preempted, causing backup failures +- ❌ In resource-constrained clusters, backups might not run at all + +#### Example Configuration + +To configure priority class for data mover pods, include it in your node-agent configmap: + +```json +{ + "podResources": { + "cpuRequest": "1000m", + "cpuLimit": "2000m", + "memoryRequest": "1Gi", + "memoryLimit": "4Gi" + }, + "priorityClassName": "backup-priority" +} +``` + +First, create the priority class in your cluster: + +```yaml +apiVersion: scheduling.k8s.io/v1 +kind: PriorityClass +metadata: + name: backup-priority +value: 1000 +globalDefault: false +description: "Priority class for Velero data mover pods" +``` + +Then create or update the node-agent configmap: + +```bash +kubectl create cm node-agent-config -n velero --from-file=node-agent-config.json +``` + +**Note**: If the specified priority class doesn't exist in the cluster when data mover pods are created, the pods will fail to schedule. Velero validates the priority class at startup and logs a warning if it doesn't exist, but the pods will still attempt to use it. + [1]: csi-snapshot-data-movement.md [2]: file-system-backup.md [3]: https://kubernetes.io/docs/concepts/workloads/pods/pod-qos/ diff --git a/site/content/docs/main/file-system-backup.md b/site/content/docs/main/file-system-backup.md index ab12accd4..b4d904f1b 100644 --- a/site/content/docs/main/file-system-backup.md +++ b/site/content/docs/main/file-system-backup.md @@ -691,6 +691,10 @@ spec: ...... ``` +## Priority Class Configuration + +For Velero built-in data mover, data mover pods launched during file system backup will use the priority class name configured in the node-agent configmap. The node-agent daemonset itself gets its priority class from the `--node-agent-priority-class-name` flag during Velero installation. This can help ensure proper scheduling behavior in resource-constrained environments. For more details on configuring data mover pod resources, see [Data Movement Pod Resource Configuration][data-movement-config]. + ## Resource Consumption Both the uploader and repository consume remarkable CPU/memory during the backup/restore, especially for massive small files or large backup size cases. 
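Both sources of the priority class mentioned above can be inspected directly. The hedged commands below assume the defaults used elsewhere in these docs: the `velero` namespace, a daemonset named `node-agent`, and a ConfigMap named `node-agent-config`.

```bash
# Priority class of the node-agent daemonset (set via --node-agent-priority-class-name):
kubectl get daemonset node-agent -n velero \
  -o jsonpath='{.spec.template.spec.priorityClassName}{"\n"}'

# Priority class that data mover pods will pick up (set in the node-agent configmap):
kubectl get configmap node-agent-config -n velero -o yaml
```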
@@ -762,3 +766,4 @@ Velero still effectively manage restic repository, though you cannot write any n [18]: backup-repository-configuration.md [19]: node-agent-concurrency.md [20]: node-agent-prepare-queue-length.md +[data-movement-config]: data-movement-pod-resource-configuration.md diff --git a/site/content/docs/main/repository-maintenance.md b/site/content/docs/main/repository-maintenance.md index 10237de5c..c9c76a677 100644 --- a/site/content/docs/main/repository-maintenance.md +++ b/site/content/docs/main/repository-maintenance.md @@ -60,47 +60,63 @@ This can be done by adding multiple entries in the `LoadAffinity` array. The sample of the ```repo-maintenance-job-configmap``` ConfigMap for the above scenario is as below: ``` bash -cat < repo-maintenance-job-config.json -{ - "global": { - "podResources": { - "cpuRequest": "100m", - "cpuLimit": "200m", - "memoryRequest": "100Mi", - "memoryLimit": "200Mi" +cat < repo-maintenance-job-config.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: repo-maintenance-job-config + namespace: velero +data: + global: | + { + "podResources": { + "cpuRequest": "100m", + "cpuLimit": "200m", + "memoryRequest": "100Mi", + "memoryLimit": "200Mi" + }, + "keepLatestMaintenanceJobs": 1, + "loadAffinity": [ + { + "nodeSelector": { + "matchExpressions": [ + { + "key": "cloud.google.com/machine-family", + "operator": "In", + "values": [ + "e2" + ] + } + ] + } }, - "loadAffinity": [ - { - "nodeSelector": { - "matchExpressions": [ - { - "key": "cloud.google.com/machine-family", - "operator": "In", - "values": [ - "e2" - ] - } - ] - } - }, - { - "nodeSelector": { - "matchExpressions": [ - { - "key": "topology.kubernetes.io/zone", - "operator": "In", - "values": [ - "us-central1-a", - "us-central1-b", - "us-central1-c" - ] - } - ] - } - } - ] + { + "nodeSelector": { + "matchExpressions": [ + { + "key": "topology.kubernetes.io/zone", + "operator": "In", + "values": [ + "us-central1-a", + "us-central1-b", + "us-central1-c" + ] + } + ] + } + } + ] + } + kibishii-default-kopia: | + { + "podResources": { + "cpuRequest": "200m", + "cpuLimit": "400m", + "memoryRequest": "200Mi", + "memoryLimit": "400Mi" + }, + "keepLatestMaintenanceJobs": 2 } -} EOF ``` This sample showcases two affinity configurations: @@ -110,7 +126,7 @@ The nodes matching one of the two conditions are selected. To create the configMap, users need to save something like the above sample to a json file and then run below command: ``` -kubectl create cm repo-maintenance-job-config -n velero --from-file=repo-maintenance-job-config.json +kubectl apply -f repo-maintenance-job-config.yaml ``` ### Log @@ -155,7 +171,7 @@ Status: - `Recent Maintenance` keeps the status of the recent 3 maintenance jobs, including its start time, result (succeeded/failed), completion time (if the maintenance job succeeded), or error message (if the maintenance failed) ### Others -Maintenance jobs will inherit toleration, nodeSelector, service account, image, environment variables, cloud-credentials etc. from Velero deployment. +Maintenance jobs will inherit toleration, nodeSelector, service account, image, environment variables, cloud-credentials, priorityClassName etc. from Velero deployment. For labels and annotations, maintenance jobs do NOT inherit all labels and annotations from the Velero deployment. 
Instead, they include: @@ -171,7 +187,24 @@ For labels and annotations, maintenance jobs do NOT inherit all labels and annot * `iam.amazonaws.com/role` **Important:** Other labels and annotations from the Velero deployment are NOT inherited by maintenance jobs. This is by design to ensure only specific labels and annotations required for cloud provider identity systems are propagated. -Maintenance jobs will not run for backup repositories whose backup storage location is set as readOnly. +Maintenance jobs will not run for backup repositories whose backup storage location is set as readOnly. + +#### Priority Class Configuration +Maintenance jobs can be configured with a specific priority class through the repository maintenance job ConfigMap. The priority class name should be specified in the global configuration section: + +```json +{ + "global": { + "priorityClassName": "low-priority", + "podResources": { + "cpuRequest": "100m", + "memoryRequest": "128Mi" + } + } +} +``` + +Note that priority class configuration is only read from the global configuration section, ensuring all maintenance jobs use the same priority class regardless of which repository they are maintaining. [1]: velero-install.md#usage [2]: node-agent-concurrency.md diff --git a/site/content/docs/main/upgrade-to-1.16.md b/site/content/docs/main/upgrade-to-1.17.md similarity index 69% rename from site/content/docs/main/upgrade-to-1.16.md rename to site/content/docs/main/upgrade-to-1.17.md index 47fb25409..f6738d55c 100644 --- a/site/content/docs/main/upgrade-to-1.16.md +++ b/site/content/docs/main/upgrade-to-1.17.md @@ -1,13 +1,13 @@ --- -title: "Upgrading to Velero 1.16" +title: "Upgrading to Velero 1.17" layout: docs --- ## Prerequisites -- Velero [v1.15.x][8] installed. +- Velero [v1.16.x][9] installed. -If you're not yet running at least Velero v1.15, see the following: +If you're not yet running at least Velero v1.16, see the following: - [Upgrading to v1.8][1] - [Upgrading to v1.9][2] @@ -17,13 +17,14 @@ If you're not yet running at least Velero v1.15, see the following: - [Upgrading to v1.13][6] - [Upgrading to v1.14][7] - [Upgrading to v1.15][8] +- [Upgrading to v1.16][9] Before upgrading, check the [Velero compatibility matrix](https://github.com/vmware-tanzu/velero#velero-compatibility-matrix) to make sure your version of Kubernetes is supported by the new version of Velero. ## Instructions -### Upgrade from v1.15 -1. Install the Velero v1.16 command-line interface (CLI) by following the [instructions here][0]. +### Upgrade from v1.16 +1. Install the Velero v1.17 command-line interface (CLI) by following the [instructions here][0]. Verify that you've properly installed it by running: @@ -35,7 +36,7 @@ Before upgrading, check the [Velero compatibility matrix](https://github.com/vmw ```bash Client: - Version: v1.16.0 + Version: v1.17.0 Git commit: ``` @@ -45,21 +46,28 @@ Before upgrading, check the [Velero compatibility matrix](https://github.com/vmw velero install --crds-only --dry-run -o yaml | kubectl apply -f - ``` -3. Update the container image used by the Velero deployment, plugin and (optionally) the node agent daemon set: +3. (optional) Update the `uploader-type` to `kopia` if you are using `restic`: + ```bash + kubectl get deploy -n velero -ojson \ + | sed "s/\"--uploader-type=restic\"/\"--uploader-type=kopia\"/g" \ + | kubectl apply -f - + ``` + +4. 
Update the container image used by the Velero deployment, plugin and (optionally) the node agent daemon set: ```bash # set the container and image of the init container for plugin accordingly, # if you are using other plugin kubectl set image deployment/velero \ - velero=velero/velero:v1.16.0 \ - velero-plugin-for-aws=velero/velero-plugin-for-aws:v1.12.0 \ + velero=velero/velero:v1.17.0 \ + velero-plugin-for-aws=velero/velero-plugin-for-aws:v1.13.0 \ --namespace velero # optional, if using the node agent daemonset kubectl set image daemonset/node-agent \ - node-agent=velero/velero:v1.16.0 \ + node-agent=velero/velero:v1.17.0 \ --namespace velero ``` -4. Confirm that the deployment is up and running with the correct version by running: +5. Confirm that the deployment is up and running with the correct version by running: ```bash velero version @@ -69,11 +77,11 @@ Before upgrading, check the [Velero compatibility matrix](https://github.com/vmw ```bash Client: - Version: v1.16.0 + Version: v1.17.0 Git commit: Server: - Version: v1.16.0 + Version: v1.17.0 ``` [0]: basic-install.md#install-the-cli @@ -85,3 +93,4 @@ Before upgrading, check the [Velero compatibility matrix](https://github.com/vmw [6]: https://velero.io/docs/v1.13/upgrade-to-1.13 [7]: https://velero.io/docs/v1.14/upgrade-to-1.14 [8]: https://velero.io/docs/v1.15/upgrade-to-1.15 +[9]: https://velero.io/docs/v1.16/upgrade-to-1.16 \ No newline at end of file diff --git a/site/content/docs/main/velero-install.md b/site/content/docs/main/velero-install.md index 51c52db1c..a4925278c 100644 --- a/site/content/docs/main/velero-install.md +++ b/site/content/docs/main/velero-install.md @@ -31,12 +31,16 @@ velero install \ [--maintenance-job-cpu-request ] \ [--maintenance-job-mem-request ] \ [--maintenance-job-cpu-limit ] \ - [--maintenance-job-mem-limit ] + [--maintenance-job-mem-limit ] \ + [--server-priority-class-name ] \ + [--node-agent-priority-class-name ] ``` The values for the resource requests and limits flags follow the same format as [Kubernetes resource requirements][3] For plugin container images, please refer to our [supported providers][2] page. +The `--server-priority-class-name` and `--node-agent-priority-class-name` flags allow you to set priority classes for the Velero server deployment and node agent daemonset respectively. This can help ensure proper scheduling and eviction behavior in resource-constrained environments. Note that you must create the priority class before installing Velero. + ## Examples This section provides examples that serve as a starting point for more customized installations. diff --git a/site/data/docs/main-toc.yml b/site/data/docs/main-toc.yml index 051c0575d..8b93bc400 100644 --- a/site/data/docs/main-toc.yml +++ b/site/data/docs/main-toc.yml @@ -13,8 +13,8 @@ toc: url: /basic-install - page: Customize Installation url: /customize-installation - - page: Upgrade to 1.15 - url: /upgrade-to-1.15 + - page: Upgrade to 1.17 + url: /upgrade-to-1.17 - page: Supported providers url: /supported-providers - page: Evaluation install diff --git a/test/e2e/backup/backup.go b/test/e2e/backup/backup.go index b0e7c06da..48ebd0500 100644 --- a/test/e2e/backup/backup.go +++ b/test/e2e/backup/backup.go @@ -26,6 +26,7 @@ import ( . "github.com/onsi/gomega" . "github.com/vmware-tanzu/velero/test" + "github.com/vmware-tanzu/velero/test/util/common" . "github.com/vmware-tanzu/velero/test/util/k8s" . "github.com/vmware-tanzu/velero/test/util/kibishii" . 
"github.com/vmware-tanzu/velero/test/util/velero" @@ -194,11 +195,10 @@ func BackupRestoreTest(backupRestoreTestConfig BackupRestoreTestConfig) { Expect(CreateSecretFromFiles(context.TODO(), *veleroCfg.ClientToInstallVelero, veleroCfg.VeleroNamespace, secretName, files)).To(Succeed()) // Create additional BSL using credential - additionalBsl := "add-bsl" Expect(VeleroCreateBackupLocation(context.TODO(), veleroCfg.VeleroCLI, veleroCfg.VeleroNamespace, - additionalBsl, + common.AdditionalBSLName, veleroCfg.AdditionalBSLProvider, veleroCfg.AdditionalBSLBucket, veleroCfg.AdditionalBSLPrefix, @@ -207,32 +207,25 @@ func BackupRestoreTest(backupRestoreTestConfig BackupRestoreTestConfig) { secretKey, )).To(Succeed()) - BSLs := []string{"default", additionalBsl} - - for _, bsl := range BSLs { - backupName = fmt.Sprintf("backup-%s", bsl) - restoreName = fmt.Sprintf("restore-%s", bsl) - // We limit the length of backup name here to avoid the issue of vsphere plugin https://github.com/vmware-tanzu/velero-plugin-for-vsphere/issues/370 - // We can remove the logic once the issue is fixed - if bsl == "default" { - backupName = fmt.Sprintf("%s-%s", backupName, UUIDgen) - restoreName = fmt.Sprintf("%s-%s", restoreName, UUIDgen) - } - veleroCfg.ProvideSnapshotsVolumeParam = !provideSnapshotVolumesParmInBackup - workloadNS := kibishiiNamespace + bsl - Expect( - RunKibishiiTests( - veleroCfg, - backupName, - restoreName, - bsl, - workloadNS, - useVolumeSnapshots, - !useVolumeSnapshots, - ), - ).To(Succeed(), - "Failed to successfully backup and restore Kibishii namespace using BSL %s", bsl) - } + // We limit the length of backup name here to avoid the issue of vsphere plugin + // https://github.com/vmware-tanzu/velero-plugin-for-vsphere/issues/370 + // We can remove the logic once the issue is fixed + backupName = "backup-" + common.AdditionalBSLName + restoreName = "restore-" + common.AdditionalBSLName + veleroCfg.ProvideSnapshotsVolumeParam = !provideSnapshotVolumesParmInBackup + workloadNS := kibishiiNamespace + common.AdditionalBSLName + Expect( + RunKibishiiTests( + veleroCfg, + backupName, + restoreName, + common.AdditionalBSLName, + workloadNS, + useVolumeSnapshots, + !useVolumeSnapshots, + ), + ).To(Succeed(), + "Failed to successfully backup and restore Kibishii namespace with additional BSL %s", common.AdditionalBSLName) }) }) } diff --git a/test/e2e/e2e_suite_test.go b/test/e2e/e2e_suite_test.go index ccbef8668..0159af8af 100644 --- a/test/e2e/e2e_suite_test.go +++ b/test/e2e/e2e_suite_test.go @@ -398,25 +398,25 @@ var _ = Describe( // Test backup and restore of Kibishii using restic var _ = Describe( "Velero tests on cluster using the plugin provider for object storage and Restic for volume backups", - Label("Basic", "Restic"), + Label("Basic", "Restic", "AdditionalBSL"), BackupRestoreWithRestic, ) var _ = Describe( "Velero tests on cluster using the plugin provider for object storage and snapshots for volume backups", - Label("Basic", "Snapshot", "SkipVanillaZfs"), + Label("Basic", "Snapshot", "SkipVanillaZfs", "AdditionalBSL"), BackupRestoreWithSnapshots, ) var _ = Describe( "Velero tests on cluster using the plugin provider for object storage and snapshots for volume backups", - Label("Basic", "Snapshot", "RetainPV"), + Label("Basic", "Snapshot", "RetainPV", "AdditionalBSL"), BackupRestoreRetainedPVWithSnapshots, ) var _ = Describe( "Velero tests on cluster using the plugin provider for object storage and snapshots for volume backups", - Label("Basic", "Restic", "RetainPV"), + Label("Basic", 
"Restic", "RetainPV", "AdditionalBSL"), BackupRestoreRetainedPVWithRestic, ) @@ -597,12 +597,12 @@ var _ = Describe( var _ = Describe( "Local backups will be deleted once the corresponding backup storage location is deleted", - Label("BSL", "Deletion", "Snapshot", "SkipVanillaZfs"), + Label("BSL", "Deletion", "Snapshot", "SkipVanillaZfs", "AdditionalBSL"), BslDeletionWithSnapshots, ) var _ = Describe( "Local backups and Restic repos will be deleted once the corresponding backup storage location is deleted", - Label("BSL", "Deletion", "Restic"), + Label("BSL", "Deletion", "Restic", "AdditionalBSL"), BslDeletionWithRestic, ) diff --git a/test/util/common/common.go b/test/util/common/common.go index 4b0041e44..853c0bfc9 100644 --- a/test/util/common/common.go +++ b/test/util/common/common.go @@ -9,6 +9,7 @@ import ( "io" "os" "os/exec" + "strings" ) const ( @@ -16,17 +17,22 @@ const ( WorkerOSWindows string = "windows" ) +const ( + DefaultBSLName string = "default" + AdditionalBSLName string = "add-bsl" +) + type OsCommandLine struct { Cmd string Args []string } -func GetListByCmdPipes(ctx context.Context, cmdlines []*OsCommandLine) ([]string, error) { +func GetListByCmdPipes(ctx context.Context, cmdLines []*OsCommandLine) ([]string, error) { var buf bytes.Buffer var err error var cmds []*exec.Cmd - for _, cmdline := range cmdlines { + for _, cmdline := range cmdLines { cmd := exec.Command(cmdline.Cmd, cmdline.Args...) cmds = append(cmds, cmd) } @@ -64,6 +70,29 @@ func GetListByCmdPipes(ctx context.Context, cmdlines []*OsCommandLine) ([]string return ret, nil } +func GetResourceWithLabel(ctx context.Context, namespace, resourceName string, labels map[string]string) ([]string, error) { + labelStr := "" + + for key, value := range labels { + strings.Join([]string{labelStr, key + "=" + value}, ",") + } + + cmds := []*OsCommandLine{} + cmd := &OsCommandLine{ + Cmd: "kubectl", + Args: []string{"get", resourceName, "--no-headers", "-n", namespace, "-l", labelStr}, + } + cmds = append(cmds, cmd) + + cmd = &OsCommandLine{ + Cmd: "awk", + Args: []string{"{print $1}"}, + } + cmds = append(cmds, cmd) + + return GetListByCmdPipes(ctx, cmds) +} + func CMDExecWithOutput(checkCMD *exec.Cmd) (*[]byte, error) { stdoutPipe, err := checkCMD.StdoutPipe() if err != nil { diff --git a/test/util/kibishii/kibishii_utils.go b/test/util/kibishii/kibishii_utils.go index 9ed6080b5..f55b4226c 100644 --- a/test/util/kibishii/kibishii_utils.go +++ b/test/util/kibishii/kibishii_utils.go @@ -83,7 +83,7 @@ func RunKibishiiTests( ) error { pvCount := len(KibishiiPVCNameList) client := *veleroCfg.ClientToInstallVelero - oneHourTimeout, ctxCancel := context.WithTimeout(context.Background(), time.Minute*60) + timeOutContext, ctxCancel := context.WithTimeout(context.Background(), time.Minute*5) defer ctxCancel() veleroCLI := veleroCfg.VeleroCLI providerName := veleroCfg.CloudProvider @@ -97,7 +97,7 @@ func RunKibishiiTests( fmt.Println(errors.Wrapf(err, "failed to delete the namespace %q", kibishiiNamespace)) } } - if err := CreateNamespace(oneHourTimeout, client, kibishiiNamespace); err != nil { + if err := CreateNamespace(timeOutContext, client, kibishiiNamespace); err != nil { return errors.Wrapf(err, "Failed to create namespace %s to install Kibishii workload", kibishiiNamespace) } defer func() { @@ -109,7 +109,7 @@ func RunKibishiiTests( }() fmt.Printf("KibishiiPrepareBeforeBackup %s\n", time.Now().Format("2006-01-02 15:04:05")) if err := KibishiiPrepareBeforeBackup( - oneHourTimeout, + timeOutContext, client, providerName, 
kibishiiNamespace, @@ -134,7 +134,7 @@ func RunKibishiiTests( BackupCfg.ProvideSnapshotsVolumeParam = veleroCfg.ProvideSnapshotsVolumeParam fmt.Printf("VeleroBackupNamespace %s\n", time.Now().Format("2006-01-02 15:04:05")) - if err := VeleroBackupNamespace(oneHourTimeout, veleroCLI, veleroNamespace, BackupCfg); err != nil { + if err := VeleroBackupNamespace(timeOutContext, veleroCLI, veleroNamespace, BackupCfg); err != nil { RunDebug(context.Background(), veleroCLI, veleroNamespace, backupName, "") return errors.Wrapf(err, "Failed to backup kibishii namespace %s", kibishiiNamespace) } @@ -142,12 +142,25 @@ func RunKibishiiTests( fmt.Printf("VeleroBackupNamespace done %s\n", time.Now().Format("2006-01-02 15:04:05")) fmt.Printf("KibishiiVerifyAfterBackup %s\n", time.Now().Format("2006-01-02 15:04:05")) + + objectStoreProvider := veleroCfg.ObjectStoreProvider + cloudCredentialsFile := veleroCfg.CloudCredentialsFile + bslBucket := veleroCfg.BSLBucket + bslPrefix := veleroCfg.BSLPrefix + bslConfig := veleroCfg.BSLConfig + if backupLocation == common.AdditionalBSLName { + objectStoreProvider = veleroCfg.AdditionalBSLProvider + cloudCredentialsFile = veleroCfg.AdditionalBSLCredentials + bslBucket = veleroCfg.AdditionalBSLBucket + bslPrefix = veleroCfg.AdditionalBSLPrefix + bslConfig = veleroCfg.AdditionalBSLConfig + } backupVolumeInfo, err := GetVolumeInfo( - veleroCfg.ObjectStoreProvider, - veleroCfg.CloudCredentialsFile, - veleroCfg.BSLBucket, - veleroCfg.BSLPrefix, - veleroCfg.BSLConfig, + objectStoreProvider, + cloudCredentialsFile, + bslBucket, + bslPrefix, + bslConfig, backupName, BackupObjectsPrefix+"/"+backupName, ) @@ -161,7 +174,7 @@ func RunKibishiiTests( if veleroCfg.HasVspherePlugin { // Wait for uploads started by the Velero Plugin for vSphere to complete fmt.Println("Waiting for vSphere uploads to complete") - if err := WaitForVSphereUploadCompletion(oneHourTimeout, time.Hour, kibishiiNamespace, 2); err != nil { + if err := WaitForVSphereUploadCompletion(timeOutContext, time.Hour, kibishiiNamespace, 2); err != nil { return errors.Wrapf(err, "Error waiting for uploads to complete") } } @@ -180,44 +193,12 @@ func RunKibishiiTests( return errors.Wrap(err, "exceed waiting for snapshot created in cloud") } } else { - pvbs, err := GetPVB(oneHourTimeout, veleroCfg.VeleroNamespace, kibishiiNamespace) + pvbNum, err := BackupPVBNum(timeOutContext, veleroCfg.VeleroNamespace, backupName) if err != nil { return errors.Wrapf(err, "failed to get PVB for namespace %s", kibishiiNamespace) } - if len(pvbs) != pvCount { - return errors.New(fmt.Sprintf("PVB count %d should be %d in namespace %s", len(pvbs), pvCount, kibishiiNamespace)) - } - if providerName == Vsphere { - // Wait for uploads started by the Velero Plugin for vSphere to complete - // TODO - remove after upload progress monitoring is implemented - - // TODO[High] - uncomment code block below when vSphere plugin PR #500 is included in release version. 
- // https://github.com/vmware-tanzu/velero-plugin-for-vsphere/pull/500/ - - // fmt.Println("Make sure no vSphere snapshot uploads created") - // if err := WaitForVSphereUploadCompletion(oneHourTimeout, time.Hour, kibishiiNamespace, 0); err != nil { - // return errors.Wrapf(err, "Error get vSphere snapshot uploads") - // } - } else { - // wait for a period to confirm no snapshots content exist for the backup - time.Sleep(1 * time.Minute) - if strings.EqualFold(veleroFeatures, FeatureCSI) { - _, err = BuildSnapshotCheckPointFromVolumeInfo(veleroCfg, backupVolumeInfo, 0, - kibishiiNamespace, backupName, KibishiiPVCNameList) - if err != nil { - return errors.Wrap(err, "failed to get snapshot checkPoint") - } - } else { - err = CheckSnapshotsInProvider( - veleroCfg, - backupName, - SnapshotCheckPoint{}, - false, - ) - if err != nil { - return errors.Wrap(err, "exceed waiting for snapshot created in cloud") - } - } + if pvbNum != pvCount { + return errors.New(fmt.Sprintf("PVB count %d should be %d in namespace %s", pvbNum, pvCount, kibishiiNamespace)) } } @@ -227,11 +208,11 @@ func RunKibishiiTests( fmt.Printf("Re-populate volume %s\n", time.Now().Format("2006-01-02 15:04:05")) for _, pod := range KibishiiPodNameList { // To ensure Kibishii verification result is accurate - ClearKibishiiData(oneHourTimeout, kibishiiNamespace, pod, "kibishii", "data") + ClearKibishiiData(timeOutContext, kibishiiNamespace, pod, "kibishii", "data") CreateFileContent := fileBaseContent + pod err := CreateFileToPod( - oneHourTimeout, + timeOutContext, kibishiiNamespace, pod, "kibishii", @@ -244,13 +225,13 @@ func RunKibishiiTests( return errors.Wrapf(err, "failed to create file %s", fileName) } } - fmt.Printf("Re-poulate volume done %s\n", time.Now().Format("2006-01-02 15:04:05")) + fmt.Printf("Re-populate volume done %s\n", time.Now().Format("2006-01-02 15:04:05")) pvList := []string{} if strings.Contains(veleroCfg.KibishiiDirectory, "sc-reclaim-policy") { // Get leftover PV list for PV cleanup for _, pvc := range KibishiiPVCNameList { - pv, err := GetPvName(oneHourTimeout, client, pvc, kibishiiNamespace) + pv, err := GetPvName(timeOutContext, client, pvc, kibishiiNamespace) if err != nil { errors.Wrapf(err, "failed to delete namespace %s", kibishiiNamespace) } @@ -259,7 +240,7 @@ func RunKibishiiTests( } fmt.Printf("Simulating a disaster by removing namespace %s %s\n", kibishiiNamespace, time.Now().Format("2006-01-02 15:04:05")) - if err := DeleteNamespace(oneHourTimeout, client, kibishiiNamespace, true); err != nil { + if err := DeleteNamespace(timeOutContext, client, kibishiiNamespace, true); err != nil { return errors.Wrapf(err, "failed to delete namespace %s", kibishiiNamespace) } @@ -267,35 +248,35 @@ func RunKibishiiTests( // In scenario of CSI PV-retain-policy test, to restore PV of the backed up resource, we should make sure // there are no PVs of the same name left, because in previous test step, PV's reclaim policy is retain, // so PVs are not deleted although workload namespace is destroyed. 
- if err := DeletePVs(oneHourTimeout, *veleroCfg.ClientToInstallVelero, pvList); err != nil { + if err := DeletePVs(timeOutContext, *veleroCfg.ClientToInstallVelero, pvList); err != nil { return errors.Wrapf(err, "failed to delete PVs %v", pvList) } } if useVolumeSnapshots { - // the snapshots of AWS may be still in pending status when do the restore, wait for a while - // to avoid this https://github.com/vmware-tanzu/velero/issues/1799 - // TODO remove this after https://github.com/vmware-tanzu/velero/issues/3533 is fixed - fmt.Println("Waiting 5 minutes to make sure the snapshots are ready...") - time.Sleep(5 * time.Minute) + // NativeSnapshot is not the focus of Velero roadmap now, + // and AWS EBS is not covered by E2E test. + // Don't need to wait up to 5 minutes. + fmt.Println("Waiting 30 seconds to make sure the snapshots are ready...") + time.Sleep(30 * time.Second) } fmt.Printf("VeleroRestore %s\n", time.Now().Format("2006-01-02 15:04:05")) - if err := VeleroRestore(oneHourTimeout, veleroCLI, veleroNamespace, restoreName, backupName, ""); err != nil { + if err := VeleroRestore(timeOutContext, veleroCLI, veleroNamespace, restoreName, backupName, ""); err != nil { RunDebug(context.Background(), veleroCLI, veleroNamespace, "", restoreName) return errors.Wrapf(err, "Restore %s failed from backup %s", restoreName, backupName) } if !useVolumeSnapshots && providerName != Vsphere { - pvrs, err := GetPVR(oneHourTimeout, veleroCfg.VeleroNamespace, kibishiiNamespace) + pvrNum, err := RestorePVRNum(timeOutContext, veleroCfg.VeleroNamespace, restoreName) if err != nil { return errors.Wrapf(err, "failed to get PVR for namespace %s", kibishiiNamespace) - } else if len(pvrs) != pvCount { - return errors.New(fmt.Sprintf("PVR count %d is not as expected %d", len(pvrs), pvCount)) + } else if pvrNum != pvCount { + return errors.New(fmt.Sprintf("PVR count %d is not as expected %d", pvrNum, pvCount)) } } fmt.Printf("KibishiiVerifyAfterRestore %s\n", time.Now().Format("2006-01-02 15:04:05")) - if err := KibishiiVerifyAfterRestore(client, kibishiiNamespace, oneHourTimeout, DefaultKibishiiData, fileName, veleroCfg.WorkerOS); err != nil { + if err := KibishiiVerifyAfterRestore(client, kibishiiNamespace, timeOutContext, DefaultKibishiiData, fileName, veleroCfg.WorkerOS); err != nil { return errors.Wrapf(err, "Error verifying kibishii after restore") } @@ -329,19 +310,13 @@ func installKibishii( return errors.New("vSphere needs to download the Kibishii repository first because it needs to inject some image patch file to work.") } - // TODO: blackpiglet debug - fmt.Printf("targetKustomizeDir %s, workerOS: %s, WorkerOSWindows: %s.\n", targetKustomizeDir, workerOS, common.WorkerOSWindows) - if workerOS == common.WorkerOSWindows { targetKustomizeDir += "-windows" - - // TODO: blackpiglet debug - fmt.Printf("targetKustomizeDir for windows %s\n", targetKustomizeDir) } fmt.Printf("The installed Kibishii Kustomize package directory is %s.\n", targetKustomizeDir) } - // update kibishi images with image registry proxy if it is set + // update kibishii images with image registry proxy if it is set baseDir := resolveBasePath(kibishiiDirectory) fmt.Printf("Using image registry proxy %s to patch Kibishii images. 
Base Dir: %s\n", imageRegistryProxy, baseDir) diff --git a/test/util/velero/install.go b/test/util/velero/install.go index 887bef479..2f4eb43de 100644 --- a/test/util/velero/install.go +++ b/test/util/velero/install.go @@ -187,10 +187,15 @@ func VeleroInstall(ctx context.Context, veleroCfg *test.VeleroConfig, isStandbyC WorkerOS: veleroCfg.WorkerOS, }, ); err != nil { - time.Sleep(1 * time.Minute) - RunDebug(context.Background(), veleroCfg.VeleroCLI, veleroCfg.VeleroNamespace, "", "") + RunDebug(ctx, veleroCfg.VeleroCLI, veleroCfg.VeleroNamespace, "", "") return errors.WithMessagef(err, "Failed to install Velero in the cluster") } + + if err := CheckBSL(ctx, veleroCfg.VeleroNamespace, common.DefaultBSLName); err != nil { + RunDebug(ctx, veleroCfg.VeleroCLI, veleroCfg.VeleroNamespace, "", "") + return fmt.Errorf("fail to wait BSL default till ready: %w", err) + } + fmt.Printf("Finish velero install %s\n", time.Now().Format("2006-01-02 15:04:05")) return nil } @@ -418,6 +423,14 @@ func installVeleroServer( args = append(args, "--sa-annotations", options.ServiceAccountAnnotations.String()) } + if options.ServerPriorityClassName != "" { + args = append(args, "--server-priority-class-name", options.ServerPriorityClassName) + } + + if options.NodeAgentPriorityClassName != "" { + args = append(args, "--node-agent-priority-class-name", options.NodeAgentPriorityClassName) + } + // Only version no older than v1.15 support --backup-repository-configmap. if options.BackupRepoConfigMap != "" && (semver.Compare(version, "v1.15") >= 0 || version == "main") { @@ -425,6 +438,14 @@ func installVeleroServer( args = append(args, fmt.Sprintf("--backup-repository-configmap=%s", options.BackupRepoConfigMap)) } + if options.RepoMaintenanceJobConfigMap != "" { + args = append(args, fmt.Sprintf("--repo-maintenance-job-configmap=%s", options.RepoMaintenanceJobConfigMap)) + } + + if options.NodeAgentConfigMap != "" { + args = append(args, fmt.Sprintf("--node-agent-configmap=%s", options.NodeAgentConfigMap)) + } + if err := createVeleroResources(ctx, cli, namespace, args, options); err != nil { return err } @@ -738,32 +759,35 @@ func IsVeleroReady(ctx context.Context, veleroCfg *test.VeleroConfig) (bool, err } } - // Check BSL with poll - err = wait.PollUntilContextTimeout(ctx, k8s.PollInterval, time.Minute, true, func(ctx context.Context) (bool, error) { - return checkBSL(ctx, veleroCfg) == nil, nil - }) - if err != nil { - return false, errors.Wrap(err, "failed to check the bsl") + if err := CheckBSL(ctx, namespace, common.DefaultBSLName); err != nil { + return false, err } + return true, nil } -func checkBSL(ctx context.Context, veleroCfg *test.VeleroConfig) error { - namespace := veleroCfg.VeleroNamespace - stdout, stderr, err := velerexec.RunCommand(exec.CommandContext(ctx, "kubectl", "get", "bsl", "default", - "-o", "json", "-n", namespace)) - if err != nil { - return errors.Wrapf(err, "failed to get bsl %s stdout=%s, stderr=%s", veleroCfg.BSLBucket, stdout, stderr) - } else { - bsl := &velerov1api.BackupStorageLocation{} - if err = json.Unmarshal([]byte(stdout), bsl); err != nil { - return errors.Wrapf(err, "failed to unmarshal the velero bsl") +func CheckBSL(ctx context.Context, ns string, bslName string) error { + // Check BSL with poll + err := wait.PollUntilContextTimeout(ctx, k8s.PollInterval, time.Minute, true, func(ctx context.Context) (bool, error) { + stdout, stderr, err := velerexec.RunCommand(exec.CommandContext(ctx, "kubectl", "get", "bsl", bslName, + "-o", "json", "-n", ns)) + if err != nil { + 
return false, errors.Wrapf(err, "failed to get bsl %s stdout=%s, stderr=%s", bslName, stdout, stderr) + } else { + bsl := &velerov1api.BackupStorageLocation{} + if err = json.Unmarshal([]byte(stdout), bsl); err != nil { + return false, errors.Wrapf(err, "failed to unmarshal the velero bsl") + } + if bsl.Status.Phase != velerov1api.BackupStorageLocationPhaseAvailable { + // BSL is not ready. Continue polling till timeout. + return false, nil + } } - if bsl.Status.Phase != velerov1api.BackupStorageLocationPhaseAvailable { - return fmt.Errorf("current bsl %s is not available", veleroCfg.BSLBucket) - } - } - return nil + + return true, nil + }) + + return err } func PrepareVelero(ctx context.Context, caseName string, veleroCfg test.VeleroConfig) error { diff --git a/test/util/velero/velero_utils.go b/test/util/velero/velero_utils.go index db417a0a3..1d17e4ace 100644 --- a/test/util/velero/velero_utils.go +++ b/test/util/velero/velero_utils.go @@ -272,6 +272,8 @@ func getProviderVeleroInstallOptions(veleroCfg *VeleroConfig, io.VeleroPodMemRequest = veleroCfg.VeleroPodMemRequest io.DisableInformerCache = veleroCfg.DisableInformerCache io.ItemBlockWorkerCount = veleroCfg.ItemBlockWorkerCount + io.ServerPriorityClassName = veleroCfg.ServerPriorityClassName + io.NodeAgentPriorityClassName = veleroCfg.NodeAgentPriorityClassName return io, nil } @@ -651,7 +653,12 @@ func VeleroCreateBackupLocation(ctx context.Context, if secretName != "" && secretKey != "" { args = append(args, "--credential", fmt.Sprintf("%s=%s", secretName, secretKey)) } - return VeleroCmdExec(ctx, veleroCLI, args) + + if err := VeleroCmdExec(ctx, veleroCLI, args); err != nil { + return err + } + + return CheckBSL(ctx, veleroNamespace, name) } func VeleroVersion(ctx context.Context, veleroCLI, veleroNamespace string) error { @@ -1526,35 +1533,30 @@ func ListVeleroPods(ctx context.Context, veleroNamespace string) ([]string, erro return common.GetListByCmdPipes(ctx, cmds) } -func GetVeleroResource(ctx context.Context, veleroNamespace, namespace, resourceName string) ([]string, error) { - cmds := []*common.OsCommandLine{} - cmd := &common.OsCommandLine{ - Cmd: "kubectl", - Args: []string{"get", resourceName, "-n", veleroNamespace}, - } - cmds = append(cmds, cmd) +func BackupPVBNum(ctx context.Context, veleroNamespace, backupName string) (int, error) { + outputList, err := common.GetResourceWithLabel( + ctx, + veleroNamespace, + "PodVolumeBackup", + map[string]string{ + velerov1api.BackupNameLabel: backupName, + }, + ) - cmd = &common.OsCommandLine{ - Cmd: "grep", - Args: []string{namespace}, - } - cmds = append(cmds, cmd) - - cmd = &common.OsCommandLine{ - Cmd: "awk", - Args: []string{"{print $1}"}, - } - cmds = append(cmds, cmd) - - return common.GetListByCmdPipes(ctx, cmds) + return len(outputList), err } -func GetPVB(ctx context.Context, veleroNamespace, namespace string) ([]string, error) { - return GetVeleroResource(ctx, veleroNamespace, namespace, "podvolumebackup") -} +func RestorePVRNum(ctx context.Context, veleroNamespace, restoreName string) (int, error) { + outputList, err := common.GetResourceWithLabel( + ctx, + veleroNamespace, + "PodVolumeRestore", + map[string]string{ + velerov1api.RestoreNameLabel: restoreName, + }, + ) -func GetPVR(ctx context.Context, veleroNamespace, namespace string) ([]string, error) { - return GetVeleroResource(ctx, veleroNamespace, namespace, "podvolumerestore") + return len(outputList), err } func IsSupportUploaderType(version string) (bool, error) {
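For reference, the kubectl/awk pipeline that `BackupPVBNum` and `RestorePVRNum` assemble through `GetResourceWithLabel` is roughly equivalent to the shell sketch below; it assumes `velerov1api.BackupNameLabel` resolves to `velero.io/backup-name` (and `RestoreNameLabel` to `velero.io/restore-name`), and `<backup-name>` is a placeholder for the Velero backup name.

```bash
# Count the PodVolumeBackups belonging to one backup, mirroring BackupPVBNum.
# <backup-name> is a placeholder.
kubectl get podvolumebackups --no-headers -n velero \
  -l velero.io/backup-name=<backup-name> | awk '{print $1}' | wc -l
```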