diff --git a/.github/workflows/e2e-test-kind.yaml b/.github/workflows/e2e-test-kind.yaml index 65517804c..47979a27c 100644 --- a/.github/workflows/e2e-test-kind.yaml +++ b/.github/workflows/e2e-test-kind.yaml @@ -8,16 +8,26 @@ on: - "design/**" - "**/*.md" jobs: + get-go-version: + uses: ./.github/workflows/get-go-version.yaml + with: + ref: ${{ github.event.pull_request.base.ref }} + # Build the Velero CLI and image once for all Kubernetes versions, and cache it so the fan-out workers can get it. build: runs-on: ubuntu-latest + needs: get-go-version + outputs: + minio-dockerfile-sha: ${{ steps.minio-version.outputs.dockerfile_sha }} steps: - name: Check out the code uses: actions/checkout@v5 - - name: Set up Go - uses: actions/setup-go@v5 + + - name: Set up Go version + uses: actions/setup-go@v6 with: - go-version-file: 'go.mod' + go-version: ${{ needs.get-go-version.outputs.version }} + # Look for a CLI that's made for this PR - name: Fetch built CLI id: cli-cache @@ -44,6 +54,26 @@ jobs: run: | IMAGE=velero VERSION=pr-test BUILD_OUTPUT_TYPE=docker make container docker save velero:pr-test-linux-amd64 -o ./velero.tar + # Check and build MinIO image once for all e2e tests + - name: Check Bitnami MinIO Dockerfile version + id: minio-version + run: | + DOCKERFILE_SHA=$(curl -s https://api.github.com/repos/bitnami/containers/commits?path=bitnami/minio/2025/debian-12/Dockerfile\&per_page=1 | jq -r '.[0].sha') + echo "dockerfile_sha=${DOCKERFILE_SHA}" >> $GITHUB_OUTPUT + - name: Cache MinIO Image + uses: actions/cache@v4 + id: minio-cache + with: + path: ./minio-image.tar + key: minio-bitnami-${{ steps.minio-version.outputs.dockerfile_sha }} + - name: Build MinIO Image from Bitnami Dockerfile + if: steps.minio-cache.outputs.cache-hit != 'true' + run: | + echo "Building MinIO image from Bitnami Dockerfile..." + git clone --depth 1 https://github.com/bitnami/containers.git /tmp/bitnami-containers + cd /tmp/bitnami-containers/bitnami/minio/2025/debian-12 + docker build -t bitnami/minio:local . + docker save bitnami/minio:local > ${{ github.workspace }}/minio-image.tar # Create json of k8s versions to test # from guide: https://stackoverflow.com/a/65094398/4590470 setup-test-matrix: @@ -75,6 +105,7 @@ jobs: needs: - build - setup-test-matrix + - get-go-version runs-on: ubuntu-latest strategy: matrix: ${{fromJson(needs.setup-test-matrix.outputs.matrix)}} @@ -82,13 +113,26 @@ jobs: steps: - name: Check out the code uses: actions/checkout@v5 - - name: Set up Go - uses: actions/setup-go@v5 + + - name: Set up Go version + uses: actions/setup-go@v6 with: - go-version-file: 'go.mod' + go-version: ${{ needs.get-go-version.outputs.version }} + + # Fetch the pre-built MinIO image from the build job + - name: Fetch built MinIO Image + uses: actions/cache@v4 + id: minio-cache + with: + path: ./minio-image.tar + key: minio-bitnami-${{ needs.build.outputs.minio-dockerfile-sha }} + - name: Load MinIO Image + run: | + echo "Loading MinIO image..." 
+ docker load < ./minio-image.tar - name: Install MinIO - run: - docker run -d --rm -p 9000:9000 -e "MINIO_ACCESS_KEY=minio" -e "MINIO_SECRET_KEY=minio123" -e "MINIO_DEFAULT_BUCKETS=bucket,additional-bucket" bitnami/minio:2021.6.17-debian-10-r7 + run: | + docker run -d --rm -p 9000:9000 -e "MINIO_ROOT_USER=minio" -e "MINIO_ROOT_PASSWORD=minio123" -e "MINIO_DEFAULT_BUCKETS=bucket,additional-bucket" bitnami/minio:local - uses: engineerd/setup-kind@v0.6.2 with: skipClusterLogsExport: true diff --git a/.github/workflows/get-go-version.yaml b/.github/workflows/get-go-version.yaml new file mode 100644 index 000000000..3bc3f8e53 --- /dev/null +++ b/.github/workflows/get-go-version.yaml @@ -0,0 +1,33 @@ +on: + workflow_call: + inputs: + ref: + description: "The target branch's ref" + required: true + type: string + outputs: + version: + description: "The expected Go version" + value: ${{ jobs.extract.outputs.version }} + +jobs: + extract: + runs-on: ubuntu-latest + outputs: + version: ${{ steps.pick-version.outputs.version }} + steps: + - name: Check out the code + uses: actions/checkout@v5 + + - id: pick-version + run: | + if [ "${{ inputs.ref }}" == "main" ]; then + version=$(grep '^go ' go.mod | awk '{print $2}' | cut -d. -f1-2) + else + goDirectiveVersion=$(grep '^go ' go.mod | awk '{print $2}') + toolChainVersion=$(grep '^toolchain ' go.mod | awk '{print $2}') + version=$(printf "%s\n%s\n" "$goDirectiveVersion" "$toolChainVersion" | sort -V | tail -n1) + fi + + echo "version=$version" + echo "version=$version" >> $GITHUB_OUTPUT diff --git a/.github/workflows/pr-ci-check.yml b/.github/workflows/pr-ci-check.yml index 0a394560a..c97f216b4 100644 --- a/.github/workflows/pr-ci-check.yml +++ b/.github/workflows/pr-ci-check.yml @@ -1,18 +1,26 @@ name: Pull Request CI Check on: [pull_request] jobs: + get-go-version: + uses: ./.github/workflows/get-go-version.yaml + with: + ref: ${{ github.event.pull_request.base.ref }} + build: name: Run CI + needs: get-go-version runs-on: ubuntu-latest strategy: fail-fast: false steps: - name: Check out the code uses: actions/checkout@v5 - - name: Set up Go - uses: actions/setup-go@v5 + + - name: Set up Go version + uses: actions/setup-go@v6 with: - go-version-file: 'go.mod' + go-version: ${{ needs.get-go-version.outputs.version }} + - name: Make ci run: make ci - name: Upload test coverage diff --git a/.github/workflows/pr-linter-check.yml b/.github/workflows/pr-linter-check.yml index 997466ccf..81a2bdd64 100644 --- a/.github/workflows/pr-linter-check.yml +++ b/.github/workflows/pr-linter-check.yml @@ -7,16 +7,24 @@ on: - "design/**" - "**/*.md" jobs: + get-go-version: + uses: ./.github/workflows/get-go-version.yaml + with: + ref: ${{ github.event.pull_request.base.ref }} + build: name: Run Linter Check runs-on: ubuntu-latest + needs: get-go-version steps: - name: Check out the code uses: actions/checkout@v5 - - name: Set up Go - uses: actions/setup-go@v5 + + - name: Set up Go version + uses: actions/setup-go@v6 with: - go-version-file: 'go.mod' + go-version: ${{ needs.get-go-version.outputs.version }} + - name: Linter check uses: golangci/golangci-lint-action@v8 with: diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index 8dee8799a..8e1bc1219 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -9,17 +9,24 @@ on: - '*' jobs: + get-go-version: + uses: ./.github/workflows/get-go-version.yaml + with: + ref: ${ github.ref } build: name: Build runs-on: ubuntu-latest + needs: get-go-version steps: - name: Check out the code 
uses: actions/checkout@v5 - - name: Set up Go - uses: actions/setup-go@v5 + + - name: Set up Go version + uses: actions/setup-go@v6 with: - go-version-file: 'go.mod' + go-version: ${{ needs.get-go-version.outputs.version }} + - name: Set up QEMU id: qemu uses: docker/setup-qemu-action@v3 diff --git a/.github/workflows/stale-issues.yml b/.github/workflows/stale-issues.yml index 5fe397ba8..8f94ea65b 100644 --- a/.github/workflows/stale-issues.yml +++ b/.github/workflows/stale-issues.yml @@ -7,7 +7,7 @@ jobs: stale: runs-on: ubuntu-latest steps: - - uses: actions/stale@v9.1.0 + - uses: actions/stale@v10.0.0 with: repo-token: ${{ secrets.GITHUB_TOKEN }} stale-issue-message: "This issue is stale because it has been open 60 days with no activity. Remove stale label or comment or this will be closed in 14 days. If a Velero team member has requested log or more information, please provide the output of the shared commands." diff --git a/README.md b/README.md index 0d5318873..046e55c91 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,7 @@ The following is a list of the supported Kubernetes versions for each Velero ver | Velero version | Expected Kubernetes version compatibility | Tested on Kubernetes version | |----------------|-------------------------------------------|-------------------------------------| -| 1.17 | 1.18-latest | 1.31.7, 1.32.3, and 1.33.1 | +| 1.17 | 1.18-latest | 1.31.7, 1.32.3, 1.33.1, and 1.34.0 | | 1.16 | 1.18-latest | 1.31.4, 1.32.3, and 1.33.0 | | 1.15 | 1.18-latest | 1.28.8, 1.29.8, 1.30.4 and 1.31.1 | | 1.14 | 1.18-latest | 1.27.9, 1.28.9, and 1.29.4 | diff --git a/changelogs/unreleased/9226-sseago b/changelogs/unreleased/9226-sseago new file mode 100644 index 000000000..fe67d4cbf --- /dev/null +++ b/changelogs/unreleased/9226-sseago @@ -0,0 +1 @@ +Get pod list once per namespace in pvc IBA diff --git a/changelogs/unreleased/9244-priyansh17 b/changelogs/unreleased/9244-priyansh17 new file mode 100644 index 000000000..931c43946 --- /dev/null +++ b/changelogs/unreleased/9244-priyansh17 @@ -0,0 +1 @@ +Update AzureAD Microsoft Authentication Library to v1.5.0 \ No newline at end of file diff --git a/changelogs/unreleased/9248-0xLeo258 b/changelogs/unreleased/9248-0xLeo258 new file mode 100644 index 000000000..8332dcade --- /dev/null +++ b/changelogs/unreleased/9248-0xLeo258 @@ -0,0 +1 @@ +Protect VolumeSnapshot field from race condition during multi-thread backup diff --git a/changelogs/unreleased/9256-shubham-pampattiwar b/changelogs/unreleased/9256-shubham-pampattiwar new file mode 100644 index 000000000..2400fb6b6 --- /dev/null +++ b/changelogs/unreleased/9256-shubham-pampattiwar @@ -0,0 +1 @@ +Fix repository maintenance jobs to inherit allowlisted tolerations from Velero deployment diff --git a/changelogs/unreleased/9264-shubham-pampattiwar b/changelogs/unreleased/9264-shubham-pampattiwar new file mode 100644 index 000000000..711ea4b57 --- /dev/null +++ b/changelogs/unreleased/9264-shubham-pampattiwar @@ -0,0 +1 @@ +Fix schedule controller to prevent backup queue accumulation during extended blocking scenarios by properly handling empty backup phases \ No newline at end of file diff --git a/changelogs/unreleased/9281-0xLeo258 b/changelogs/unreleased/9281-0xLeo258 new file mode 100644 index 000000000..eb5bf3f5d --- /dev/null +++ b/changelogs/unreleased/9281-0xLeo258 @@ -0,0 +1 @@ +Implement concurrency control for cache of native VolumeSnapshotter plugin. 
diff --git a/design/concurrent-backup-processing.md b/design/concurrent-backup-processing.md
new file mode 100644
index 000000000..76c22c3c0
--- /dev/null
+++ b/design/concurrent-backup-processing.md
@@ -0,0 +1,257 @@
+# Concurrent Backup Processing
+
+This enhancement will enable Velero to process multiple backups at the same time. This is largely a usability enhancement rather than a performance enhancement: overall backup throughput may not improve significantly over the current implementation, because individual backup items are already processed in parallel. It is a significant usability improvement, though, because with the current design a user who submits a small backup may have to wait significantly longer than expected if it is submitted immediately after a large backup.
+
+## Background
+
+With the current implementation, only one backup may be `InProgress` at a time. A second backup will not start processing until the first backup moves on to `WaitingForPluginOperations` or `Finalizing`. This is a usability concern, especially in clusters where multiple users are initiating backups. With this enhancement, we intend to allow multiple backups to be processed concurrently, so that backups start processing immediately, even if a large backup was just submitted by another user. This enhancement will build on top of the prior parallel item processing feature by creating a dedicated ItemBlock worker pool for each running backup. The pool will be created at the beginning of the backup reconcile, and the input channel will be passed to the Kubernetes backupper just like it is in the current release.
+
+The primary challenge is to make sure that the same workload in multiple backups is not backed up concurrently. If that were to happen, we would risk data corruption, especially around the processing of pod hooks and volume backup. For this first release we will take a conservative, high-level approach to overlap detection: two backups will not run concurrently if there is any overlap in included namespaces. For example, if a backup that includes `ns1` and `ns2` is running, then a second backup for `ns2` and `ns3` will not be started. If a backup which does not filter namespaces is running (either a whole-cluster backup or a non-namespace-limited backup with a label selector), then no other backups will be started, since a backup across all namespaces overlaps with any other backup. Calculating item-level overlap for queued backups is problematic since we don't know which items are included in a backup until backup processing has begun. A future release may add ItemBlock overlap detection, where at the ItemBlock worker level the same item will not be processed by two different workers at the same time. This would work together with the namespace-level conflict detection to detect conflicts at a more granular level for resources shared between backups. Eventually, with a more complete understanding of individual workloads (either via ItemBlocks or some higher-level model), the namespace-level overlap detection may be relaxed in future versions.
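To make the overlap rule concrete, here is a minimal illustrative sketch of namespace-level overlap detection between two backups' included-namespace lists. The helper name and the treatment of `*` are assumptions, not part of the proposed implementation; an empty list is treated as "all namespaces", which overlaps with everything:

```go
// namespacesOverlap reports whether two included-namespace lists conflict.
// An empty list (or one containing "*") means the backup includes all
// namespaces and therefore overlaps with any other backup.
func namespacesOverlap(a, b []string) bool {
	includesAll := func(nss []string) bool {
		if len(nss) == 0 {
			return true
		}
		for _, ns := range nss {
			if ns == "*" {
				return true
			}
		}
		return false
	}
	if includesAll(a) || includesAll(b) {
		return true
	}
	set := make(map[string]struct{}, len(a))
	for _, ns := range a {
		set[ns] = struct{}{}
	}
	for _, ns := range b {
		if _, ok := set[ns]; ok {
			return true
		}
	}
	return false
}
```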
+
+## Goals
+- Process multiple backups concurrently
+- Detect namespace overlap to avoid conflicts
+- For queued backups (not yet runnable due to concurrency limits or overlap), indicate the queue position in status
+
+## Non Goals
+- Handling NFS PVs when more than one PV points to the same underlying NFS share
+- Handling VGDP cancellation for failed backups on restart
+- Mounting a PVC for scenarios in which /tmp is too small for the number of concurrent backups
+- Providing a mechanism to identify high-priority backups which get preferential treatment in terms of ItemBlock worker availability
+- Item-level overlap detection (future feature)
+- Providing the ability to disable namespace-level overlap detection once item-level overlap detection is in place (although this may be supported in a future version).
+
+## High-Level Design
+
+### Backup CRD changes
+
+Two new backup phases will be added: `Queued` and `ReadyToStart`. In the Backup workflow, new backups will be moved to the `Queued` phase when they are added to the backup queue. When a backup is removed from the queue because it is now able to run, it will be moved to the `ReadyToStart` phase, which will allow the backup controller to start processing it.
+
+In addition, a new Status field, `QueuePosition`, will be added to track the backup's current position in the queue.
+
+### New Controller: `backupQueueReconciler`
+
+A new reconciler, `backupQueueReconciler`, will be added. It will use the current `backupReconciler` logic for reconciling `New` backups, but instead of running the backup it will move the Backup to the `Queued` phase and set `QueuePosition`.
+
+In addition, this reconciler will periodically reconcile all queued backups (on some configurable time interval) and, if there is a runnable backup, remove it from the queue, update `QueuePosition` for any queued backups behind it, and update its phase to `ReadyToStart`.
+
+Queued backups will be reconciled in order based on `QueuePosition`, so the first runnable backup found will be processed. A backup is runnable if both of the following conditions are true:
+1) The total number of backups either `InProgress` or `ReadyToStart` is less than the configured number of concurrent backups.
+2) The backup has no overlap with any backups currently `InProgress` or `ReadyToStart`, or with any `Queued` backups ahead of it in the queue (i.e. with a `QueuePosition` closer to 1).
+
+### Updates to Backup controller
+
+The current `backupReconciler` will change its reconciling rules. Instead of watching and reconciling `New` backups, it will reconcile `ReadyToStart` backups. In addition, it will be configured to run in parallel by setting `MaxConcurrentReconciles` based on the `concurrent-backups` server arg.
+
+The startup (and shutdown) of the ItemBlock worker pool will be moved from reconciler startup to the backup reconcile, which will give each running backup its own dedicated worker pool. The per-backup worker pool will use the existing `--item-block-worker-count` installer/server arg. This means that the maximum number of ItemBlock workers for the entire Velero pod will be the ItemBlock worker count multiplied by concurrentBackups. For example, if concurrentBackups is 5 and itemBlockWorkerCount is 6, then there will be, at most, 30 worker threads active, 6 dedicated to each InProgress backup, but this maximum will only be reached when the maximum number of backups is InProgress.
+This also means that each InProgress backup will have a dedicated ItemBlock input channel with the same fixed buffer size.
+
+## Detailed Design
+
+### New Install/Server configuration args
+
+A new install/server arg, `concurrent-backups`, will be added. This will be an int-valued field specifying the number of backups which may be processed concurrently (with phase `InProgress`). If not specified, the default value of 1 will be used.
+
+### Consideration of backup overlap and concurrent backup processing
+
+The primary consideration for running additional backups concurrently is the configured `concurrent-backups` parameter. If the total number of `InProgress` and `ReadyToStart` backups is equal to `concurrent-backups`, then any `Queued` backups will remain in the queue.
+
+The second consideration is backup overlap. In order to prevent interaction between running backups (particularly around volume backup and pod hooks), we cannot allow two overlapping backups to run at the same time. For now, we will define overlap broadly -- requiring that two concurrent backups don't include any of the same namespaces. A backup for `ns1` can run concurrently with a backup for `ns2`, but a backup for `[ns1,ns2]` cannot run concurrently with a backup for `ns1`. One consequence of this approach is that a backup which includes all namespaces (even if further filtered by resource or label) cannot run concurrently with *any other backup*.
+
+When determining which queued backup to run next, Velero will look for the next queued backup which has no overlap with any InProgress backup or any Queued backup ahead of it. The reason we need to consider queued as well as running backups for overlap detection is as follows.
+
+Consider the following scenario. These are the current not-yet-completed backups (ordered from oldest to newest):
+1. backup1, includedNamespaces: [ns1, ns2], phase: InProgress
+2. backup2, includedNamespaces: [ns2, ns3, ns5], phase: Queued, QueuePosition: 1
+3. backup3, includedNamespaces: [ns4, ns3], phase: Queued, QueuePosition: 2
+4. backup4, includedNamespaces: [ns5, ns6], phase: Queued, QueuePosition: 3
+5. backup5, includedNamespaces: [ns8, ns9], phase: Queued, QueuePosition: 4
+
+Assuming `concurrent-backups` is 2, on the next reconcile Velero will be able to start a second backup if there is one with no overlap. `backup2` cannot run, since it overlaps with the running `backup1` on `ns2`. If we only considered overlap with running backups (and not queued ones), then `backup3` could run now: it conflicts with the queued `backup2` on `ns3`, but it does not conflict with the running backup. However, if it runs now, then when `backup1` completes, `backup2` still can't run (since it now overlaps with the running `backup3` on `ns3`), so `backup4` starts instead. Then, when `backup3` completes, `backup2` still can't run (since it now conflicts with `backup4` on `ns5`). This means that even though it was the second backup created, it's the fourth to run -- providing worse time to completion than without parallel backups. If a queued backup has a large number of namespaces (a full-cluster backup, for example), it would never run as long as new single-namespace backups keep being added to the queue.
+
+To resolve this problem, we consider both running backups and backups ahead in the queue when resolving overlap conflicts. In the above scenario, `backup2` can't run yet since it overlaps with the running backup on `ns2`.
In addition, `backup3` and `backup4` also can't run yet since they overlap with queued `backup2`. Therefore, `backup5` will run now. Once `backup1` completes, `backup2` will be free to run.
+
+### Backup CRD changes
+
+New Backup phases:
+```go
+const (
+	// BackupPhaseQueued means the backup has been added to the
+	// queue by the BackupQueueReconciler.
+	BackupPhaseQueued BackupPhase = "Queued"
+
+	// BackupPhaseReadyToStart means the backup has been removed from the
+	// queue by the BackupQueueReconciler and is ready to start.
+	BackupPhaseReadyToStart BackupPhase = "ReadyToStart"
+)
+```
+
+In addition, a new Status field, `queuePosition`, will be added to track the backup's current position in the queue.
+```go
+	// QueuePosition is the position held by the backup in the queue.
+	// QueuePosition=1 means this backup is the next to be considered.
+	// Only relevant when Phase is "Queued"
+	// +optional
+	QueuePosition int `json:"queuePosition,omitempty"`
+```
+
+### New Controller: `backupQueueReconciler`
+
+A new reconciler, `backupQueueReconciler`, will be added, which will reconcile backups under these conditions:
+1) Watching Create/Update for backups in `New` (or empty) phase
+2) Watching for Backup phase transitions from `InProgress` to something else, to reconcile all `Queued` backups
+3) Watching for Backup phase transitions from `New` (or empty) to `Queued`, to reconcile all `Queued` backups
+4) Periodic reconcile of `Queued` backups, to handle backups queued at server startup as well as to make sure we never have a situation where backups are queued indefinitely because of a race condition or because they were otherwise missed in the reconcile on prior backup completion.
+
+The reconciler will be set up as follows -- note that New backups are reconciled on Create/Update, while Queued backups are reconciled when an InProgress backup moves on to another state or when a new backup moves to the Queued state. We also reconcile Queued backups periodically to handle the case of a Velero pod restart with Queued backups, as well as to handle possible edge cases where a queued backup doesn't get moved out of the queue at the point of backup completion or where an error occurs during a prior Queued backup reconcile.
+
+```go
+func (c *backupQueueReconciler) SetupWithManager(mgr ctrl.Manager) error {
+	// only consider Queued backups, order by QueuePosition
+	gp := kube.NewGenericEventPredicate(func(object client.Object) bool {
+		backup := object.(*velerov1api.Backup)
+		return (backup.Status.Phase == velerov1api.BackupPhaseQueued)
+	})
+	s := kube.NewPeriodicalEnqueueSource(c.logger.WithField("controller", constant.ControllerBackupQueue), mgr.GetClient(), &velerov1api.BackupList{}, c.frequency, kube.PeriodicalEnqueueSourceOption{
+		Predicates: []predicate.Predicate{gp},
+		OrderFunc:  queuePositionOrderFunc,
+	})
+
+	return ctrl.NewControllerManagedBy(mgr).
+		For(&velerov1api.Backup{}, builder.WithPredicates(predicate.Funcs{
+			UpdateFunc: func(ue event.UpdateEvent) bool {
+				backup := ue.ObjectNew.(*velerov1api.Backup)
+				return backup.Status.Phase == "" || backup.Status.Phase == velerov1api.BackupPhaseNew
+			},
+			CreateFunc: func(ce event.CreateEvent) bool {
+				backup := ce.Object.(*velerov1api.Backup)
+				return backup.Status.Phase == "" || backup.Status.Phase == velerov1api.BackupPhaseNew
+			},
+			DeleteFunc: func(de event.DeleteEvent) bool {
+				return false
+			},
+			GenericFunc: func(ge event.GenericEvent) bool {
+				return false
+			},
+		})).
+		Watches(
+			&velerov1api.Backup{},
+			handler.EnqueueRequestsFromMapFunc(func(ctx context.Context, obj client.Object) []reconcile.Request {
+				backupList := velerov1api.BackupList{}
+				if err := c.List(ctx, &backupList); err != nil {
+					c.logger.WithError(err).Error("error listing backups")
+					return nil
+				}
+				requests := []reconcile.Request{}
+				// filter backup list by Phase=Queued
+				// sort backup list by queuePosition
+				return requests
+			}),
+			builder.WithPredicates(predicate.Funcs{
+				UpdateFunc: func(ue event.UpdateEvent) bool {
+					oldBackup := ue.ObjectOld.(*velerov1api.Backup)
+					newBackup := ue.ObjectNew.(*velerov1api.Backup)
+					return oldBackup.Status.Phase == velerov1api.BackupPhaseInProgress &&
+						newBackup.Status.Phase != velerov1api.BackupPhaseInProgress ||
+						oldBackup.Status.Phase != velerov1api.BackupPhaseQueued &&
+							newBackup.Status.Phase == velerov1api.BackupPhaseQueued
+				},
+				CreateFunc: func(ce event.CreateEvent) bool {
+					return false
+				},
+				DeleteFunc: func(de event.DeleteEvent) bool {
+					return false
+				},
+				GenericFunc: func(ge event.GenericEvent) bool {
+					return false
+				},
+			})).
+		WatchesRawSource(s).
+		Named(constant.ControllerBackupQueue).
+		Complete(c)
+}
+```
+
+New backups will be queued: Phase will be set to `Queued`, and `QueuePosition` will be set to an int value incremented from the highest current `QueuePosition` value among Queued backups.
+
+Queued backups will be removed from the queue if runnable:
+1) If the total number of backups either InProgress or ReadyToStart is greater than or equal to the concurrency limit, then exit without removing from the queue.
+2) If the current backup overlaps with any InProgress, ReadyToStart, or Queued backup with `QueuePosition < currentBackup.QueuePosition`, then exit without removing from the queue.
+3) If we get here, the backup is runnable. To resolve a potential race condition where an InProgress backup completes between reconciling the backup with QueuePosition `n-1` and reconciling the current backup with QueuePosition `n`, we also check whether there are any runnable backups in the queue ahead of this one. The only time this will happen is if a backup completes immediately before the reconcile starts, which either frees up a concurrency slot or removes a namespace conflict. In that case, we don't want to run the current backup, since the one ahead of it in the queue (which was passed over shortly before the InProgress backup completed) must run first; exit without removing from the queue.
+4) If we get here, remove the backup from the queue by setting Phase to `ReadyToStart` and `QueuePosition` to zero. Decrement the `QueuePosition` of any other Queued backups with a `QueuePosition` higher than the current backup's queue position prior to dequeuing. At this point, the backup reconciler will start the backup.
+
+The concurrency check for step 1 is `if len(inProgressBackups)+len(pendingStartBackups) >= concurrentBackups`.
+
+```
+	switch original.Status.Phase {
+	case "", velerov1api.BackupPhaseNew:
+		// enqueue backup -- set phase=Queued, set queuePosition=maxCurrentQueuePosition+1
+	case velerov1api.BackupPhaseQueued:
+		// We should only ever get these events when added in order by the periodical enqueue source,
+		// so as long as the current backup has no conflicts ahead of it or running, we should be good to
+		// dequeue.
+		// list backups, filter on Queued, ReadyToStart, and InProgress
+		// if number of InProgress backups + number of ReadyToStart backups >= concurrency limit, exit
+		// generate list of all namespaces included in InProgress, ReadyToStart, and Queued backups with
+		// queuePosition < backup.Status.QueuePosition
+		// if overlap found, exit
+		// check backups ahead of this one in the queue for runnability. If any are runnable, exit
+		// dequeue backup: set Phase to ReadyToStart, QueuePosition to 0, and decrement QueuePosition
+		// for all Queued backups behind this one in the queue
+	}
+```
+
+The queue controller will run as a single reconciler thread, so we will not need to deal with concurrency issues when moving backups from New to Queued or from Queued to ReadyToStart, and all of the updates to QueuePosition will be made from a single thread.
+
+### Updates to Backup controller
+
+The Reconcile logic will be updated to respond to ReadyToStart backups instead of New backups:
+
+```
+@@ -234,8 +234,8 @@ func (b *backupReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr
+	// InProgress, we still need this check so we can return nil to indicate we've finished processing
+	// this key (even though it was a no-op).
+	switch original.Status.Phase {
+-	case "", velerov1api.BackupPhaseNew:
+-		// only process new backups
++	case velerov1api.BackupPhaseReadyToStart:
++		// only process ReadyToStart backups
+	default:
+		b.logger.WithFields(logrus.Fields{
+			"backup": kubeutil.NamespaceAndName(original),
+```
+
+In addition, it will be configured to run in parallel by setting `MaxConcurrentReconciles` based on the `concurrent-backups` server arg.
+
+```
+@@ -149,6 +149,9 @@ func NewBackupReconciler(
+ func (b *backupReconciler) SetupWithManager(mgr ctrl.Manager) error {
+ 	return ctrl.NewControllerManagedBy(mgr).
+ 		For(&velerov1api.Backup{}).
++		WithOptions(controller.Options{
++			MaxConcurrentReconciles: concurrentBackups,
++		}).
+ 		Named(constant.ControllerBackup).
+ 		Complete(b)
+ }
+```
+
+The controller-runtime core reconciler logic already prevents the same resource from being reconciled by two different reconciler threads, so we don't need to worry about concurrency issues at the controller level.
+
+The workerPool reference will be moved from the backupReconciler to the backupRequest, since this will now be backup-specific, and the initialization code for the worker pool will be moved from the reconciler init into the backup reconcile. This worker pool will be shut down upon exiting the Reconcile method.
+
+### Resilience to restart of the Velero pod
+
+The new backup phases (`Queued` and `ReadyToStart`) will be resilient to Velero pod restarts. If the Velero pod crashes or is restarted, only backups in the `InProgress` phase will be failed, so there is no change to current behavior. Queued backups will retain their queue position on restart, and ReadyToStart backups will move to InProgress when reconciled.
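As an illustration of the per-backup ItemBlock worker pool lifecycle described in the Updates to Backup controller section above, here is a minimal, self-contained sketch. It is not Velero's actual ItemBlock or worker pool implementation; the types, names, and buffer size are assumptions used only to show the pattern of starting a fixed-size pool when a backup's reconcile begins and shutting it down when the reconcile returns:

```go
package main

import (
	"fmt"
	"sync"
)

// itemBlock stands in for Velero's ItemBlock type; the real pool processes
// groups of related resources, but the start/stop lifecycle is the same.
type itemBlock struct{ name string }

// workerPool is a per-backup pool: one buffered input channel plus
// itemBlockWorkerCount goroutines, created at the start of the backup
// reconcile and stopped when Reconcile returns.
type workerPool struct {
	input chan itemBlock
	wg    sync.WaitGroup
}

func startWorkerPool(workers, buffer int) *workerPool {
	p := &workerPool{input: make(chan itemBlock, buffer)}
	for i := 0; i < workers; i++ {
		p.wg.Add(1)
		go func(id int) {
			defer p.wg.Done()
			for ib := range p.input {
				fmt.Printf("worker %d backing up %s\n", id, ib.name)
			}
		}(i)
	}
	return p
}

// stop closes the input channel and waits for in-flight ItemBlocks to drain.
func (p *workerPool) stop() {
	close(p.input)
	p.wg.Wait()
}

func main() {
	// With concurrent-backups=5 and --item-block-worker-count=6, up to five of
	// these pools (30 workers total) may exist at once, one per InProgress backup.
	pool := startWorkerPool(6, 10)
	defer pool.stop()
	pool.input <- itemBlock{name: "ns1/pod-a"}
	pool.input <- itemBlock{name: "ns1/pvc-a"}
}
```

Because each backup owns its pool and shuts it down with the reconcile, no workers outlive the backup they were created for.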
+ +### Observability + +#### Logging + +When a backup is dequeued, an info log message will also include the wait time, calculated as `now - creationTimestamp`. When a backup is passed over due to overlap, an info log message will indicate which namespaces were in conflict. + +#### Velero CLI + +The `velero backup describe` output will include the current queue position for queued backups. diff --git a/go.mod b/go.mod index 033773e82..d32340b2b 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/vmware-tanzu/velero -go 1.24 +go 1.24.0 require ( cloud.google.com/go/storage v1.55.0 @@ -17,7 +17,7 @@ require ( github.com/aws/aws-sdk-go-v2/service/s3 v1.48.0 github.com/aws/aws-sdk-go-v2/service/sts v1.26.7 github.com/bombsimon/logrusr/v3 v3.0.0 - github.com/evanphx/json-patch/v5 v5.9.0 + github.com/evanphx/json-patch/v5 v5.9.11 github.com/fatih/color v1.18.0 github.com/gobwas/glob v0.2.3 github.com/google/go-cmp v0.7.0 @@ -27,8 +27,8 @@ require ( github.com/joho/godotenv v1.3.0 github.com/kopia/kopia v0.16.0 github.com/kubernetes-csi/external-snapshotter/client/v8 v8.2.0 - github.com/onsi/ginkgo/v2 v2.19.0 - github.com/onsi/gomega v1.33.1 + github.com/onsi/ginkgo/v2 v2.22.0 + github.com/onsi/gomega v1.36.1 github.com/petar/GoLLRB v0.0.0-20210522233825-ae3b015fd3e9 github.com/pkg/errors v0.9.1 github.com/prometheus/client_golang v1.22.0 @@ -49,17 +49,17 @@ require ( google.golang.org/grpc v1.73.0 google.golang.org/protobuf v1.36.6 gopkg.in/yaml.v3 v3.0.1 - k8s.io/api v0.31.3 - k8s.io/apiextensions-apiserver v0.31.3 - k8s.io/apimachinery v0.31.3 - k8s.io/cli-runtime v0.31.3 - k8s.io/client-go v0.31.3 + k8s.io/api v0.33.3 + k8s.io/apiextensions-apiserver v0.33.3 + k8s.io/apimachinery v0.33.3 + k8s.io/cli-runtime v0.33.3 + k8s.io/client-go v0.33.3 k8s.io/klog/v2 v2.130.1 - k8s.io/kube-aggregator v0.31.3 - k8s.io/metrics v0.31.3 - k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 - sigs.k8s.io/controller-runtime v0.19.3 - sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd + k8s.io/kube-aggregator v0.33.3 + k8s.io/metrics v0.33.3 + k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738 + sigs.k8s.io/controller-runtime v0.21.0 + sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 sigs.k8s.io/yaml v1.4.0 ) @@ -72,8 +72,8 @@ require ( cloud.google.com/go/iam v1.5.2 // indirect cloud.google.com/go/monitoring v1.24.2 // indirect github.com/Azure/azure-sdk-for-go/sdk/internal v1.11.1 // indirect - github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 // indirect - github.com/AzureAD/microsoft-authentication-library-for-go v1.4.2 // indirect + github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 // indirect + github.com/AzureAD/microsoft-authentication-library-for-go v1.5.0 // indirect github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.27.0 // indirect github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.51.0 // indirect github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.51.0 // indirect @@ -91,6 +91,7 @@ require ( github.com/aws/aws-sdk-go-v2/service/ssooidc v1.21.6 // indirect github.com/aws/smithy-go v1.19.0 // indirect github.com/beorn7/perks v1.0.1 // indirect + github.com/blang/semver/v4 v4.0.0 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/chmduquesne/rollinghash v4.0.0+incompatible // indirect github.com/cncf/xds/go v0.0.0-20250326154945-ae57f3c0d45f // indirect @@ -101,32 +102,31 @@ require ( github.com/envoyproxy/go-control-plane/envoy v1.32.4 // indirect 
github.com/envoyproxy/protoc-gen-validate v1.2.1 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect + github.com/fsnotify/fsnotify v1.7.0 // indirect github.com/fxamacker/cbor/v2 v2.7.0 // indirect github.com/go-ini/ini v1.67.0 // indirect github.com/go-jose/go-jose/v4 v4.0.5 // indirect github.com/go-logr/logr v1.4.3 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/go-ole/go-ole v1.3.0 // indirect - github.com/go-openapi/jsonpointer v0.19.6 // indirect + github.com/go-openapi/jsonpointer v0.21.0 // indirect github.com/go-openapi/jsonreference v0.20.2 // indirect - github.com/go-openapi/swag v0.22.4 // indirect + github.com/go-openapi/swag v0.23.0 // indirect github.com/go-task/slim-sprig/v3 v3.0.0 // indirect github.com/goccy/go-json v0.10.5 // indirect github.com/gofrs/flock v0.12.1 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang-jwt/jwt/v5 v5.2.2 // indirect - github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/golang/protobuf v1.5.4 // indirect - github.com/google/gnostic-models v0.6.8 // indirect - github.com/google/gofuzz v1.2.0 // indirect - github.com/google/pprof v0.0.0-20240525223248-4bfdf5a9a2af // indirect + github.com/google/btree v1.1.3 // indirect + github.com/google/gnostic-models v0.6.9 // indirect + github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db // indirect github.com/google/s2a-go v0.1.9 // indirect github.com/googleapis/enterprise-certificate-proxy v0.3.6 // indirect github.com/googleapis/gax-go/v2 v2.14.2 // indirect - github.com/gorilla/websocket v1.5.0 // indirect + github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674 // indirect github.com/hashicorp/cronexpr v1.1.2 // indirect github.com/hashicorp/yamux v0.1.1 // indirect - github.com/imdario/mergo v0.3.13 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/jmespath/go-jmespath v0.4.0 // indirect github.com/josharian/intern v1.0.0 // indirect @@ -144,7 +144,7 @@ require ( github.com/minio/md5-simd v1.1.2 // indirect github.com/minio/minio-go/v7 v7.0.94 // indirect github.com/mitchellh/go-testing-interface v1.0.0 // indirect - github.com/moby/spdystream v0.4.0 // indirect + github.com/moby/spdystream v0.5.0 // indirect github.com/moby/term v0.5.0 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect @@ -181,7 +181,7 @@ require ( go.starlark.net v0.0.0-20230525235612-a134d8f9ddca // indirect go.uber.org/multierr v1.11.0 // indirect golang.org/x/crypto v0.40.0 // indirect - golang.org/x/exp v0.0.0-20230522175609-2e198f4a06a1 // indirect + golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect golang.org/x/sync v0.16.0 // indirect golang.org/x/sys v0.34.0 // indirect golang.org/x/term v0.33.0 // indirect @@ -193,9 +193,9 @@ require ( google.golang.org/genproto/googleapis/rpc v0.0.0-20250603155806-513f23925822 // indirect gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect - gopkg.in/yaml.v2 v2.4.0 // indirect - k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 // indirect - sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect + k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff // indirect + sigs.k8s.io/randfill v1.0.0 // indirect + sigs.k8s.io/structured-merge-diff/v4 v4.6.0 // indirect ) replace github.com/kopia/kopia => github.com/project-velero/kopia v0.0.0-20250722052735-3ea24d208777 diff --git a/go.sum b/go.sum index 99f7a87ee..84a94ed32 
100644 --- a/go.sum +++ b/go.sum @@ -84,8 +84,8 @@ github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/storage/armstorage v1.8.0 github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/storage/armstorage v1.8.0/go.mod h1:DWAciXemNf++PQJLeXUB4HHH5OpsAh12HZnu2wXE1jA= github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.6.1 h1:lhZdRq7TIx0GJQvSyX2Si406vrYsov2FXGp/RnSEtcs= github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.6.1/go.mod h1:8cl44BDmi+effbARHMQjgOKA2AYvcohNm7KEt42mSV8= -github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 h1:UQHMgLO+TxOElx5B5HZ4hJQsoJ/PvUvKRhJHDQXO8P8= -github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= +github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25UVaW/CKtUDjefjrs0SPonmDGUVOYP0= +github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= github.com/Azure/go-autorest v14.2.0+incompatible/go.mod h1:r+4oMnoxhatjLLJ6zxSWATqVooLgysK6ZNox3g/xq24= github.com/Azure/go-autorest/autorest v0.11.18/go.mod h1:dSiJPy22c3u0OtOKDNttNgqpNFY/GeWa7GH/Pz56QRA= github.com/Azure/go-autorest/autorest/adal v0.9.13/go.mod h1:W/MM4U6nLxnIskrw4UwWzlHfGjwUS50aOsc/I3yuU8M= @@ -95,8 +95,8 @@ github.com/Azure/go-autorest/logger v0.2.1/go.mod h1:T9E3cAhj2VqvPOtCYAvby9aBXkZ github.com/Azure/go-autorest/tracing v0.6.0/go.mod h1:+vhtPC754Xsa23ID7GlGsrdKBpUA79WCAKPPZVC2DeU= github.com/AzureAD/microsoft-authentication-extensions-for-go/cache v0.1.1 h1:WJTmL004Abzc5wDB5VtZG2PJk5ndYDgVacGqfirKxjM= github.com/AzureAD/microsoft-authentication-extensions-for-go/cache v0.1.1/go.mod h1:tCcJZ0uHAmvjsVYzEFivsRTN00oz5BEsRgQHu5JZ9WE= -github.com/AzureAD/microsoft-authentication-library-for-go v1.4.2 h1:oygO0locgZJe7PpYPXT5A29ZkwJaPqcva7BVeemZOZs= -github.com/AzureAD/microsoft-authentication-library-for-go v1.4.2/go.mod h1:wP83P5OoQ5p6ip3ScPr0BAq0BvuPAvacpEuSzyouqAI= +github.com/AzureAD/microsoft-authentication-library-for-go v1.5.0 h1:XkkQbfMyuH2jTSjQjSoihryI8GINRcs4xp8lNawg0FI= +github.com/AzureAD/microsoft-authentication-library-for-go v1.5.0/go.mod h1:HKpQxkWaGLJ+D/5H8QRpyQXA1eKjxkFlOMwck5+33Jk= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= github.com/GehirnInc/crypt v0.0.0-20230320061759-8cc1b52080c5 h1:IEjq88XO4PuBDcvmjQJcQGg+w+UaafSy8G5Kcb5tBhI= @@ -170,6 +170,8 @@ github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6r github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= github.com/bketelsen/crypt v0.0.3-0.20200106085610-5cbc8cc4026c/go.mod h1:MKsuJmJgSg28kpZDP6UIiPt0e0Oz0kqKNGyRaWEPv84= github.com/bketelsen/crypt v0.0.4/go.mod h1:aI6NrJ0pMGgvZKL1iVgXLnfIFJtfV+bKCoqOes/6LfM= +github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM= +github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ= github.com/bombsimon/logrusr/v3 v3.0.0 h1:tcAoLfuAhKP9npBxWzSdpsvKPQt1XV02nSf2lZA82TQ= github.com/bombsimon/logrusr/v3 v3.0.0/go.mod h1:PksPPgSFEL2I52pla2glgCyyd2OqOHAnFF5E+g8Ixco= github.com/bufbuild/protocompile v0.4.0 h1:LbFKd2XowZvQ/kajzguUp2DC9UEIQhIq77fZZlaQsNA= @@ -239,8 +241,8 @@ github.com/envoyproxy/protoc-gen-validate v1.2.1/go.mod h1:d/C80l/jxXLdfEIhX1W2T github.com/evanphx/json-patch v4.11.0+incompatible/go.mod 
h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= github.com/evanphx/json-patch v5.6.0+incompatible h1:jBYDEEiFBPxA0v50tFdvOzQQTCvpL6mnFh5mB2/l16U= github.com/evanphx/json-patch v5.6.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= -github.com/evanphx/json-patch/v5 v5.9.0 h1:kcBlZQbplgElYIlo/n1hJbls2z/1awpXxpRi0/FOJfg= -github.com/evanphx/json-patch/v5 v5.9.0/go.mod h1:VNkHZ/282BpEyt/tObQO8s5CMPmYYq14uClGH4abBuQ= +github.com/evanphx/json-patch/v5 v5.9.11 h1:/8HVnzMq13/3x9TPvjG08wUGqBTmZBsCWzjTM0wiaDU= +github.com/evanphx/json-patch/v5 v5.9.11/go.mod h1:3j+LviiESTElxA4p3EMKAB9HXj3/XEtnUf6OZxqIQTM= github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= github.com/fatih/color v1.18.0 h1:S8gINlzdQ840/4pfAwic/ZE0djQEH3wM94VfqLTZcOM= github.com/fatih/color v1.18.0/go.mod h1:4FelSpRwEGDpQ12mAdzqdOukCy4u8WUtOY6lkT/6HfU= @@ -282,8 +284,9 @@ github.com/go-ole/go-ole v1.3.0 h1:Dt6ye7+vXGIKZ7Xtk4s6/xVdGDQynvom7xCFEdWr6uE= github.com/go-ole/go-ole v1.3.0/go.mod h1:5LS6F96DhAwUc7C+1HLexzMXY1xGRSryjyPPKW6zv78= github.com/go-openapi/jsonpointer v0.19.3/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg= github.com/go-openapi/jsonpointer v0.19.5/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg= -github.com/go-openapi/jsonpointer v0.19.6 h1:eCs3fxoIi3Wh6vtgmLTOjdhSpiqphQ+DaPn38N2ZdrE= github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs= +github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ= +github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY= github.com/go-openapi/jsonreference v0.19.3/go.mod h1:rjx6GuL8TTa9VaixXglHmQmIL98+wF9xc8zWvFonSJ8= github.com/go-openapi/jsonreference v0.19.5/go.mod h1:RdybgQwPxbL4UEjuAruzK1x3nE69AqPYEJeo/TWfEeg= github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE= @@ -291,8 +294,8 @@ github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk= github.com/go-openapi/swag v0.19.14/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ= github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= -github.com/go-openapi/swag v0.22.4 h1:QLMzNJnMGPRNDCbySlcj1x01tzU8/9LTTL9hZZZogBU= -github.com/go-openapi/swag v0.22.4/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= +github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE= +github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= @@ -318,7 +321,6 @@ github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef/go.mod h1:cIg4er github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= -github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= github.com/golang/groupcache 
v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= @@ -350,8 +352,10 @@ github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6 github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/btree v1.0.1/go.mod h1:xXMiIv4Fb/0kKde4SpL7qlzvu5cMJDRkFDxJfI9uaxA= -github.com/google/gnostic-models v0.6.8 h1:yo/ABAfM5IMRsS1VnXjTBvUb61tFIHozhlYvRgGre9I= -github.com/google/gnostic-models v0.6.8/go.mod h1:5n7qKqH0f5wFt+aWF8CW6pZLLNOfYuF5OpfBSENuI8U= +github.com/google/btree v1.1.3 h1:CVpQJjYgC4VbzxeGVHfvZrv1ctoYCAI8vbl07Fcxlyg= +github.com/google/btree v1.1.3/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4= +github.com/google/gnostic-models v0.6.9 h1:MU/8wDLif2qCXZmzncUQ/BOfxWfthHi63KqpoNbWqVw= +github.com/google/gnostic-models v0.6.9/go.mod h1:CiWsm0s6BSQd1hRn8/QmxqB6BesYcbSZxsz9b0KuDBw= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= @@ -389,8 +393,8 @@ github.com/google/pprof v0.0.0-20201203190320-1bf35d6f28c2/go.mod h1:kpwsk12EmLe github.com/google/pprof v0.0.0-20201218002935-b9804c9f04c2/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/pprof v0.0.0-20210122040257-d980be63207e/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/pprof v0.0.0-20210226084205-cbba55b83ad5/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= -github.com/google/pprof v0.0.0-20240525223248-4bfdf5a9a2af h1:kmjWCqn2qkEml422C2Rrd27c3VGxi6a/6HNq8QmHRKM= -github.com/google/pprof v0.0.0-20240525223248-4bfdf5a9a2af/go.mod h1:K1liHPHnj73Fdn/EKuT8nrFqBihUSKXoLYU0BuatOYo= +github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db h1:097atOisP2aRj7vFgYQBbFN4U4JNXUNYpxael3UzMyo= +github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= github.com/google/s2a-go v0.1.9 h1:LGD7gtMgezd8a/Xak7mEWL0PjoTQFvpRudN895yqKW0= github.com/google/s2a-go v0.1.9/go.mod h1:YA0Ei2ZQL3acow2O62kdp9UlnvMmU7kA6Eutn0dXayM= @@ -413,8 +417,8 @@ github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY= github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ= github.com/gorilla/websocket v1.4.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= -github.com/gorilla/websocket v1.5.0 h1:PPwGk2jz7EePpoHN/+ClbZu8SPxiqlu12wZP/3sWmnc= -github.com/gorilla/websocket v1.5.0/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= +github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674 h1:JeSE6pjso5THxAzdVpqr6/geYxZytqFMBCOtn/ujyeo= +github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674/go.mod h1:r4w70xmWCQKmi1ONH4KIaBptdivuRPyosB9RmPlGEwA= github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA= github.com/grpc-ecosystem/go-grpc-middleware v1.0.0/go.mod 
h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= @@ -455,8 +459,6 @@ github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1: github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/imdario/mergo v0.3.5/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA= github.com/imdario/mergo v0.3.11/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA= -github.com/imdario/mergo v0.3.13 h1:lFzP57bqS/wsqKssCGmtLAb8A0wKjLGrve2q3PPVcBk= -github.com/imdario/mergo v0.3.13/go.mod h1:4lJ1jqUDcsbIECGy0RUJAXNIhg+6ocWgb1ALK2O4oXg= github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= @@ -550,8 +552,8 @@ github.com/mitchellh/mapstructure v0.0.0-20160808181253-ca63d7c062ee/go.mod h1:F github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= github.com/mitchellh/mapstructure v1.4.1/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= github.com/moby/spdystream v0.2.0/go.mod h1:f7i0iNDQJ059oMTcWxx8MA/zKFIuD/lY+0GqbN2Wy8c= -github.com/moby/spdystream v0.4.0 h1:Vy79D6mHeJJjiPdFEL2yku1kl0chZpJfZcPpb16BRl8= -github.com/moby/spdystream v0.4.0/go.mod h1:xBAYlnt/ay+11ShkdFKNAG7LsyK/tmNBVvVOwrfMgdI= +github.com/moby/spdystream v0.5.0 h1:7r0J1Si3QO/kjRitvSLVVFUjxMEb/YLj6S9FF62JBCU= +github.com/moby/spdystream v0.5.0/go.mod h1:xBAYlnt/ay+11ShkdFKNAG7LsyK/tmNBVvVOwrfMgdI= github.com/moby/term v0.5.0 h1:xt8Q1nalod/v7BqbG21f8mQPqH+xAaC9C3N3wfWbVP0= github.com/moby/term v0.5.0/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= @@ -584,13 +586,13 @@ github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+W github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk= github.com/onsi/ginkgo v1.14.0 h1:2mOpI4JVVPBN+WQRa0WKH2eXR+Ey+uK4n7Zj0aYpIQA= github.com/onsi/ginkgo v1.14.0/go.mod h1:iSB4RoI2tjJc9BBv4NKIKWKya62Rps+oPG/Lv9klQyY= -github.com/onsi/ginkgo/v2 v2.19.0 h1:9Cnnf7UHo57Hy3k6/m5k3dRfGTMXGvxhHFvkDTCTpvA= -github.com/onsi/ginkgo/v2 v2.19.0/go.mod h1:rlwLi9PilAFJ8jCg9UE1QP6VBpd6/xj3SRC0d6TU0To= +github.com/onsi/ginkgo/v2 v2.22.0 h1:Yed107/8DjTr0lKCNt7Dn8yQ6ybuDRQoMGrNFKzMfHg= +github.com/onsi/ginkgo/v2 v2.22.0/go.mod h1:7Du3c42kxCUegi0IImZ1wUQzMBVecgIHjR1C+NkhLQo= github.com/onsi/gomega v0.0.0-20170829124025-dcabb60a477c/go.mod h1:C1qb7wdrVGGVU+Z6iS04AVkA3Q65CEZX59MT0QO5uiA= github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY= github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo= -github.com/onsi/gomega v1.33.1 h1:dsYjIxxSR755MDmKVsaFQTE22ChNBcuuTWgkUDSubOk= -github.com/onsi/gomega v1.33.1/go.mod h1:U4R44UsT+9eLIaYRB2a5qajjtQYn0hauxvRm16AVYg0= +github.com/onsi/gomega v1.36.1 h1:bJDPBO7ibjxcbHMgSCoo4Yj18UWbKDlLwX1x9sybDcw= +github.com/onsi/gomega v1.36.1/go.mod h1:PvZbdDc8J6XJEpDK4HCuRBm8a6Fzp9/DmhC9C7yFlog= github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= 
github.com/pelletier/go-toml v1.9.3/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c= @@ -804,8 +806,8 @@ golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u0 golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM= golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU= -golang.org/x/exp v0.0.0-20230522175609-2e198f4a06a1 h1:k/i9J1pBpvlfR+9QsetwPyERsqu1GIbi967PQMq3Ivc= -golang.org/x/exp v0.0.0-20230522175609-2e198f4a06a1/go.mod h1:V1LtkGg67GoY2N1AnLN78QLrzxkLyJw7RJb1gzOOz9w= +golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 h1:2dVuKD2vS7b0QIHQbpyTISPd0LeHDbnYEryqj5Q1ug8= +golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56/go.mod h1:M4RDyNAINzryxdtnbRXRL/OHtkFuWGRjvuhBJpk2IlY= golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= @@ -1206,7 +1208,6 @@ gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -gopkg.in/yaml.v3 v3.0.0/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= @@ -1217,47 +1218,50 @@ honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= honnef.co/go/tools v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= k8s.io/api v0.22.2/go.mod h1:y3ydYpLJAaDI+BbSe2xmGcqxiWHmWjkEeIbiwHvnPR8= -k8s.io/api v0.31.3 h1:umzm5o8lFbdN/hIXbrK9oRpOproJO62CV1zqxXrLgk8= -k8s.io/api v0.31.3/go.mod h1:UJrkIp9pnMOI9K2nlL6vwpxRzzEX5sWgn8kGQe92kCE= -k8s.io/apiextensions-apiserver v0.31.3 h1:+GFGj2qFiU7rGCsA5o+p/rul1OQIq6oYpQw4+u+nciE= -k8s.io/apiextensions-apiserver v0.31.3/go.mod h1:2DSpFhUZZJmn/cr/RweH1cEVVbzFw9YBu4T+U3mf1e4= +k8s.io/api v0.33.3 h1:SRd5t//hhkI1buzxb288fy2xvjubstenEKL9K51KBI8= +k8s.io/api v0.33.3/go.mod h1:01Y/iLUjNBM3TAvypct7DIj0M0NIZc+PzAHCIo0CYGE= +k8s.io/apiextensions-apiserver v0.33.3 h1:qmOcAHN6DjfD0v9kxL5udB27SRP6SG/MTopmge3MwEs= +k8s.io/apiextensions-apiserver v0.33.3/go.mod h1:oROuctgo27mUsyp9+Obahos6CWcMISSAPzQ77CAQGz8= k8s.io/apimachinery v0.22.2/go.mod h1:O3oNtNadZdeOMxHFVxOreoznohCpy0z6mocxbZr7oJ0= -k8s.io/apimachinery v0.31.3 h1:6l0WhcYgasZ/wk9ktLq5vLaoXJJr5ts6lkaQzgeYPq4= -k8s.io/apimachinery v0.31.3/go.mod h1:rsPdaZJfTfLsNJSQzNHQvYoTmxhoOEofxtOsF3rtsMo= +k8s.io/apimachinery v0.33.3 h1:4ZSrmNa0c/ZpZJhAgRdcsFcZOw1PQU1bALVQ0B3I5LA= +k8s.io/apimachinery v0.33.3/go.mod h1:BHW0YOu7n22fFv/JkYOEfkUYNRN0fj0BlvMFWA7b+SM= k8s.io/cli-runtime v0.22.2/go.mod h1:tkm2YeORFpbgQHEK/igqttvPTRIHFRz5kATlw53zlMI= -k8s.io/cli-runtime v0.31.3 
h1:fEQD9Xokir78y7pVK/fCJN090/iYNrLHpFbGU4ul9TI= -k8s.io/cli-runtime v0.31.3/go.mod h1:Q2jkyTpl+f6AtodQvgDI8io3jrfr+Z0LyQBPJJ2Btq8= +k8s.io/cli-runtime v0.33.3 h1:Dgy4vPjNIu8LMJBSvs8W0LcdV0PX/8aGG1DA1W8lklA= +k8s.io/cli-runtime v0.33.3/go.mod h1:yklhLklD4vLS8HNGgC9wGiuHWze4g7x6XQZ+8edsKEo= k8s.io/client-go v0.22.2/go.mod h1:sAlhrkVDf50ZHx6z4K0S40wISNTarf1r800F+RlCF6U= -k8s.io/client-go v0.31.3 h1:CAlZuM+PH2cm+86LOBemaJI/lQ5linJ6UFxKX/SoG+4= -k8s.io/client-go v0.31.3/go.mod h1:2CgjPUTpv3fE5dNygAr2NcM8nhHzXvxB8KL5gYc3kJs= +k8s.io/client-go v0.33.3 h1:M5AfDnKfYmVJif92ngN532gFqakcGi6RvaOF16efrpA= +k8s.io/client-go v0.33.3/go.mod h1:luqKBQggEf3shbxHY4uVENAxrDISLOarxpTKMiUuujg= k8s.io/gengo v0.0.0-20200413195148-3a45101e95ac/go.mod h1:ezvh/TsK7cY6rbqRK0oQQ8IAqLxYwwyPxAX1Pzy0ii0= k8s.io/klog/v2 v2.0.0/go.mod h1:PBfzABfn139FHAV07az/IF9Wp1bkk3vpT2XSJ76fSDE= k8s.io/klog/v2 v2.9.0/go.mod h1:hy9LJ/NvuK+iVyP4Ehqva4HxZG/oXyIS3n3Jmire4Ec= k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= -k8s.io/kube-aggregator v0.31.3 h1:DqHPdTglJHgOfB884AaroyxrML/aL82ASYOh65m7MSk= -k8s.io/kube-aggregator v0.31.3/go.mod h1:Kx59Xjnf0SnY47qf9Or++4y3XCHQ3kR0xk1Di6KFiFU= +k8s.io/kube-aggregator v0.33.3 h1:Pa6hQpKJMX0p0D2wwcxXJgu02++gYcGWXoW1z1ZJDfo= +k8s.io/kube-aggregator v0.33.3/go.mod h1:hwvkUoQ8q6gv0+SgNnlmQ3eUue1zHhJKTHsX7BwxwSE= k8s.io/kube-openapi v0.0.0-20210421082810-95288971da7e/go.mod h1:vHXdDvt9+2spS2Rx9ql3I8tycm3H9FDfdUoIuKCefvw= -k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 h1:BZqlfIlq5YbRMFko6/PM7FjZpUb45WallggurYhKGag= -k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340/go.mod h1:yD4MZYeKMBwQKVht279WycxKyM84kkAx2DPrTXaeb98= -k8s.io/metrics v0.31.3 h1:DkT9I3gFlb2/z+/4BMY7WrQ/PnbukuV4Yli82v/KBCM= -k8s.io/metrics v0.31.3/go.mod h1:2w9gpd8z+13oJmaPR6p3kDyrDqnxSyoKpnOw2qLIdhI= +k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff h1:/usPimJzUKKu+m+TE36gUyGcf03XZEP0ZIKgKj35LS4= +k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff/go.mod h1:5jIi+8yX4RIb8wk3XwBo5Pq2ccx4FP10ohkbSKCZoK8= +k8s.io/metrics v0.33.3 h1:9CcqBz15JZfISqwca33gdHS8I6XfsK1vA8WUdEnG70g= +k8s.io/metrics v0.33.3/go.mod h1:Aw+cdg4AYHw0HvUY+lCyq40FOO84awrqvJRTw0cmXDs= k8s.io/utils v0.0.0-20210819203725-bdf08cb9a70a/go.mod h1:jPW/WVKK9YHAvNhRxK0md/EJ228hCsBRufyofKtW8HA= -k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 h1:pUdcCO1Lk/tbT5ztQWOBi5HBgbBP1J8+AsQnQCKsi8A= -k8s.io/utils v0.0.0-20240711033017-18e509b52bc8/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738 h1:M3sRQVHv7vB20Xc2ybTt7ODCeFj6JSWYFzOFnYeS6Ro= +k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0= rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA= -sigs.k8s.io/controller-runtime v0.19.3 h1:XO2GvC9OPftRst6xWCpTgBZO04S2cbp0Qqkj8bX1sPw= -sigs.k8s.io/controller-runtime v0.19.3/go.mod h1:j4j87DqtsThvwTv5/Tc5NFRyyF/RF0ip4+62tbTSIUM= -sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= -sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= +sigs.k8s.io/controller-runtime v0.21.0 h1:CYfjpEuicjUecRk+KAeyYh+ouUBn4llGyDYytIGcJS8= +sigs.k8s.io/controller-runtime v0.21.0/go.mod 
h1:OSg14+F65eWqIu4DceX7k/+QRAbTTvxeQSNSOQpukWM= +sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 h1:/Rv+M11QRah1itp8VhT6HoVx1Ray9eB4DBr+K+/sCJ8= +sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3/go.mod h1:18nIHnGi6636UCz6m8i4DhaJ65T6EruyzmoQqI2BVDo= sigs.k8s.io/kustomize/api v0.8.11/go.mod h1:a77Ls36JdfCWojpUqR6m60pdGY1AYFix4AH83nJtY1g= sigs.k8s.io/kustomize/kyaml v0.11.0/go.mod h1:GNMwjim4Ypgp/MueD3zXHLRJEjz7RvtPae0AwlvEMFM= +sigs.k8s.io/randfill v0.0.0-20250304075658-069ef1bbf016/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= +sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU= +sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= sigs.k8s.io/structured-merge-diff/v4 v4.0.2/go.mod h1:bJZC9H9iH24zzfZ/41RGcq60oK1F7G282QMXDPYydCw= sigs.k8s.io/structured-merge-diff/v4 v4.1.2/go.mod h1:j/nl6xW8vLS49O8YvXW1ocPhZawJtm+Yrr7PPRQ0Vg4= -sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4= -sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= +sigs.k8s.io/structured-merge-diff/v4 v4.6.0 h1:IUA9nvMmnKWcj5jl84xn+T5MnlZKThmUW1TdblaLVAc= +sigs.k8s.io/structured-merge-diff/v4 v4.6.0/go.mod h1:dDy58f92j70zLsuZVuUX5Wp9vtxXpaZnkPGWeqDfCps= sigs.k8s.io/yaml v1.2.0/go.mod h1:yfXDCHCao9+ENCvLSE62v9VSji2MKu5jeNfTrofGhJc= sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= diff --git a/pkg/backup/backup.go b/pkg/backup/backup.go index a11acd52a..824c44875 100644 --- a/pkg/backup/backup.go +++ b/pkg/backup/backup.go @@ -366,7 +366,7 @@ func (kb *kubernetesBackupper) BackupWithResolvers( discoveryHelper: kb.discoveryHelper, podVolumeBackupper: podVolumeBackupper, podVolumeSnapshotTracker: podvolume.NewTracker(), - volumeSnapshotterGetter: volumeSnapshotterGetter, + volumeSnapshotterCache: NewVolumeSnapshotterCache(volumeSnapshotterGetter), itemHookHandler: &hook.DefaultItemHookHandler{ PodCommandExecutor: kb.podCommandExecutor, }, diff --git a/pkg/backup/backup_test.go b/pkg/backup/backup_test.go index 1cf80fe0f..eb6c2d9d4 100644 --- a/pkg/backup/backup_test.go +++ b/pkg/backup/backup_test.go @@ -3269,7 +3269,7 @@ func TestBackupWithSnapshots(t *testing.T) { err := h.backupper.Backup(h.log, tc.req, backupFile, nil, nil, tc.snapshotterGetter) require.NoError(t, err) - assert.Equal(t, tc.want, tc.req.VolumeSnapshots) + assert.Equal(t, tc.want, tc.req.VolumeSnapshots.Get()) }) } } @@ -4213,7 +4213,7 @@ func TestBackupWithPodVolume(t *testing.T) { assert.Equal(t, tc.want, req.PodVolumeBackups) // this assumes that we don't have any test cases where some PVs should be snapshotted using a VolumeSnapshotter - assert.Nil(t, req.VolumeSnapshots) + assert.Nil(t, req.VolumeSnapshots.Get()) }) } } diff --git a/pkg/backup/item_backupper.go b/pkg/backup/item_backupper.go index b890b23c3..3622ac7e1 100644 --- a/pkg/backup/item_backupper.go +++ b/pkg/backup/item_backupper.go @@ -70,13 +70,11 @@ type itemBackupper struct { discoveryHelper discovery.Helper podVolumeBackupper podvolume.Backupper podVolumeSnapshotTracker *podvolume.Tracker - volumeSnapshotterGetter VolumeSnapshotterGetter kubernetesBackupper *kubernetesBackupper - - itemHookHandler hook.ItemHookHandler - snapshotLocationVolumeSnapshotters map[string]vsv1.VolumeSnapshotter - hookTracker *hook.HookTracker - volumeHelperImpl volumehelper.VolumeHelper + volumeSnapshotterCache *VolumeSnapshotterCache + itemHookHandler 
hook.ItemHookHandler + hookTracker *hook.HookTracker + volumeHelperImpl volumehelper.VolumeHelper } type FileForArchive struct { @@ -502,30 +500,6 @@ func (ib *itemBackupper) executeActions( return obj, itemFiles, nil } -// volumeSnapshotter instantiates and initializes a VolumeSnapshotter given a VolumeSnapshotLocation, -// or returns an existing one if one's already been initialized for the location. -func (ib *itemBackupper) volumeSnapshotter(snapshotLocation *velerov1api.VolumeSnapshotLocation) (vsv1.VolumeSnapshotter, error) { - if bs, ok := ib.snapshotLocationVolumeSnapshotters[snapshotLocation.Name]; ok { - return bs, nil - } - - bs, err := ib.volumeSnapshotterGetter.GetVolumeSnapshotter(snapshotLocation.Spec.Provider) - if err != nil { - return nil, err - } - - if err := bs.Init(snapshotLocation.Spec.Config); err != nil { - return nil, err - } - - if ib.snapshotLocationVolumeSnapshotters == nil { - ib.snapshotLocationVolumeSnapshotters = make(map[string]vsv1.VolumeSnapshotter) - } - ib.snapshotLocationVolumeSnapshotters[snapshotLocation.Name] = bs - - return bs, nil -} - // zoneLabelDeprecated is the label that stores availability-zone info // on PVs this is deprecated on Kubernetes >= 1.17.0 // zoneLabel is the label that stores availability-zone info @@ -641,7 +615,7 @@ func (ib *itemBackupper) takePVSnapshot(obj runtime.Unstructured, log logrus.Fie for _, snapshotLocation := range ib.backupRequest.SnapshotLocations { log := log.WithField("volumeSnapshotLocation", snapshotLocation.Name) - bs, err := ib.volumeSnapshotter(snapshotLocation) + bs, err := ib.volumeSnapshotterCache.SetNX(snapshotLocation) if err != nil { log.WithError(err).Error("Error getting volume snapshotter for volume snapshot location") continue @@ -699,7 +673,7 @@ func (ib *itemBackupper) takePVSnapshot(obj runtime.Unstructured, log logrus.Fie snapshot.Status.Phase = volume.SnapshotPhaseCompleted snapshot.Status.ProviderSnapshotID = snapshotID } - ib.backupRequest.VolumeSnapshots = append(ib.backupRequest.VolumeSnapshots, snapshot) + ib.backupRequest.VolumeSnapshots.Add(snapshot) // nil errors are automatically removed return kubeerrs.NewAggregate(errs) diff --git a/pkg/backup/request.go b/pkg/backup/request.go index 3ec05ee04..c3dae48a6 100644 --- a/pkg/backup/request.go +++ b/pkg/backup/request.go @@ -17,6 +17,8 @@ limitations under the License. package backup import ( + "sync" + "github.com/vmware-tanzu/velero/internal/hook" "github.com/vmware-tanzu/velero/internal/resourcepolicies" "github.com/vmware-tanzu/velero/internal/volume" @@ -32,11 +34,27 @@ type itemKey struct { name string } +type SynchronizedVSList struct { + sync.Mutex + VolumeSnapshotList []*volume.Snapshot +} + +func (s *SynchronizedVSList) Add(vs *volume.Snapshot) { + s.Lock() + defer s.Unlock() + s.VolumeSnapshotList = append(s.VolumeSnapshotList, vs) +} + +func (s *SynchronizedVSList) Get() []*volume.Snapshot { + s.Lock() + defer s.Unlock() + return s.VolumeSnapshotList +} + // Request is a request for a backup, with all references to other objects // materialized (e.g. backup/snapshot locations, includes/excludes, etc.) 
type Request struct { *velerov1api.Backup - StorageLocation *velerov1api.BackupStorageLocation SnapshotLocations []*velerov1api.VolumeSnapshotLocation NamespaceIncludesExcludes *collections.IncludesExcludes @@ -44,7 +62,7 @@ type Request struct { ResourceHooks []hook.ResourceHook ResolvedActions []framework.BackupItemResolvedActionV2 ResolvedItemBlockActions []framework.ItemBlockResolvedAction - VolumeSnapshots []*volume.Snapshot + VolumeSnapshots SynchronizedVSList PodVolumeBackups []*velerov1api.PodVolumeBackup BackedUpItems *backedUpItemsMap itemOperationsList *[]*itemoperation.BackupOperation @@ -80,7 +98,7 @@ func (r *Request) FillVolumesInformation() { } r.VolumesInformation.SkippedPVs = skippedPVMap - r.VolumesInformation.NativeSnapshots = r.VolumeSnapshots + r.VolumesInformation.NativeSnapshots = r.VolumeSnapshots.Get() r.VolumesInformation.PodVolumeBackups = r.PodVolumeBackups r.VolumesInformation.BackupOperations = *r.GetItemOperationsList() r.VolumesInformation.BackupName = r.Backup.Name diff --git a/pkg/backup/volume_snapshotter_cache.go b/pkg/backup/volume_snapshotter_cache.go new file mode 100644 index 000000000..620ebc337 --- /dev/null +++ b/pkg/backup/volume_snapshotter_cache.go @@ -0,0 +1,42 @@ +package backup + +import ( + "sync" + + velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" + vsv1 "github.com/vmware-tanzu/velero/pkg/plugin/velero/volumesnapshotter/v1" +) + +type VolumeSnapshotterCache struct { + cache map[string]vsv1.VolumeSnapshotter + mutex sync.Mutex + getter VolumeSnapshotterGetter +} + +func NewVolumeSnapshotterCache(getter VolumeSnapshotterGetter) *VolumeSnapshotterCache { + return &VolumeSnapshotterCache{ + cache: make(map[string]vsv1.VolumeSnapshotter), + getter: getter, + } +} + +func (c *VolumeSnapshotterCache) SetNX(location *velerov1api.VolumeSnapshotLocation) (vsv1.VolumeSnapshotter, error) { + c.mutex.Lock() + defer c.mutex.Unlock() + + if snapshotter, exists := c.cache[location.Name]; exists { + return snapshotter, nil + } + + snapshotter, err := c.getter.GetVolumeSnapshotter(location.Spec.Provider) + if err != nil { + return nil, err + } + + if err := snapshotter.Init(location.Spec.Config); err != nil { + return nil, err + } + + c.cache[location.Name] = snapshotter + return snapshotter, nil +} diff --git a/pkg/controller/backup_controller.go b/pkg/controller/backup_controller.go index 532a5f332..c9728ea82 100644 --- a/pkg/controller/backup_controller.go +++ b/pkg/controller/backup_controller.go @@ -734,8 +734,8 @@ func (b *backupReconciler) runBackup(backup *pkgbackup.Request) error { // native snapshots phase will either be failed or completed right away // https://github.com/vmware-tanzu/velero/blob/de3ea52f0cc478e99efa7b9524c7f353514261a4/pkg/backup/item_backupper.go#L632-L639 - backup.Status.VolumeSnapshotsAttempted = len(backup.VolumeSnapshots) - for _, snap := range backup.VolumeSnapshots { + backup.Status.VolumeSnapshotsAttempted = len(backup.VolumeSnapshots.Get()) + for _, snap := range backup.VolumeSnapshots.Get() { if snap.Status.Phase == volume.SnapshotPhaseCompleted { backup.Status.VolumeSnapshotsCompleted++ } @@ -882,7 +882,7 @@ func persistBackup(backup *pkgbackup.Request, } // Velero-native volume snapshots (as opposed to CSI ones) - nativeVolumeSnapshots, errs := encode.ToJSONGzip(backup.VolumeSnapshots, "native volumesnapshots list") + nativeVolumeSnapshots, errs := encode.ToJSONGzip(backup.VolumeSnapshots.Get(), "native volumesnapshots list") if errs != nil { persistErrs = append(persistErrs, errs...) 
} diff --git a/pkg/controller/backup_repository_controller_test.go b/pkg/controller/backup_repository_controller_test.go index b8de27539..0a4a04610 100644 --- a/pkg/controller/backup_repository_controller_test.go +++ b/pkg/controller/backup_repository_controller_test.go @@ -1047,7 +1047,7 @@ func TestRecallMaintenance(t *testing.T) { { name: "wait completion error", runtimeScheme: schemeFail, - expectedErr: "error waiting incomplete repo maintenance job for repo repo: error listing maintenance job for repo repo: no kind is registered for the type v1.JobList in scheme \"pkg/runtime/scheme.go:100\"", + expectedErr: "error waiting incomplete repo maintenance job for repo repo: error listing maintenance job for repo repo: no kind is registered for the type v1.JobList in scheme", }, { name: "no consolidate result", @@ -1105,7 +1105,7 @@ func TestRecallMaintenance(t *testing.T) { err := r.recallMaintenance(t.Context(), backupRepo, velerotest.NewLogger()) if test.expectedErr != "" { - assert.EqualError(t, err, test.expectedErr) + assert.ErrorContains(t, err, test.expectedErr) } else { assert.NoError(t, err) diff --git a/pkg/controller/schedule_controller.go b/pkg/controller/schedule_controller.go index 799a8c77a..ec8894571 100644 --- a/pkg/controller/schedule_controller.go +++ b/pkg/controller/schedule_controller.go @@ -229,7 +229,7 @@ func (c *scheduleReconciler) checkIfBackupInNewOrProgress(schedule *velerov1.Sch } for _, backup := range backupList.Items { - if backup.Status.Phase == velerov1.BackupPhaseNew || backup.Status.Phase == velerov1.BackupPhaseInProgress { + if backup.Status.Phase == "" || backup.Status.Phase == velerov1.BackupPhaseNew || backup.Status.Phase == velerov1.BackupPhaseInProgress { log.Debugf("%s/%s still has backups that are in InProgress or New...", schedule.Namespace, schedule.Name) return true } diff --git a/pkg/controller/schedule_controller_test.go b/pkg/controller/schedule_controller_test.go index ab0a3f66d..f4585763c 100644 --- a/pkg/controller/schedule_controller_test.go +++ b/pkg/controller/schedule_controller_test.go @@ -149,6 +149,13 @@ func TestReconcileOfSchedule(t *testing.T) { expectedPhase: string(velerov1.SchedulePhaseEnabled), backup: builder.ForBackup("ns", "name-20220905120000").ObjectMeta(builder.WithLabels(velerov1.ScheduleNameLabel, "name")).Phase(velerov1.BackupPhaseNew).Result(), }, + { + name: "schedule already has backup with empty phase (not yet reconciled).", + schedule: newScheduleBuilder(velerov1.SchedulePhaseEnabled).CronSchedule("@every 5m").LastBackupTime("2000-01-01 00:00:00").Result(), + fakeClockTime: "2017-01-01 12:00:00", + expectedPhase: string(velerov1.SchedulePhaseEnabled), + backup: builder.ForBackup("ns", "name-20220905120000").ObjectMeta(builder.WithLabels(velerov1.ScheduleNameLabel, "name")).Phase("").Result(), + }, } for _, test := range tests { @@ -215,10 +222,10 @@ func TestReconcileOfSchedule(t *testing.T) { backups := &velerov1.BackupList{} require.NoError(t, client.List(ctx, backups)) - // If backup associated with schedule's status is in New or InProgress, + // If backup associated with schedule's status is in New or InProgress or empty phase, // new backup shouldn't be submitted. 
if test.backup != nil && - (test.backup.Status.Phase == velerov1.BackupPhaseNew || test.backup.Status.Phase == velerov1.BackupPhaseInProgress) { + (test.backup.Status.Phase == "" || test.backup.Status.Phase == velerov1.BackupPhaseNew || test.backup.Status.Phase == velerov1.BackupPhaseInProgress) { assert.Len(t, backups.Items, 1) require.NoError(t, client.Delete(ctx, test.backup)) } @@ -479,4 +486,19 @@ func TestCheckIfBackupInNewOrProgress(t *testing.T) { reconciler = NewScheduleReconciler("namespace", logger, client, metrics.NewServerMetrics(), false) result = reconciler.checkIfBackupInNewOrProgress(testSchedule) assert.True(t, result) + + // Clean backup in InProgress phase. + err = client.Delete(ctx, inProgressBackup) + require.NoError(t, err, "fail to delete backup in InProgress phase in TestCheckIfBackupInNewOrProgress: %v", err) + + // Create backup with empty phase (not yet reconciled). + emptyPhaseBackup := builder.ForBackup("ns", "backup-3"). + ObjectMeta(builder.WithLabels(velerov1.ScheduleNameLabel, "name")). + Phase("").Result() + err = client.Create(ctx, emptyPhaseBackup) + require.NoError(t, err, "fail to create backup with empty phase in TestCheckIfBackupInNewOrProgress: %v", err) + + reconciler = NewScheduleReconciler("namespace", logger, client, metrics.NewServerMetrics(), false) + result = reconciler.checkIfBackupInNewOrProgress(testSchedule) + assert.True(t, result) } diff --git a/pkg/itemblock/actions/pvc_action.go b/pkg/itemblock/actions/pvc_action.go index b5d7074af..6777ef566 100644 --- a/pkg/itemblock/actions/pvc_action.go +++ b/pkg/itemblock/actions/pvc_action.go @@ -39,6 +39,8 @@ import ( type PVCAction struct { log logrus.FieldLogger crClient crclient.Client + // map[namespace]->[map[pvcVolumes]->[]podName] + nsPVCs map[string]map[string][]string } func NewPVCAction(f client.Factory) plugincommon.HandlerInitializer { @@ -78,31 +80,18 @@ func (a *PVCAction) GetRelatedItems(item runtime.Unstructured, backup *v1.Backup // Adds pods mounting this PVC to ensure that multiple pods mounting the same RWX // volume get backed up together. 
- pods := new(corev1api.PodList) - err := a.crClient.List(context.Background(), pods, crclient.InNamespace(pvc.Namespace)) + pvcs, err := a.getPVCList(pvc.Namespace) if err != nil { - return nil, errors.Wrap(err, "failed to list pods") + return nil, err } - for i := range pods.Items { - for _, volume := range pods.Items[i].Spec.Volumes { - if volume.VolumeSource.PersistentVolumeClaim == nil { - continue - } - if volume.PersistentVolumeClaim.ClaimName == pvc.Name { - if kube.IsPodRunning(&pods.Items[i]) != nil { - a.log.Infof("Related pod %s is not running, not adding to ItemBlock for PVC %s", pods.Items[i].Name, pvc.Name) - } else { - a.log.Infof("Adding related Pod %s to PVC %s", pods.Items[i].Name, pvc.Name) - relatedItems = append(relatedItems, velero.ResourceIdentifier{ - GroupResource: kuberesource.Pods, - Namespace: pods.Items[i].Namespace, - Name: pods.Items[i].Name, - }) - } - break - } - } + for _, pod := range pvcs[pvc.Name] { + a.log.Infof("Adding related Pod %s to PVC %s", pod, pvc.Name) + relatedItems = append(relatedItems, velero.ResourceIdentifier{ + GroupResource: kuberesource.Pods, + Namespace: pvc.Namespace, + Name: pod, + }) } // Gather groupedPVCs based on VGS label provided in the backup @@ -117,6 +106,35 @@ func (a *PVCAction) GetRelatedItems(item runtime.Unstructured, backup *v1.Backup return relatedItems, nil } +func (a *PVCAction) getPVCList(ns string) (map[string][]string, error) { + if a.nsPVCs == nil { + a.nsPVCs = make(map[string]map[string][]string) + } + pvcList, ok := a.nsPVCs[ns] + if ok { + return pvcList, nil + } + pvcList = make(map[string][]string) + pods := new(corev1api.PodList) + err := a.crClient.List(context.Background(), pods, crclient.InNamespace(ns)) + if err != nil { + return nil, errors.Wrap(err, "failed to list pods") + } + for i := range pods.Items { + if kube.IsPodRunning(&pods.Items[i]) != nil { + a.log.Debugf("Pod %s is not running, not adding to Pod list for PVC IBA plugin", pods.Items[i].Name) + continue + } + for _, volume := range pods.Items[i].Spec.Volumes { + if volume.VolumeSource.PersistentVolumeClaim != nil { + pvcList[volume.VolumeSource.PersistentVolumeClaim.ClaimName] = append(pvcList[volume.VolumeSource.PersistentVolumeClaim.ClaimName], pods.Items[i].Name) + } + } + } + a.nsPVCs[ns] = pvcList + return pvcList, nil +} + func (a *PVCAction) Name() string { return "PVCItemBlockAction" } diff --git a/pkg/repository/ensurer_test.go b/pkg/repository/ensurer_test.go index 4003aa369..b2d60eb21 100644 --- a/pkg/repository/ensurer_test.go +++ b/pkg/repository/ensurer_test.go @@ -81,7 +81,7 @@ func TestEnsureRepo(t *testing.T) { namespace: "fake-ns", bsl: "fake-bsl", repositoryType: "fake-repo-type", - err: "error getting backup repository list: no kind is registered for the type v1.BackupRepositoryList in scheme \"pkg/runtime/scheme.go:100\"", + err: "error getting backup repository list: no kind is registered for the type v1.BackupRepositoryList in scheme", }, { name: "success on existing repo", @@ -128,7 +128,7 @@ func TestEnsureRepo(t *testing.T) { repo, err := ensurer.EnsureRepo(t.Context(), velerov1.DefaultNamespace, test.namespace, test.bsl, test.repositoryType) if err != nil { - require.EqualError(t, err, test.err) + require.ErrorContains(t, err, test.err) } else { require.NoError(t, err) } @@ -190,7 +190,7 @@ func TestCreateBackupRepositoryAndWait(t *testing.T) { namespace: "fake-ns", bsl: "fake-bsl", repositoryType: "fake-repo-type", - err: "unable to create backup repository resource: no kind is registered for the type 
v1.BackupRepository in scheme \"pkg/runtime/scheme.go:100\"", + err: "unable to create backup repository resource: no kind is registered for the type v1.BackupRepository in scheme", }, { name: "get repo fail", @@ -252,7 +252,7 @@ func TestCreateBackupRepositoryAndWait(t *testing.T) { RepositoryType: test.repositoryType, }) if err != nil { - require.EqualError(t, err, test.err) + require.ErrorContains(t, err, test.err) } else { require.NoError(t, err) } diff --git a/pkg/repository/maintenance/maintenance.go b/pkg/repository/maintenance/maintenance.go index 16a94535f..e43918d4d 100644 --- a/pkg/repository/maintenance/maintenance.go +++ b/pkg/repository/maintenance/maintenance.go @@ -449,6 +449,35 @@ func StartNewJob( return maintenanceJob.Name, nil } +// buildTolerationsForMaintenanceJob builds the tolerations for maintenance jobs. +// It includes the required Windows toleration for backward compatibility and filters +// tolerations from the Velero deployment to only include those with keys that are +// in the ThirdPartyTolerations allowlist, following the same pattern as labels and annotations. +func buildTolerationsForMaintenanceJob(deployment *appsv1api.Deployment) []corev1api.Toleration { + // Start with the Windows toleration for backward compatibility + windowsToleration := corev1api.Toleration{ + Key: "os", + Operator: "Equal", + Effect: "NoSchedule", + Value: "windows", + } + result := []corev1api.Toleration{windowsToleration} + + // Filter tolerations from the Velero deployment to only include allowed ones + // Only tolerations that exist on the deployment AND have keys in the allowlist are inherited + deploymentTolerations := veleroutil.GetTolerationsFromVeleroServer(deployment) + for _, k := range util.ThirdPartyTolerations { + for _, toleration := range deploymentTolerations { + if toleration.Key == k { + result = append(result, toleration) + break // Only add the first matching toleration for each allowed key + } + } + } + + return result +} + func getPriorityClassName(ctx context.Context, cli client.Client, config *velerotypes.JobConfigs, logger logrus.FieldLogger) string { // Use the priority class name from the global job configuration if available // Note: Priority class is only read from global config, not per-repository @@ -593,15 +622,8 @@ func buildJob( SecurityContext: podSecurityContext, Volumes: volumes, ServiceAccountName: serviceAccount, - Tolerations: []corev1api.Toleration{ - { - Key: "os", - Operator: "Equal", - Effect: "NoSchedule", - Value: "windows", - }, - }, - ImagePullSecrets: imagePullSecrets, + Tolerations: buildTolerationsForMaintenanceJob(deployment), + ImagePullSecrets: imagePullSecrets, }, }, }, diff --git a/pkg/repository/maintenance/maintenance_test.go b/pkg/repository/maintenance/maintenance_test.go index 3dd12e5fa..93d8f9b2f 100644 --- a/pkg/repository/maintenance/maintenance_test.go +++ b/pkg/repository/maintenance/maintenance_test.go @@ -698,7 +698,7 @@ func TestWaitAllJobsComplete(t *testing.T) { { name: "list job error", runtimeScheme: schemeFail, - expectedError: "error listing maintenance job for repo fake-repo: no kind is registered for the type v1.JobList in scheme \"pkg/runtime/scheme.go:100\"", + expectedError: "error listing maintenance job for repo fake-repo: no kind is registered for the type v1.JobList in scheme", }, { name: "job not exist", @@ -847,7 +847,7 @@ func TestWaitAllJobsComplete(t *testing.T) { history, err := WaitAllJobsComplete(test.ctx, fakeClient, repo, 3, velerotest.NewLogger()) if test.expectedError != "" { - 
require.EqualError(t, err, test.expectedError) + require.ErrorContains(t, err, test.expectedError) } else { require.NoError(t, err) } @@ -1481,3 +1481,291 @@ func TestBuildJobWithPriorityClassName(t *testing.T) { }) } } + +func TestBuildTolerationsForMaintenanceJob(t *testing.T) { + windowsToleration := corev1api.Toleration{ + Key: "os", + Operator: "Equal", + Effect: "NoSchedule", + Value: "windows", + } + + testCases := []struct { + name string + deploymentTolerations []corev1api.Toleration + expectedTolerations []corev1api.Toleration + }{ + { + name: "no tolerations should only include Windows toleration", + deploymentTolerations: nil, + expectedTolerations: []corev1api.Toleration{ + windowsToleration, + }, + }, + { + name: "empty tolerations should only include Windows toleration", + deploymentTolerations: []corev1api.Toleration{}, + expectedTolerations: []corev1api.Toleration{ + windowsToleration, + }, + }, + { + name: "non-allowed toleration should not be inherited", + deploymentTolerations: []corev1api.Toleration{ + { + Key: "vng-ondemand", + Operator: "Equal", + Effect: "NoSchedule", + Value: "amd64", + }, + }, + expectedTolerations: []corev1api.Toleration{ + windowsToleration, + }, + }, + { + name: "allowed toleration should be inherited", + deploymentTolerations: []corev1api.Toleration{ + { + Key: "kubernetes.azure.com/scalesetpriority", + Operator: "Equal", + Effect: "NoSchedule", + Value: "spot", + }, + }, + expectedTolerations: []corev1api.Toleration{ + windowsToleration, + { + Key: "kubernetes.azure.com/scalesetpriority", + Operator: "Equal", + Effect: "NoSchedule", + Value: "spot", + }, + }, + }, + { + name: "mixed allowed and non-allowed tolerations should only inherit allowed", + deploymentTolerations: []corev1api.Toleration{ + { + Key: "vng-ondemand", // not in allowlist + Operator: "Equal", + Effect: "NoSchedule", + Value: "amd64", + }, + { + Key: "CriticalAddonsOnly", // in allowlist + Operator: "Exists", + Effect: "NoSchedule", + }, + { + Key: "custom-key", // not in allowlist + Operator: "Equal", + Effect: "NoSchedule", + Value: "custom-value", + }, + }, + expectedTolerations: []corev1api.Toleration{ + windowsToleration, + { + Key: "CriticalAddonsOnly", + Operator: "Exists", + Effect: "NoSchedule", + }, + }, + }, + { + name: "multiple allowed tolerations should all be inherited", + deploymentTolerations: []corev1api.Toleration{ + { + Key: "kubernetes.azure.com/scalesetpriority", + Operator: "Equal", + Effect: "NoSchedule", + Value: "spot", + }, + { + Key: "CriticalAddonsOnly", + Operator: "Exists", + Effect: "NoSchedule", + }, + }, + expectedTolerations: []corev1api.Toleration{ + windowsToleration, + { + Key: "kubernetes.azure.com/scalesetpriority", + Operator: "Equal", + Effect: "NoSchedule", + Value: "spot", + }, + { + Key: "CriticalAddonsOnly", + Operator: "Exists", + Effect: "NoSchedule", + }, + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + // Create a deployment with the specified tolerations + deployment := &appsv1api.Deployment{ + Spec: appsv1api.DeploymentSpec{ + Template: corev1api.PodTemplateSpec{ + Spec: corev1api.PodSpec{ + Tolerations: tc.deploymentTolerations, + }, + }, + }, + } + + result := buildTolerationsForMaintenanceJob(deployment) + assert.Equal(t, tc.expectedTolerations, result) + }) + } +} + +func TestBuildJobWithTolerationsInheritance(t *testing.T) { + // Define allowed tolerations that would be set on Velero deployment + allowedTolerations := []corev1api.Toleration{ + { + Key: 
"kubernetes.azure.com/scalesetpriority", + Operator: "Equal", + Effect: "NoSchedule", + Value: "spot", + }, + { + Key: "CriticalAddonsOnly", + Operator: "Exists", + Effect: "NoSchedule", + }, + } + + // Mixed tolerations (allowed and non-allowed) + mixedTolerations := []corev1api.Toleration{ + { + Key: "vng-ondemand", // not in allowlist + Operator: "Equal", + Effect: "NoSchedule", + Value: "amd64", + }, + { + Key: "CriticalAddonsOnly", // in allowlist + Operator: "Exists", + Effect: "NoSchedule", + }, + } + + // Windows toleration that should always be present + windowsToleration := corev1api.Toleration{ + Key: "os", + Operator: "Equal", + Effect: "NoSchedule", + Value: "windows", + } + + testCases := []struct { + name string + deploymentTolerations []corev1api.Toleration + expectedTolerations []corev1api.Toleration + }{ + { + name: "no tolerations on deployment should only have Windows toleration", + deploymentTolerations: nil, + expectedTolerations: []corev1api.Toleration{ + windowsToleration, + }, + }, + { + name: "allowed tolerations should be inherited along with Windows toleration", + deploymentTolerations: allowedTolerations, + expectedTolerations: []corev1api.Toleration{ + windowsToleration, + { + Key: "kubernetes.azure.com/scalesetpriority", + Operator: "Equal", + Effect: "NoSchedule", + Value: "spot", + }, + { + Key: "CriticalAddonsOnly", + Operator: "Exists", + Effect: "NoSchedule", + }, + }, + }, + { + name: "mixed tolerations should only inherit allowed ones", + deploymentTolerations: mixedTolerations, + expectedTolerations: []corev1api.Toleration{ + windowsToleration, + { + Key: "CriticalAddonsOnly", + Operator: "Exists", + Effect: "NoSchedule", + }, + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + // Create a new scheme and add necessary API types + localScheme := runtime.NewScheme() + err := velerov1api.AddToScheme(localScheme) + require.NoError(t, err) + err = appsv1api.AddToScheme(localScheme) + require.NoError(t, err) + err = batchv1api.AddToScheme(localScheme) + require.NoError(t, err) + + // Create a deployment with the specified tolerations + deployment := &appsv1api.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "velero", + Namespace: "velero", + }, + Spec: appsv1api.DeploymentSpec{ + Template: corev1api.PodTemplateSpec{ + Spec: corev1api.PodSpec{ + Containers: []corev1api.Container{ + { + Name: "velero", + Image: "velero/velero:latest", + }, + }, + Tolerations: tc.deploymentTolerations, + }, + }, + }, + } + + // Create a backup repository + repo := &velerov1api.BackupRepository{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-repo", + Namespace: "velero", + }, + Spec: velerov1api.BackupRepositorySpec{ + VolumeNamespace: "velero", + BackupStorageLocation: "default", + }, + } + + // Create fake client and add the deployment + client := fake.NewClientBuilder().WithScheme(localScheme).WithObjects(deployment).Build() + + // Create minimal job configs and resources + jobConfig := &velerotypes.JobConfigs{} + logLevel := logrus.InfoLevel + logFormat := logging.NewFormatFlag() + logFormat.Set("text") + + // Call buildJob + job, err := buildJob(client, t.Context(), repo, "default", jobConfig, logLevel, logFormat, logrus.New()) + require.NoError(t, err) + + // Verify the tolerations are set correctly + assert.Equal(t, tc.expectedTolerations, job.Spec.Template.Spec.Tolerations) + }) + } +} diff --git a/pkg/util/csi/volume_snapshot.go b/pkg/util/csi/volume_snapshot.go index 7d00664cd..8e59dd69f 100644 --- 
a/pkg/util/csi/volume_snapshot.go +++ b/pkg/util/csi/volume_snapshot.go @@ -447,8 +447,13 @@ func GetVolumeSnapshotClassForStorageClass( return &vsClass, nil } return nil, fmt.Errorf( - "failed to get VolumeSnapshotClass for provisioner %s, ensure that the desired VolumeSnapshot class has the %s label or %s annotation", - provisioner, velerov1api.VolumeSnapshotClassSelectorLabel, velerov1api.VolumeSnapshotClassKubernetesAnnotation) + "failed to get VolumeSnapshotClass for provisioner %s: "+ + "ensure that the desired VolumeSnapshotClass has the %s label or %s annotation, "+ + "and that its driver matches the StorageClass provisioner", + provisioner, + velerov1api.VolumeSnapshotClassSelectorLabel, + velerov1api.VolumeSnapshotClassKubernetesAnnotation, + ) } // IsVolumeSnapshotClassHasListerSecret returns whether a volumesnapshotclass has a snapshotlister secret diff --git a/pkg/util/kube/event.go b/pkg/util/kube/event.go index a216853cd..ae514f4da 100644 --- a/pkg/util/kube/event.go +++ b/pkg/util/kube/event.go @@ -16,6 +16,7 @@ limitations under the License. package kube import ( + "context" "math" "sync" "time" @@ -182,13 +183,13 @@ func (es *eventSink) Create(event *corev1api.Event) (*corev1api.Event, error) { return event, nil } - return es.sink.CreateWithEventNamespace(event) + return es.sink.CreateWithEventNamespaceWithContext(context.Background(), event) } func (es *eventSink) Update(event *corev1api.Event) (*corev1api.Event, error) { - return es.sink.UpdateWithEventNamespace(event) + return es.sink.UpdateWithEventNamespaceWithContext(context.Background(), event) } func (es *eventSink) Patch(event *corev1api.Event, data []byte) (*corev1api.Event, error) { - return es.sink.PatchWithEventNamespace(event, data) + return es.sink.PatchWithEventNamespaceWithContext(context.Background(), event, data) } diff --git a/test/Makefile b/test/Makefile index 4eb6103e5..e24efce95 100644 --- a/test/Makefile +++ b/test/Makefile @@ -184,7 +184,7 @@ ginkgo: ${GOBIN}/ginkgo # This target does not run if ginkgo is already in $GOBIN ${GOBIN}/ginkgo: - GOBIN=${GOBIN} go install github.com/onsi/ginkgo/v2/ginkgo@v2.19.0 + GOBIN=${GOBIN} go install github.com/onsi/ginkgo/v2/ginkgo@v2.22.0 .PHONY: run-e2e run-e2e: ginkgo diff --git a/test/e2e/e2e_suite_test.go b/test/e2e/e2e_suite_test.go index 0e632dcc4..71d501e60 100644 --- a/test/e2e/e2e_suite_test.go +++ b/test/e2e/e2e_suite_test.go @@ -39,6 +39,7 @@ import ( . "github.com/vmware-tanzu/velero/test/e2e/basic/resources-check" . "github.com/vmware-tanzu/velero/test/e2e/bsl-mgmt" . "github.com/vmware-tanzu/velero/test/e2e/migration" + . "github.com/vmware-tanzu/velero/test/e2e/nodeagentconfig" . "github.com/vmware-tanzu/velero/test/e2e/parallelfilesdownload" . "github.com/vmware-tanzu/velero/test/e2e/parallelfilesupload" . 
"github.com/vmware-tanzu/velero/test/e2e/privilegesmgmt" @@ -673,6 +674,12 @@ var _ = Describe( SpecificRepoMaintenanceTest, ) +var _ = Describe( + "Test node agent config's LoadAffinity part", + Label("NodeAgentConfig", "LoadAffinity"), + LoadAffinities, +) + func GetKubeConfigContext() error { var err error var tcDefault, tcStandby k8s.TestClient @@ -753,7 +760,7 @@ var _ = BeforeSuite(func() { ).To(Succeed()) } - // Create the needed PriorityClasses + By("Install PriorityClasses for E2E.") Expect(veleroutil.CreatePriorityClasses( context.Background(), test.VeleroCfg.ClientToInstallVelero.Kubebuilder, @@ -783,6 +790,8 @@ var _ = AfterSuite(func() { test.StorageClassName, ), ).To(Succeed()) + + By("Delete PriorityClasses created by E2E") Expect( k8s.DeleteStorageClass( ctx, diff --git a/test/e2e/migration/migration.go b/test/e2e/migration/migration.go index b26f87f38..f91bcffff 100644 --- a/test/e2e/migration/migration.go +++ b/test/e2e/migration/migration.go @@ -342,7 +342,7 @@ func (m *migrationE2E) Restore() error { Expect(veleroutil.InstallStorageClasses( m.VeleroCfg.StandbyClusterCloudProvider)).To(Succeed()) - // Create the needed PriorityClasses + By("Install PriorityClass for E2E.") Expect(veleroutil.CreatePriorityClasses( context.Background(), test.VeleroCfg.StandbyClient.Kubebuilder, @@ -453,6 +453,7 @@ func (m *migrationE2E) Clean() error { Expect(k8sutil.KubectlConfigUseContext( m.Ctx, m.VeleroCfg.StandbyClusterContext)).To(Succeed()) + m.VeleroCfg.ClientToInstallVelero = m.VeleroCfg.StandbyClient m.VeleroCfg.ClusterToInstallVelero = m.VeleroCfg.StandbyClusterName @@ -465,7 +466,6 @@ func (m *migrationE2E) Clean() error { fmt.Println("Fail to delete StorageClass1: ", err) return } - if err := k8sutil.DeleteStorageClass( m.Ctx, *m.VeleroCfg.ClientToInstallVelero, @@ -475,6 +475,12 @@ func (m *migrationE2E) Clean() error { return } + By("Delete PriorityClasses created by E2E") + Expect(veleroutil.DeletePriorityClasses( + m.Ctx, + m.VeleroCfg.ClientToInstallVelero.Kubebuilder, + )).To(Succeed()) + if strings.EqualFold(m.VeleroCfg.Features, test.FeatureCSI) && m.VeleroCfg.UseVolumeSnapshots { By("Delete VolumeSnapshotClass created by E2E") diff --git a/test/e2e/nodeagentconfig/node-agent-config.go b/test/e2e/nodeagentconfig/node-agent-config.go new file mode 100644 index 000000000..1b46eed65 --- /dev/null +++ b/test/e2e/nodeagentconfig/node-agent-config.go @@ -0,0 +1,347 @@ +/* +Copyright the Velero contributors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package nodeagentconfig + +import ( + "context" + "encoding/json" + "fmt" + "strings" + "time" + + . 
"github.com/onsi/gomega" + "github.com/pkg/errors" + corev1api "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/util/wait" + "sigs.k8s.io/controller-runtime/pkg/client" + + velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" + velerov2alpha1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v2alpha1" + "github.com/vmware-tanzu/velero/pkg/builder" + velerotypes "github.com/vmware-tanzu/velero/pkg/types" + "github.com/vmware-tanzu/velero/pkg/util/kube" + velerokubeutil "github.com/vmware-tanzu/velero/pkg/util/kube" + "github.com/vmware-tanzu/velero/test" + . "github.com/vmware-tanzu/velero/test/e2e/test" + k8sutil "github.com/vmware-tanzu/velero/test/util/k8s" + veleroutil "github.com/vmware-tanzu/velero/test/util/velero" +) + +type NodeAgentConfigTestCase struct { + TestCase + nodeAgentConfigs velerotypes.NodeAgentConfigs + nodeAgentConfigMapName string +} + +var LoadAffinities func() = TestFunc(&NodeAgentConfigTestCase{ + nodeAgentConfigs: velerotypes.NodeAgentConfigs{ + LoadAffinity: []*kube.LoadAffinity{ + { + NodeSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "beta.kubernetes.io/arch": "amd64", + }, + }, + StorageClass: test.StorageClassName, + }, + { + NodeSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "kubernetes.io/arch": "amd64", + }, + }, + StorageClass: test.StorageClassName2, + }, + }, + BackupPVCConfig: map[string]velerotypes.BackupPVC{ + test.StorageClassName: { + StorageClass: test.StorageClassName2, + }, + }, + RestorePVCConfig: &velerotypes.RestorePVC{ + IgnoreDelayBinding: true, + }, + PriorityClassName: test.PriorityClassNameForDataMover, + }, + nodeAgentConfigMapName: "node-agent-config", +}) + +func (n *NodeAgentConfigTestCase) Init() error { + // generate random number as UUIDgen and set one default timeout duration + n.TestCase.Init() + + // generate variable names based on CaseBaseName + UUIDgen + n.CaseBaseName = "node-agent-config-" + n.UUIDgen + n.BackupName = "backup-" + n.CaseBaseName + n.RestoreName = "restore-" + n.CaseBaseName + + // generate namespaces by NamespacesTotal + n.NamespacesTotal = 1 + n.NSIncluded = &[]string{} + for nsNum := 0; nsNum < n.NamespacesTotal; nsNum++ { + createNSName := fmt.Sprintf("%s-%00000d", n.CaseBaseName, nsNum) + *n.NSIncluded = append(*n.NSIncluded, createNSName) + } + + // assign values to the inner variable for specific case + n.VeleroCfg.UseNodeAgent = true + n.VeleroCfg.UseNodeAgentWindows = true + + // Need to verify the data mover pod content, so don't wait until backup completion. + n.BackupArgs = []string{ + "create", "--namespace", n.VeleroCfg.VeleroNamespace, "backup", n.BackupName, + "--include-namespaces", strings.Join(*n.NSIncluded, ","), + "--snapshot-volumes=true", "--snapshot-move-data", + } + + // Need to verify the data mover pod content, so don't wait until restore completion. + n.RestoreArgs = []string{ + "create", "--namespace", n.VeleroCfg.VeleroNamespace, "restore", n.RestoreName, + "--from-backup", n.BackupName, + } + + // Message output by ginkgo + n.TestMsg = &TestMSG{ + Desc: "Validate Node Agent ConfigMap configuration", + FailedMSG: "Failed to apply and / or validate configuration in VGDP pod.", + Text: "Should be able to apply and validate configuration in VGDP pod.", + } + return nil +} + +func (n *NodeAgentConfigTestCase) InstallVelero() error { + // Because this test needs to use customized Node Agent ConfigMap, + // need to uninstall and reinstall Velero. 
+ + fmt.Println("Start to uninstall Velero") + if err := veleroutil.VeleroUninstall(n.Ctx, n.VeleroCfg); err != nil { + fmt.Printf("Fail to uninstall Velero: %s\n", err.Error()) + return err + } + + result, err := json.Marshal(n.nodeAgentConfigs) + if err != nil { + return err + } + + repoMaintenanceConfig := builder.ForConfigMap(n.VeleroCfg.VeleroNamespace, n.nodeAgentConfigMapName). + Data("node-agent-config", string(result)).Result() + + n.VeleroCfg.NodeAgentConfigMap = n.nodeAgentConfigMapName + + return veleroutil.PrepareVelero( + n.Ctx, + n.CaseBaseName, + n.VeleroCfg, + repoMaintenanceConfig, + ) +} + +func (n *NodeAgentConfigTestCase) CreateResources() error { + for _, ns := range *n.NSIncluded { + if err := k8sutil.CreateNamespace(n.Ctx, n.Client, ns); err != nil { + fmt.Printf("Fail to create ns %s: %s\n", ns, err.Error()) + return err + } + + pvc, err := k8sutil.CreatePVC(n.Client, ns, "volume-1", test.StorageClassName, nil) + if err != nil { + fmt.Printf("Fail to create PVC %s: %s\n", "volume-1", err.Error()) + return err + } + + vols := k8sutil.CreateVolumes(pvc.Name, []string{"volume-1"}) + + deployment := k8sutil.NewDeployment( + n.CaseBaseName, + (*n.NSIncluded)[0], + 1, + map[string]string{"app": "test"}, + n.VeleroCfg.ImageRegistryProxy, + n.VeleroCfg.WorkerOS, + ).WithVolume(vols).Result() + + deployment, err = k8sutil.CreateDeployment(n.Client.ClientGo, ns, deployment) + if err != nil { + fmt.Printf("Fail to create deployment %s: %s \n", deployment.Name, err.Error()) + return errors.Wrap(err, fmt.Sprintf("failed to create deployment: %s", err.Error())) + } + + if err := k8sutil.WaitForReadyDeployment(n.Client.ClientGo, deployment.Namespace, deployment.Name); err != nil { + fmt.Printf("Fail to create deployment %s: %s\n", n.CaseBaseName, err.Error()) + return err + } + } + + return nil +} + +func (n *NodeAgentConfigTestCase) Backup() error { + if err := veleroutil.VeleroCmdExec(n.Ctx, n.VeleroCfg.VeleroCLI, n.BackupArgs); err != nil { + return err + } + + backupPodList := new(corev1api.PodList) + + wait.PollUntilContextTimeout(n.Ctx, 5*time.Second, 5*time.Minute, true, func(ctx context.Context) (bool, error) { + duList := new(velerov2alpha1api.DataUploadList) + if err := n.VeleroCfg.ClientToInstallVelero.Kubebuilder.List( + n.Ctx, + duList, + &client.ListOptions{Namespace: n.VeleroCfg.VeleroNamespace}, + ); err != nil { + fmt.Printf("Fail to list DataUpload: %s\n", err.Error()) + return false, fmt.Errorf("Fail to list DataUpload: %w", err) + } else { + if len(duList.Items) <= 0 { + fmt.Println("No DataUpload found yet. Continue polling.") + return false, nil + } + } + + if err := n.VeleroCfg.ClientToInstallVelero.Kubebuilder.List( + n.Ctx, + backupPodList, + &client.ListOptions{ + LabelSelector: labels.SelectorFromSet(map[string]string{ + velerov1api.DataUploadLabel: duList.Items[0].Name, + }), + }); err != nil { + fmt.Printf("Fail to list backupPod %s\n", err.Error()) + return false, errors.Wrapf(err, "error to list backup pods") + } else { + if len(backupPodList.Items) <= 0 { + fmt.Println("No backupPod found yet. Continue polling.") + return false, nil + } + } + + return true, nil + }) + + fmt.Println("Start to verify backupPod content.") + + Expect(backupPodList.Items[0].Spec.PriorityClassName).To(Equal(n.nodeAgentConfigs.PriorityClassName)) + + // In backup, only the second element of LoadAffinity array should be used. 
+ expectedAffinity := velerokubeutil.ToSystemAffinity(n.nodeAgentConfigs.LoadAffinity[1:]) + + Expect(backupPodList.Items[0].Spec.Affinity).To(Equal(expectedAffinity)) + + fmt.Println("backupPod content verification completed successfully.") + + wait.PollUntilContextTimeout(n.Ctx, 5*time.Second, 5*time.Minute, true, func(ctx context.Context) (bool, error) { + backup := new(velerov1api.Backup) + if err := n.VeleroCfg.ClientToInstallVelero.Kubebuilder.Get( + n.Ctx, + client.ObjectKey{Namespace: n.VeleroCfg.VeleroNamespace, Name: n.BackupName}, + backup, + ); err != nil { + return false, err + } + + if backup.Status.Phase != velerov1api.BackupPhaseCompleted && + backup.Status.Phase != velerov1api.BackupPhaseFailed && + backup.Status.Phase != velerov1api.BackupPhasePartiallyFailed { + fmt.Printf("backup status is %s. Continue polling until backup reach to a final state.\n", backup.Status.Phase) + return false, nil + } + + return true, nil + }) + + return nil +} + +func (n *NodeAgentConfigTestCase) Restore() error { + if err := veleroutil.VeleroCmdExec(n.Ctx, n.VeleroCfg.VeleroCLI, n.RestoreArgs); err != nil { + return err + } + + restorePodList := new(corev1api.PodList) + + wait.PollUntilContextTimeout(n.Ctx, 5*time.Second, 5*time.Minute, true, func(ctx context.Context) (bool, error) { + ddList := new(velerov2alpha1api.DataDownloadList) + if err := n.VeleroCfg.ClientToInstallVelero.Kubebuilder.List( + n.Ctx, + ddList, + &client.ListOptions{Namespace: n.VeleroCfg.VeleroNamespace}, + ); err != nil { + fmt.Printf("Fail to list DataDownload: %s\n", err.Error()) + return false, fmt.Errorf("Fail to list DataDownload %w", err) + } else { + if len(ddList.Items) <= 0 { + fmt.Println("No DataDownload found yet. Continue polling.") + return false, nil + } + } + + if err := n.VeleroCfg.ClientToInstallVelero.Kubebuilder.List( + n.Ctx, + restorePodList, + &client.ListOptions{ + LabelSelector: labels.SelectorFromSet(map[string]string{ + velerov1api.DataDownloadLabel: ddList.Items[0].Name, + }), + }); err != nil { + fmt.Printf("Fail to list restorePod %s\n", err.Error()) + return false, errors.Wrapf(err, "error to list restore pods") + } else { + if len(restorePodList.Items) <= 0 { + fmt.Println("No restorePod found yet. Continue polling.") + return false, nil + } + } + + return true, nil + }) + + fmt.Println("Start to verify restorePod content.") + + Expect(restorePodList.Items[0].Spec.PriorityClassName).To(Equal(n.nodeAgentConfigs.PriorityClassName)) + + // In restore, only the first element of LoadAffinity array should be used. + expectedAffinity := velerokubeutil.ToSystemAffinity(n.nodeAgentConfigs.LoadAffinity[:1]) + + Expect(restorePodList.Items[0].Spec.Affinity).To(Equal(expectedAffinity)) + + fmt.Println("restorePod content verification completed successfully.") + + wait.PollUntilContextTimeout(n.Ctx, 5*time.Second, 5*time.Minute, true, func(ctx context.Context) (bool, error) { + restore := new(velerov1api.Restore) + if err := n.VeleroCfg.ClientToInstallVelero.Kubebuilder.Get( + n.Ctx, + client.ObjectKey{Namespace: n.VeleroCfg.VeleroNamespace, Name: n.RestoreName}, + restore, + ); err != nil { + return false, err + } + + if restore.Status.Phase != velerov1api.RestorePhaseCompleted && + restore.Status.Phase != velerov1api.RestorePhaseFailed && + restore.Status.Phase != velerov1api.RestorePhasePartiallyFailed { + fmt.Printf("restore status is %s. 
Continue polling until restore reach to a final state.\n", restore.Status.Phase) + return false, nil + } + + return true, nil + }) + + return nil +} diff --git a/test/util/velero/install.go b/test/util/velero/install.go index ca6e207d1..6ffa8812d 100644 --- a/test/util/velero/install.go +++ b/test/util/velero/install.go @@ -163,6 +163,7 @@ func VeleroInstall(ctx context.Context, veleroCfg *test.VeleroConfig, isStandbyC veleroCfg.VeleroNamespace, ) } + veleroCfg.BackupRepoConfigMap = test.BackupRepositoryConfigName // Install the passed-in objects in Velero installed namespace for _, obj := range objects { @@ -654,7 +655,7 @@ func patchResources(resources *unstructured.UnstructuredList, namespace string, APIVersion: corev1api.SchemeGroupVersion.String(), }, ObjectMeta: metav1.ObjectMeta{ - Name: "restic-restore-action-config", + Name: "fs-restore-action-config", Namespace: namespace, Labels: map[string]string{ "velero.io/plugin-config": "", @@ -671,7 +672,7 @@ func patchResources(resources *unstructured.UnstructuredList, namespace string, return errors.Wrapf(err, "failed to convert restore action config to unstructure") } resources.Items = append(resources.Items, un) - fmt.Printf("the restic restore helper image is set by the configmap %q \n", "restic-restore-action-config") + fmt.Printf("the restic restore helper image is set by the configmap %q \n", "fs-restore-action-config") } return nil
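Illustrative note (not part of the change set): a minimal, self-contained Go sketch of the mutex-guarded list pattern that pkg/backup/request.go introduces as SynchronizedVSList, showing why the parallel item-block workers need it. The Snapshot type and the worker loop below are stand-ins; the real code stores *volume.Snapshot and appends from takePVSnapshot.

package main

import (
	"fmt"
	"sync"
)

// Snapshot is a stand-in for *volume.Snapshot used by the real SynchronizedVSList.
type Snapshot struct{ VolumeName string }

// SynchronizedVSList mirrors the type added in pkg/backup/request.go:
// a slice guarded by a mutex so concurrent workers can append safely.
type SynchronizedVSList struct {
	sync.Mutex
	VolumeSnapshotList []*Snapshot
}

func (s *SynchronizedVSList) Add(vs *Snapshot) {
	s.Lock()
	defer s.Unlock()
	s.VolumeSnapshotList = append(s.VolumeSnapshotList, vs)
}

func (s *SynchronizedVSList) Get() []*Snapshot {
	s.Lock()
	defer s.Unlock()
	return s.VolumeSnapshotList
}

func main() {
	var snapshots SynchronizedVSList
	var wg sync.WaitGroup

	// Several workers appending concurrently, as the parallel backup path does
	// when multiple item blocks snapshot PVs at the same time; without the
	// mutex the append on the shared slice would be a data race.
	for i := 0; i < 4; i++ {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			snapshots.Add(&Snapshot{VolumeName: fmt.Sprintf("pv-%d", i)})
		}(i)
	}
	wg.Wait()

	// A single reader (e.g. the controller computing VolumeSnapshotsAttempted)
	// takes the lock once via Get.
	fmt.Println("snapshots recorded:", len(snapshots.Get()))
}

The same locking idea backs the new VolumeSnapshotterCache.SetNX, which additionally initializes the snapshotter once per VolumeSnapshotLocation while holding the lock.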