Compare commits

..

175 Commits

Author SHA1 Message Date
Xun Jiang/Bruce Jiang
540dfebf07 Merge branch 'main' into copilot/fix-9122
Some checks failed
Run the E2E test on kind / get-go-version (push) Failing after 1m10s
Run the E2E test on kind / build (push) Has been skipped
Run the E2E test on kind / setup-test-matrix (push) Successful in 3s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Signed-off-by: Xun Jiang/Bruce Jiang <59276555+blackpiglet@users.noreply.github.com>
2025-12-02 16:42:35 +08:00
Xun Jiang
7b7b6bc2db Add the node-agent ConfigMap document.
Signed-off-by: Xun Jiang <xun.jiang@broadcom.com>
2025-12-02 16:41:23 +08:00
Xun Jiang/Bruce Jiang
d96434c8c9 Merge pull request #9424 from vmware-tanzu/bump_golang_to_1.25
Some checks failed
Run the E2E test on kind / get-go-version (push) Failing after 1m22s
Run the E2E test on kind / build (push) Has been skipped
Run the E2E test on kind / setup-test-matrix (push) Successful in 3s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
build-image / Build (push) Failing after 14s
Main CI / get-go-version (push) Successful in 8s
Main CI / Build (push) Failing after 27s
Close stale issues and PRs / stale (push) Successful in 15s
Trivy Nightly Scan / Trivy nightly scan (velero, main) (push) Failing after 1m29s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-aws, main) (push) Failing after 1m6s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-gcp, main) (push) Failing after 1m10s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-microsoft-azure, main) (push) Failing after 1m15s
Bump Golang version from 1.24-bookworm to 1.25-bookworm
2025-11-27 17:53:16 +08:00
Xun Jiang
64e3643006 Fix linter error reported.
Some checks failed
Run the E2E test on kind / get-go-version (push) Failing after 1m17s
Run the E2E test on kind / build (push) Has been skipped
Run the E2E test on kind / setup-test-matrix (push) Successful in 3s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Signed-off-by: Xun Jiang <xun.jiang@broadcom.com>
2025-11-26 14:16:42 +08:00
Xun Jiang
758f6a4847 Bump Golang version from 1.24-bookworm to 1.25-bookworm
Bump golangci-lint to v1.25.0, because golangci-lint start to support
Golang v1.25 since v1.24.0, and v1.26.x was not stable yet.
Align action pr-linter-check's golangci-lint version to v1.25.0

Signed-off-by: Xun Jiang <xun.jiang@broadcom.com>
2025-11-26 14:16:42 +08:00
Xun Jiang/Bruce Jiang
f6b3852d2f Merge pull request #9427 from vmware-tanzu/dependabot/github_actions/actions/checkout-6
Some checks failed
Run the E2E test on kind / get-go-version (push) Failing after 1m4s
Run the E2E test on kind / build (push) Has been skipped
Run the E2E test on kind / setup-test-matrix (push) Successful in 3s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Main CI / get-go-version (push) Successful in 10s
Main CI / Build (push) Failing after 25s
Close stale issues and PRs / stale (push) Successful in 13s
Trivy Nightly Scan / Trivy nightly scan (velero, main) (push) Failing after 1m33s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-aws, main) (push) Failing after 1m11s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-gcp, main) (push) Failing after 1m12s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-microsoft-azure, main) (push) Failing after 1m20s
Bump actions/checkout from 5 to 6
2025-11-26 14:15:06 +08:00
dependabot[bot]
981b29b4cb Bump actions/checkout from 5 to 6
Bumps [actions/checkout](https://github.com/actions/checkout) from 5 to 6.
- [Release notes](https://github.com/actions/checkout/releases)
- [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md)
- [Commits](https://github.com/actions/checkout/compare/v5...v6)

---
updated-dependencies:
- dependency-name: actions/checkout
  dependency-version: '6'
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-11-24 19:25:51 +00:00
Daniel Jiang
7688579f75 Merge pull request #9423 from Lyndon-Li/fix-linter-error
Some checks failed
Run the E2E test on kind / get-go-version (push) Failing after 1m8s
Run the E2E test on kind / build (push) Has been skipped
Run the E2E test on kind / setup-test-matrix (push) Successful in 2s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Main CI / get-go-version (push) Successful in 11s
Main CI / Build (push) Failing after 28s
Close stale issues and PRs / stale (push) Successful in 12s
Trivy Nightly Scan / Trivy nightly scan (velero, main) (push) Failing after 1m25s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-aws, main) (push) Failing after 1m13s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-gcp, main) (push) Failing after 1m0s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-microsoft-azure, main) (push) Failing after 1m10s
Fix linter error
2025-11-24 18:29:28 +08:00
Lyndon-Li
e63486b677 fix-linter-error
Signed-off-by: Lyndon-Li <lyonghui@vmware.com>
2025-11-24 17:26:59 +08:00
Lyndon-Li
bea82a61d6 Merge branch 'main' into fix-linter-error 2025-11-24 17:24:56 +08:00
lyndon-li
3fc33d3c46 Merge pull request #9421 from vmware-tanzu/dependabot/go_modules/golang.org/x/crypto-0.45.0
Bump golang.org/x/crypto from 0.40.0 to 0.45.0
2025-11-24 17:21:20 +08:00
Lyndon-Li
99d87aae5b fix linter error
Signed-off-by: Lyndon-Li <lyonghui@vmware.com>
2025-11-24 17:21:00 +08:00
dependabot[bot]
960a596e7b Bump golang.org/x/crypto from 0.40.0 to 0.45.0
Bumps [golang.org/x/crypto](https://github.com/golang/crypto) from 0.40.0 to 0.45.0.
- [Commits](https://github.com/golang/crypto/compare/v0.40.0...v0.45.0)

---
updated-dependencies:
- dependency-name: golang.org/x/crypto
  dependency-version: 0.45.0
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-11-24 17:09:09 +08:00
lyndon-li
695a94707d Merge pull request #9420 from Lyndon-Li/max-go-procs-doc
Some checks failed
Run the E2E test on kind / get-go-version (push) Failing after 50s
Run the E2E test on kind / build (push) Has been skipped
Run the E2E test on kind / setup-test-matrix (push) Successful in 2s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Main CI / get-go-version (push) Successful in 13s
Main CI / Build (push) Failing after 25s
Close stale issues and PRs / stale (push) Successful in 11s
Trivy Nightly Scan / Trivy nightly scan (velero, main) (push) Failing after 1m26s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-aws, main) (push) Failing after 1m7s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-gcp, main) (push) Failing after 1m14s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-microsoft-azure, main) (push) Failing after 1m7s
Issue 9194: add doc for GOMAXPROCS behavior change
2025-11-21 14:26:06 +08:00
lyndon-li
8d7957dfae Merge pull request #9419 from shubham-pampattiwar/fix-vgs-volume-policy-9344
Some checks failed
Run the E2E test on kind / get-go-version (push) Failing after 53s
Run the E2E test on kind / build (push) Has been skipped
Run the E2E test on kind / setup-test-matrix (push) Successful in 2s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Main CI / get-go-version (push) Successful in 9s
Main CI / Build (push) Failing after 26s
Close stale issues and PRs / stale (push) Successful in 11s
Trivy Nightly Scan / Trivy nightly scan (velero, main) (push) Failing after 1m20s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-aws, main) (push) Failing after 56s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-gcp, main) (push) Failing after 1m0s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-microsoft-azure, main) (push) Failing after 50s
Apply volume policies to VolumeGroupSnapshot PVC filtering
2025-11-20 14:28:24 +08:00
Lyndon-Li
a3169aeff3 add doc for GOMAXPROCS behavior change
Signed-off-by: Lyndon-Li <lyonghui@vmware.com>
2025-11-20 10:42:12 +08:00
Shubham Pampattiwar
e4726b2389 Merge pull request #9418 from Lyndon-Li/cache-volume-doc
Some checks failed
Run the E2E test on kind / get-go-version (push) Failing after 53s
Run the E2E test on kind / build (push) Has been skipped
Run the E2E test on kind / setup-test-matrix (push) Successful in 2s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Main CI / get-go-version (push) Successful in 10s
Main CI / Build (push) Failing after 25s
Close stale issues and PRs / stale (push) Successful in 11s
Trivy Nightly Scan / Trivy nightly scan (velero, main) (push) Failing after 1m27s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-aws, main) (push) Failing after 1m0s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-gcp, main) (push) Failing after 1m6s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-microsoft-azure, main) (push) Failing after 1m5s
Issue 9276: doc for cache volume
2025-11-18 21:29:09 -08:00
lyndon-li
9dc27555bc Update site/content/docs/main/data-movement-cache-volume.md
Co-authored-by: Tiger Kaovilai <passawit.kaovilai@gmail.com>
Signed-off-by: lyndon-li <98304688+Lyndon-Li@users.noreply.github.com>
2025-11-19 10:50:25 +08:00
lyndon-li
39892abef2 Update site/content/docs/main/data-movement-cache-volume.md
Co-authored-by: Tiger Kaovilai <passawit.kaovilai@gmail.com>
Signed-off-by: lyndon-li <98304688+Lyndon-Li@users.noreply.github.com>
2025-11-19 10:50:16 +08:00
Shubham Pampattiwar
c565da2ea6 Fix linter error: use t.Context() instead of context.Background()
Signed-off-by: Shubham Pampattiwar <spampatt@redhat.com>
2025-11-18 15:58:09 -08:00
Shubham Pampattiwar
324c2fb448 Document volume policy interaction with VolumeGroupSnapshots
Add documentation explaining how volume policies are applied before
VGS grouping, including examples and troubleshooting guidance for the
multiple CSI drivers scenario.

Signed-off-by: Shubham Pampattiwar <spampatt@redhat.com>
2025-11-18 15:46:30 -08:00
Shubham Pampattiwar
c870eb1645 Add changelog entry for PR #9419
Signed-off-by: Shubham Pampattiwar <spampatt@redhat.com>
2025-11-18 15:42:53 -08:00
Shubham Pampattiwar
dc3da29f3e Apply volume policies to VolumeGroupSnapshot PVC filtering
VolumeGroupSnapshots were querying all PVCs with matching labels
directly from the cluster without respecting volume policies. This
caused errors when labeled PVCs included both CSI and non-CSI volumes,
or volumes from different CSI drivers that were excluded by policies.

This change filters PVCs by volume policy before VGS grouping,
ensuring only PVCs that should be snapshotted are included in the
group. A warning is logged when PVCs are excluded from VGS due to
volume policy.

Fixes #9344

Signed-off-by: Shubham Pampattiwar <spampatt@redhat.com>
2025-11-18 15:40:30 -08:00
Lyndon-Li
2579ef1093 doc for cache volume
Signed-off-by: Lyndon-Li <lyonghui@vmware.com>
2025-11-18 18:45:08 +08:00
Shubham Pampattiwar
fa374b6143 Merge pull request #9407 from Lyndon-Li/fix-write-repo-init-param-error
Some checks failed
Run the E2E test on kind / get-go-version (push) Failing after 1m14s
Run the E2E test on kind / build (push) Has been skipped
Run the E2E test on kind / setup-test-matrix (push) Successful in 3s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Main CI / get-go-version (push) Successful in 36s
Main CI / Build (push) Failing after 18s
Close stale issues and PRs / stale (push) Successful in 11s
Trivy Nightly Scan / Trivy nightly scan (velero, main) (push) Failing after 1m36s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-aws, main) (push) Failing after 1m0s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-gcp, main) (push) Failing after 1m11s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-microsoft-azure, main) (push) Failing after 1m2s
Issue 9400: connect repo first time after creation
2025-11-12 10:28:10 -08:00
Lyndon-Li
67cf896eaf fix backup repo init error
Signed-off-by: Lyndon-Li <lyonghui@vmware.com>
2025-11-12 11:02:34 +08:00
Xun Jiang/Bruce Jiang
ad11b38468 Remove PVC node selection E2E test case. (#9405)
Some checks failed
Run the E2E test on kind / get-go-version (push) Failing after 53s
Run the E2E test on kind / build (push) Has been skipped
Run the E2E test on kind / setup-test-matrix (push) Successful in 3s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Main CI / get-go-version (push) Successful in 9s
Main CI / Build (push) Failing after 1m25s
Close stale issues and PRs / stale (push) Successful in 9s
Trivy Nightly Scan / Trivy nightly scan (velero, main) (push) Failing after 6m3s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-gcp, main) (push) Failing after 1m58s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-microsoft-azure, main) (push) Failing after 1m40s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-aws, main) (push) Failing after 13m59s
According to #7904, remove the corresponding E2E case.

Signed-off-by: Xun Jiang <xun.jiang@broadcom.com>
2025-11-11 13:04:47 -05:00
dependabot[bot]
b9cf90f11c Bump golangci/golangci-lint-action from 8 to 9 (#9404)
Bumps [golangci/golangci-lint-action](https://github.com/golangci/golangci-lint-action) from 8 to 9.
- [Release notes](https://github.com/golangci/golangci-lint-action/releases)
- [Commits](https://github.com/golangci/golangci-lint-action/compare/v8...v9)

---
updated-dependencies:
- dependency-name: golangci/golangci-lint-action
  dependency-version: '9'
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-11 13:03:40 -05:00
lyndon-li
f947092f1a cache volume for PVR (#9397)
Signed-off-by: Lyndon-Li <lyonghui@vmware.com>
2025-11-11 13:02:56 -05:00
Xun Jiang/Bruce Jiang
82367e7ff6 Fix the Job build error when BackupReposiotry name longer than 63. (#9350)
* Fix the Job build error when BackupReposiotry name longer than 63.

Fix the Job build error.
Consider the name length limitation change in job list code.
Use hash to replace the GetValidName function.

Signed-off-by: Xun Jiang <xun.jiang@broadcom.com>

* Use ref_name to replace ref.

Signed-off-by: Xun Jiang <xun.jiang@broadcom.com>

---------

Signed-off-by: Xun Jiang <xun.jiang@broadcom.com>
2025-11-11 12:56:27 -05:00
lyndon-li
df07c39014 Merge pull request #9368 from shubham-pampattiwar/fix-volume-info-generatename
Some checks failed
Run the E2E test on kind / get-go-version (push) Failing after 1m21s
Run the E2E test on kind / build (push) Has been skipped
Run the E2E test on kind / setup-test-matrix (push) Successful in 3s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Main CI / get-go-version (push) Failing after 15s
Main CI / Build (push) Has been skipped
Close stale issues and PRs / stale (push) Successful in 11s
Trivy Nightly Scan / Trivy nightly scan (velero, main) (push) Failing after 3m52s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-aws, main) (push) Failing after 1m6s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-gcp, main) (push) Failing after 57s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-microsoft-azure, main) (push) Failing after 1m4s
Track actual resource names for GenerateName in restore status
2025-11-10 14:48:15 +08:00
Wenkai Yin(尹文开)
f2538207f3 Merge pull request #9390 from kaovilai/fix-artifact-upload-conflict
Update debug bundle artifact name to include Kubernetes version and job index
2025-11-10 14:19:18 +08:00
lyndon-li
d1e5d6b13a Merge pull request #9367 from shubham-pampattiwar/fix-managed-fields-generatename
Fix managed fields patch for resources using GenerateName
2025-11-10 13:50:42 +08:00
lyndon-li
99145bee70 Merge pull request #9391 from Lyndon-Li/cache-volume-for-dd
Cache volume support for DataDownload
2025-11-10 11:00:36 +08:00
Xun Jiang/Bruce Jiang
9e5769c304 Merge pull request #9057 from Joeavaikath/feat/wildcard-namespaces
Some checks failed
Run the E2E test on kind / get-go-version (push) Failing after 55s
Run the E2E test on kind / build (push) Has been skipped
Run the E2E test on kind / setup-test-matrix (push) Successful in 2s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Main CI / get-go-version (push) Failing after 11s
Main CI / Build (push) Has been skipped
Close stale issues and PRs / stale (push) Successful in 13s
Trivy Nightly Scan / Trivy nightly scan (velero, main) (push) Failing after 7m20s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-aws, main) (push) Failing after 2m9s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-gcp, main) (push) Failing after 1m40s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-microsoft-azure, main) (push) Failing after 1m4s
Design: Feat/wildcard namespaces
2025-11-07 13:34:41 +08:00
Lyndon-Li
21b998e2c5 cache volume for data download
Signed-off-by: Lyndon-Li <lyonghui@vmware.com>
2025-11-06 18:52:41 +08:00
Lyndon-Li
fbeab7291e Merge branch 'main' into cache-volume-for-dd 2025-11-06 15:12:31 +08:00
lyndon-li
d4a966481b Merge pull request #9379 from Lyndon-Li/repo-static-info-provider
Some checks failed
Run the E2E test on kind / get-go-version (push) Failing after 1m6s
Run the E2E test on kind / build (push) Has been skipped
Run the E2E test on kind / setup-test-matrix (push) Successful in 2s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Main CI / get-go-version (push) Failing after 10s
Main CI / Build (push) Has been skipped
Close stale issues and PRs / stale (push) Successful in 13s
Trivy Nightly Scan / Trivy nightly scan (velero, main) (push) Failing after 7m58s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-aws, main) (push) Failing after 1m41s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-gcp, main) (push) Failing after 1m13s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-microsoft-azure, main) (push) Failing after 1m0s
Repo provider interface refactor for repo static configuration
2025-11-06 15:11:43 +08:00
Tiger Kaovilai
6e54879f4d update debug bundle artifact name to include Kubernetes version and job index
Signed-off-by: Tiger Kaovilai <tkaovila@redhat.com>
2025-11-05 14:25:39 -05:00
lyndon-li
e485258d25 Merge pull request #9389 from sseago/fs-backup-secctx
Some checks failed
Run the E2E test on kind / get-go-version (push) Failing after 54s
Run the E2E test on kind / build (push) Has been skipped
Run the E2E test on kind / setup-test-matrix (push) Successful in 2s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Main CI / get-go-version (push) Failing after 8s
Main CI / Build (push) Has been skipped
Close stale issues and PRs / stale (push) Successful in 13s
Trivy Nightly Scan / Trivy nightly scan (velero, main) (push) Failing after 10m19s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-aws, main) (push) Failing after 1m16s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-gcp, main) (push) Failing after 1m11s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-microsoft-azure, main) (push) Failing after 1m1s
don't copy securitycontext from first container if configmap found
2025-11-05 12:41:30 +08:00
Scott Seago
bd7d28f004 don't copy securitycontext from first container if configmap found
Signed-off-by: Scott Seago <sseago@redhat.com>
2025-11-04 16:56:35 -05:00
Lyndon-Li
7dbe2b4358 cache volume for data download
Signed-off-by: Lyndon-Li <lyonghui@vmware.com>
2025-11-04 16:59:52 +08:00
Lyndon-Li
597cee545a repo provider interface refactor for repo static configuration
Signed-off-by: Lyndon-Li <lyonghui@vmware.com>
2025-11-04 16:32:55 +08:00
Lyndon-Li
9556a39a89 repo provider interface refactor for repo static configuration
Signed-off-by: Lyndon-Li <lyonghui@vmware.com>
2025-11-03 16:16:05 +08:00
lyndon-li
45755e14ee Merge pull request #9369 from vmware-tanzu/dependabot/github_actions/actions/upload-artifact-5
Some checks failed
Run the E2E test on kind / get-go-version (push) Failing after 51s
Run the E2E test on kind / build (push) Has been skipped
Run the E2E test on kind / setup-test-matrix (push) Successful in 3s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Main CI / get-go-version (push) Failing after 10s
Main CI / Build (push) Has been skipped
Close stale issues and PRs / stale (push) Successful in 10s
Trivy Nightly Scan / Trivy nightly scan (velero, main) (push) Failing after 9m31s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-aws, main) (push) Failing after 1m28s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-gcp, main) (push) Failing after 1m9s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-microsoft-azure, main) (push) Failing after 1m5s
Bump actions/upload-artifact from 4 to 5
2025-11-03 14:38:01 +08:00
lyndon-li
c907b316a5 Merge pull request #9370 from Lyndon-Li/cache-volume-configuration
Some checks failed
Run the E2E test on kind / get-go-version (push) Failing after 47s
Run the E2E test on kind / build (push) Has been skipped
Run the E2E test on kind / setup-test-matrix (push) Successful in 2s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Main CI / get-go-version (push) Failing after 11s
Main CI / Build (push) Has been skipped
Close stale issues and PRs / stale (push) Successful in 11s
Trivy Nightly Scan / Trivy nightly scan (velero, main) (push) Failing after 14m2s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-aws, main) (push) Failing after 5m12s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-gcp, main) (push) Failing after 1m59s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-microsoft-azure, main) (push) Failing after 1m15s
Add cache volume configuration
2025-11-01 10:38:54 +08:00
lyndon-li
75d69e1a04 Merge pull request #9362 from Lyndon-Li/cache-volume-for-exposer
Some checks failed
Run the E2E test on kind / get-go-version (push) Failing after 58s
Run the E2E test on kind / build (push) Has been skipped
Run the E2E test on kind / setup-test-matrix (push) Successful in 3s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Main CI / get-go-version (push) Failing after 13s
Main CI / Build (push) Has been skipped
Close stale issues and PRs / stale (push) Successful in 12s
Trivy Nightly Scan / Trivy nightly scan (velero, main) (push) Failing after 1m7s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-aws, main) (push) Failing after 50s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-gcp, main) (push) Failing after 1m4s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-microsoft-azure, main) (push) Failing after 44s
Exposer supports cache volume
2025-10-31 17:06:40 +08:00
lyndon-li
a95c90411c Merge branch 'main' into cache-volume-configuration 2025-10-29 15:50:07 +08:00
Lyndon-Li
7178946deb cache volume configuration
Signed-off-by: Lyndon-Li <lyonghui@vmware.com>
2025-10-29 15:05:52 +08:00
lyndon-li
14d2f0b30b Merge pull request #9375 from Lyndon-Li/issue-fix-9365
Some checks failed
Run the E2E test on kind / get-go-version (push) Failing after 59s
Run the E2E test on kind / build (push) Has been skipped
Run the E2E test on kind / setup-test-matrix (push) Successful in 2s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Main CI / get-go-version (push) Failing after 9s
Main CI / Build (push) Has been skipped
Close stale issues and PRs / stale (push) Successful in 16s
Trivy Nightly Scan / Trivy nightly scan (velero, main) (push) Failing after 1m18s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-aws, main) (push) Failing after 56s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-gcp, main) (push) Failing after 1m4s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-microsoft-azure, main) (push) Failing after 53s
issue 9365: prevent multiple update of PVR
2025-10-29 14:58:39 +08:00
Lyndon-Li
31a7236c7d issue 9365: prevent multiple update of PVR
Signed-off-by: Lyndon-Li <lyonghui@vmware.com>
2025-10-29 14:32:48 +08:00
lyndon-li
4111eb3940 Merge branch 'main' into cache-volume-for-exposer 2025-10-28 14:44:30 +08:00
Lyndon-Li
b1e5e4408f exposer supports cache volume
Signed-off-by: Lyndon-Li <lyonghui@vmware.com>
2025-10-28 14:43:51 +08:00
lyndon-li
0d7ef85f98 Merge pull request #9353 from Lyndon-Li/cache-dir-for-udmprepo
Some checks failed
Run the E2E test on kind / get-go-version (push) Failing after 49s
Run the E2E test on kind / build (push) Has been skipped
Run the E2E test on kind / setup-test-matrix (push) Successful in 3s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Main CI / get-go-version (push) Failing after 11s
Main CI / Build (push) Has been skipped
Close stale issues and PRs / stale (push) Successful in 1m34s
Trivy Nightly Scan / Trivy nightly scan (velero, main) (push) Failing after 1m28s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-aws, main) (push) Failing after 55s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-gcp, main) (push) Failing after 1m7s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-microsoft-azure, main) (push) Failing after 55s
Cache dir for udmrepo
2025-10-28 13:58:47 +08:00
lyndon-li
94cf7b39a8 Merge pull request #9357 from sseago/incremental-bytes2
Add incrementalBytes to DU/PVB for reporting new/changed size
2025-10-28 13:18:49 +08:00
dependabot[bot]
5e9605131b Bump actions/upload-artifact from 4 to 5
Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 4 to 5.
- [Release notes](https://github.com/actions/upload-artifact/releases)
- [Commits](https://github.com/actions/upload-artifact/compare/v4...v5)

---
updated-dependencies:
- dependency-name: actions/upload-artifact
  dependency-version: '5'
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-10-27 20:02:29 +00:00
Shubham Pampattiwar
c2840f1c74 Fix tests: populate createdName for all created resources
Update test expectations to include createdName field for resources
with action 'created'. Also ensure namespaces track their created
names when created via EnsureNamespaceExistsAndIsReady.

Signed-off-by: Shubham Pampattiwar <spampatt@redhat.com>
2025-10-27 12:44:31 -07:00
Scott Seago
5fc76db8c0 Add incrementalSize to DU/PVB for reporting new/changed size
Signed-off-by: Scott Seago <sseago@redhat.com>
2025-10-27 15:38:31 -04:00
Shubham Pampattiwar
f9f0e48e04 Add changelog for PR 9367
Signed-off-by: Shubham Pampattiwar <spampatt@redhat.com>
2025-10-27 12:11:06 -07:00
Shubham Pampattiwar
2e9998b20e Add changelog for PR 9368
Signed-off-by: Shubham Pampattiwar <spampatt@redhat.com>
2025-10-27 12:07:55 -07:00
Shubham Pampattiwar
07f30d06b9 Track actual resource names for GenerateName in restore status
When restoring resources with GenerateName, Kubernetes assigns the actual name
after creation, but Velero only tracked the original name from the backup in
itemKey. This caused volume information collection to fail when trying to fetch
PVCs using the original name instead of the actual created name.

Example:
- Original PVC name from backup: "test-vm-disk-1"
- Actual created PVC name: "test-vm-backup-2025-10-27-test-vm-disk-1-mdjkd"
- Volume info tried to fetch: "test-vm-disk-1" → Failed with "not found"

This affects any plugin or workflow using GenerateName during restore:
- kubevirt-velero-plugin (VMFR use case with PVC collision avoidance)
- Custom restore item actions using generateName
- Secrets/ConfigMaps restored with generateName

Changes:
1. Add createdName field to restoredItemStatus struct (pkg/restore/request.go)
2. Capture actual name from createdObj.GetName() (pkg/restore/restore.go:1520)
3. Use createdName in RestoredResourceList() when available (pkg/restore/request.go:93-95)

This fix is backwards compatible:
- createdName defaults to empty string
- When empty, falls back to itemKey.name (original behavior)
- Only populated for GenerateName resources where needed

Fixes volume information collection errors like:
"Failed to get PVC" error="persistentvolumeclaims \"<original-name>\" not found"

Signed-off-by: Shubham Pampattiwar <spampatt@redhat.com>
2025-10-27 12:04:46 -07:00
Shubham Pampattiwar
898fa13ed7 Fix managed fields patch for resources using GenerateName
When restoring resources with GenerateName (where name is empty and K8s
assigns the actual name), the managed fields patch was failing with error
"name is required" because it was using obj.GetName() which returns empty
for GenerateName resources.

The fix uses createdObj.GetName() instead, which contains the actual name
assigned by Kubernetes after resource creation.

This affects any resource using GenerateName for restore, including:
- PersistentVolumeClaims restored by kubevirt-velero-plugin
- Secrets and ConfigMaps created with generateName
- Any custom resources using generateName

Changes:
- Line 1707: Use createdObj.GetName() instead of obj.GetName() in Patch call
- Lines 1702, 1709, 1713, 1716: Use createdObj in error/info messages for accuracy

This is a backwards-compatible fix since:
- For resources WITHOUT generateName: obj.GetName() == createdObj.GetName()
- For resources WITH generateName: createdObj.GetName() has the actual name

The managed fields patch was already correctly using createdObj (lines 1698-1700),
only the Patch() call was incorrectly using obj.

Fixes restore status showing FinalizingPartiallyFailed with "name is required"
error when restoring resources with GenerateName.

Signed-off-by: Shubham Pampattiwar <spampatt@redhat.com>
2025-10-27 11:57:55 -07:00
lyndon-li
f4517f131b Merge branch 'main' into cache-dir-for-udmprepo 2025-10-27 16:34:21 +08:00
lyndon-li
f4af6156a1 Merge pull request #9354 from Lyndon-Li/snapshot-size-support
Some checks failed
Run the E2E test on kind / get-go-version (push) Failing after 54s
Run the E2E test on kind / build (push) Has been skipped
Run the E2E test on kind / setup-test-matrix (push) Successful in 2s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Main CI / get-go-version (push) Failing after 9s
Main CI / Build (push) Has been skipped
Close stale issues and PRs / stale (push) Successful in 11s
Trivy Nightly Scan / Trivy nightly scan (velero, main) (push) Failing after 1m19s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-aws, main) (push) Failing after 48s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-gcp, main) (push) Failing after 55s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-microsoft-azure, main) (push) Failing after 49s
Snapshot size support
2025-10-27 13:50:06 +08:00
Lyndon-Li
d4147e406b snapshot size in restore CRs
Signed-off-by: Lyndon-Li <lyonghui@vmware.com>
2025-10-27 04:41:04 +00:00
lyndon-li
aafb616c12 Merge branch 'main' into cache-dir-for-udmprepo 2025-10-24 17:34:25 +08:00
Lyndon-Li
44f3166f0f Merge branch 'main' into cache-volume-for-exposer 2025-10-24 17:27:02 +08:00
lyndon-li
fb38727b9c Merge branch 'main' into snapshot-size-support 2025-10-24 16:55:50 +08:00
lyndon-li
c29ed91442 Merge pull request #9148 from Lyndon-Li/backup-repo-cache-design
Some checks failed
Run the E2E test on kind / get-go-version (push) Failing after 1m4s
Run the E2E test on kind / build (push) Has been skipped
Run the E2E test on kind / setup-test-matrix (push) Successful in 3s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Main CI / get-go-version (push) Failing after 13s
Main CI / Build (push) Has been skipped
Close stale issues and PRs / stale (push) Successful in 13s
Trivy Nightly Scan / Trivy nightly scan (velero, main) (push) Failing after 1m10s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-aws, main) (push) Failing after 46s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-gcp, main) (push) Failing after 51s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-microsoft-azure, main) (push) Failing after 57s
Backup repo cache design
2025-10-24 14:30:59 +08:00
lyndon-li
6280ffddaa Merge branch 'main' into backup-repo-cache-design 2025-10-23 15:03:17 +08:00
Lyndon-Li
2e3f41be22 backup repo cache design
Signed-off-by: Lyndon-Li <lyonghui@vmware.com>
2025-10-23 15:02:17 +08:00
Lyndon-Li
8d29051bbe expose supports cache volume
Signed-off-by: Lyndon-Li <lyonghui@vmware.com>
2025-10-22 19:11:56 +08:00
Lyndon-Li
6dbe772590 snapshot size in restore CRs only
Signed-off-by: Lyndon-Li <lyonghui@vmware.com>
2025-10-22 12:41:41 +08:00
Lyndon-Li
2aa319aa30 add snapshot size to data mover CRs
Signed-off-by: Lyndon-Li <lyonghui@vmware.com>
2025-10-21 16:48:48 +08:00
Lyndon-Li
903ee21f31 Merge branch 'main' into snapshot-size-support 2025-10-21 16:47:16 +08:00
Lyndon-Li
a4e3dccdce Merge branch 'main' into cache-dir-for-udmprepo 2025-10-21 16:42:28 +08:00
Wenkai Yin(尹文开)
53e99556ad Merge pull request #9291 from Lyndon-Li/dont-connect-repo-in-repo-controller
Some checks failed
Run the E2E test on kind / get-go-version (push) Failing after 55s
Run the E2E test on kind / build (push) Has been skipped
Run the E2E test on kind / setup-test-matrix (push) Successful in 2s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Main CI / get-go-version (push) Failing after 9s
Main CI / Build (push) Has been skipped
Close stale issues and PRs / stale (push) Successful in 9s
Trivy Nightly Scan / Trivy nightly scan (velero, main) (push) Failing after 1m16s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-aws, main) (push) Failing after 1m7s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-gcp, main) (push) Failing after 52s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-microsoft-azure, main) (push) Failing after 47s
Don't connect repo in repo controller
2025-10-21 16:10:20 +08:00
Lyndon-Li
166f50d776 add snapshot size to data mover CRs
Signed-off-by: Lyndon-Li <lyonghui@vmware.com>
2025-10-21 15:14:38 +08:00
Lyndon-Li
6c9699a06d udmrepo support cache dir
Signed-off-by: Lyndon-Li <lyonghui@vmware.com>
2025-10-21 13:39:37 +08:00
Lyndon-Li
79b8cc40b1 Merge branch 'main' into cache-dir-for-udmprepo 2025-10-21 13:37:11 +08:00
Lyndon-Li
3e39cb4b0f udmrepo support cache dir
Signed-off-by: Lyndon-Li <lyonghui@vmware.com>
2025-10-21 13:36:35 +08:00
lyndon-li
9b02402631 Merge pull request #9342 from Lyndon-Li/cache-dir-to-vgdp
Some checks failed
Run the E2E test on kind / get-go-version (push) Failing after 55s
Run the E2E test on kind / build (push) Has been skipped
Run the E2E test on kind / setup-test-matrix (push) Successful in 2s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Main CI / get-go-version (push) Failing after 7s
Main CI / Build (push) Has been skipped
Add cache configuration to VGDP
2025-10-21 13:19:00 +08:00
Lyndon-Li
6bd8033d24 add cache dir to VGDP
Signed-off-by: Lyndon-Li <lyonghui@vmware.com>
2025-10-17 18:00:03 +08:00
Lyndon-Li
7cec76e445 Merge branch 'main' into cache-dir-to-vgdp 2025-10-17 17:58:01 +08:00
lyndon-li
420a65a116 Merge pull request #9269 from Lyndon-Li/deprecate-pvc-node-selection
Some checks failed
Run the E2E test on kind / get-go-version (push) Failing after 57s
Run the E2E test on kind / build (push) Has been skipped
Run the E2E test on kind / setup-test-matrix (push) Successful in 3s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Main CI / get-go-version (push) Failing after 8s
Main CI / Build (push) Has been skipped
Close stale issues and PRs / stale (push) Successful in 10s
Trivy Nightly Scan / Trivy nightly scan (velero, main) (push) Failing after 1m0s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-aws, main) (push) Failing after 1m0s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-gcp, main) (push) Failing after 52s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-microsoft-azure, main) (push) Failing after 53s
Deprecate pvc node selection feature
2025-10-17 15:48:34 +08:00
Wenkai Yin(尹文开)
3bf4a7dced Merge pull request #9312 from vmware-tanzu/dependabot/github_actions/actions/stale-10.1.0
Bump actions/stale from 10.0.0 to 10.1.0
2025-10-17 14:09:23 +08:00
lyndon-li
2a5804b595 Merge branch 'main' into deprecate-pvc-node-selection 2025-10-17 13:31:21 +08:00
lyndon-li
b9af3a1947 Merge branch 'main' into dont-connect-repo-in-repo-controller 2025-10-17 13:31:11 +08:00
lyndon-li
9a3fabbc55 issue 9332: make bytesDone correct for incremental backup (#9333)
Some checks failed
Run the E2E test on kind / get-go-version (push) Failing after 58s
Run the E2E test on kind / build (push) Has been skipped
Run the E2E test on kind / setup-test-matrix (push) Successful in 2s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Main CI / get-go-version (push) Failing after 8s
Main CI / Build (push) Has been skipped
Close stale issues and PRs / stale (push) Successful in 10s
Trivy Nightly Scan / Trivy nightly scan (velero, main) (push) Failing after 1m18s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-aws, main) (push) Failing after 50s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-gcp, main) (push) Failing after 47s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-microsoft-azure, main) (push) Failing after 48s
Signed-off-by: Lyndon-Li <lyonghui@vmware.com>
2025-10-16 12:41:21 -04:00
lyndon-li
99a46ed818 Merge pull request #9329 from T4iFooN-IX/9327
Some checks failed
Run the E2E test on kind / get-go-version (push) Failing after 1m9s
Run the E2E test on kind / build (push) Has been skipped
Run the E2E test on kind / setup-test-matrix (push) Successful in 3s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Main CI / get-go-version (push) Failing after 11s
Main CI / Build (push) Has been skipped
Close stale issues and PRs / stale (push) Successful in 17s
Trivy Nightly Scan / Trivy nightly scan (velero, main) (push) Failing after 1m22s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-aws, main) (push) Failing after 56s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-gcp, main) (push) Failing after 56s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-microsoft-azure, main) (push) Failing after 53s
Fix typos (#9327)
2025-10-15 10:02:58 +08:00
Daniel Wituschek
93e8379530 Add changelog file
Signed-off-by: Daniel Wituschek <daniel.wituschek@intrexx.com>
2025-10-14 14:58:05 +02:00
lyndon-li
72ddfd7d78 Merge branch 'main' into deprecate-pvc-node-selection 2025-10-14 14:40:40 +08:00
lyndon-li
18260d88ca Merge branch 'main' into dont-connect-repo-in-repo-controller 2025-10-14 14:40:21 +08:00
Daniel Wituschek
8b3ba78c8c Fix typos
Signed-off-by: Daniel Wituschek <daniel.wituschek@intrexx.com>
2025-10-13 16:00:23 +02:00
Daniel Wituschek
b34f2deff2 Fix typos
Signed-off-by: Daniel Wituschek <daniel.wituschek@intrexx.com>
2025-10-13 15:58:20 +02:00
dependabot[bot]
e9666f9aea Bump actions/stale from 10.0.0 to 10.1.0
Some checks failed
Run the E2E test on kind / get-go-version (push) Failing after 1m8s
Run the E2E test on kind / build (push) Has been skipped
Run the E2E test on kind / setup-test-matrix (push) Successful in 4s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Bumps [actions/stale](https://github.com/actions/stale) from 10.0.0 to 10.1.0.
- [Release notes](https://github.com/actions/stale/releases)
- [Changelog](https://github.com/actions/stale/blob/main/CHANGELOG.md)
- [Commits](https://github.com/actions/stale/compare/v10.0.0...v10.1.0)

---
updated-dependencies:
- dependency-name: actions/stale
  dependency-version: 10.1.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-10-10 11:17:07 +08:00
lyndon-li
e6aab8ca93 add events to diagnose (#9296)
Some checks failed
Run the E2E test on kind / get-go-version (push) Failing after 1m10s
Run the E2E test on kind / build (push) Has been skipped
Run the E2E test on kind / setup-test-matrix (push) Successful in 3s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Main CI / get-go-version (push) Failing after 16s
Main CI / Build (push) Has been skipped
Close stale issues and PRs / stale (push) Successful in 11s
Trivy Nightly Scan / Trivy nightly scan (velero, main) (push) Failing after 1m43s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-aws, main) (push) Failing after 1m5s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-gcp, main) (push) Failing after 1m2s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-microsoft-azure, main) (push) Failing after 1m2s
Signed-off-by: Lyndon-Li <lyonghui@vmware.com>
2025-10-09 14:13:43 -04:00
Xun Jiang/Bruce Jiang
99f12b85ba Merge pull request #9301 from vmware-tanzu/fix_action_isse
Some checks failed
Run the E2E test on kind / get-go-version (push) Failing after 39s
Run the E2E test on kind / build (push) Has been skipped
Run the E2E test on kind / setup-test-matrix (push) Successful in 2s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Main CI / get-go-version (push) Failing after 3s
Main CI / Build (push) Has been skipped
Close stale issues and PRs / stale (push) Successful in 15s
Trivy Nightly Scan / Trivy nightly scan (velero, main) (push) Failing after 1m26s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-aws, main) (push) Failing after 55s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-gcp, main) (push) Failing after 59s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-microsoft-azure, main) (push) Failing after 51s
Fix the push action invalid variable ref issue.
2025-09-30 16:40:02 +08:00
Xun Jiang/Bruce Jiang
f8938e7fed VerifyJSONConfigs verify every elements in Data. (#9302)
Some checks failed
Run the E2E test on kind / get-go-version (push) Failing after 48s
Run the E2E test on kind / build (push) Has been skipped
Run the E2E test on kind / setup-test-matrix (push) Successful in 2s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Main CI / get-go-version (push) Failing after 4s
Main CI / Build (push) Has been skipped
Close stale issues and PRs / stale (push) Failing after 6s
Trivy Nightly Scan / Trivy nightly scan (velero, main) (push) Failing after 7s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-aws, main) (push) Failing after 3s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-gcp, main) (push) Failing after 3s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-microsoft-azure, main) (push) Failing after 4s
Add error message in the velero install CLI output if VerifyJSONConfigs fail.
Only allow one element in node-agent-configmap's Data.

Signed-off-by: Xun Jiang <xun.jiang@broadcom.com>
2025-09-29 15:08:05 -04:00
Xun Jiang
cabb04575e Fix the push action invalid variable ref issue.
Signed-off-by: Xun Jiang <xun.jiang@broadcom.com>
2025-09-27 23:33:37 +08:00
lyndon-li
60dbcbc60d Merge pull request #9295 from sseago/privileged-fs-backup-pods
Some checks failed
Run the E2E test on kind / get-go-version (push) Failing after 44s
Run the E2E test on kind / build (push) Has been skipped
Run the E2E test on kind / setup-test-matrix (push) Successful in 2s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Main CI / get-go-version (push) Failing after 4s
Main CI / Build (push) Has been skipped
Close stale issues and PRs / stale (push) Failing after 6s
Trivy Nightly Scan / Trivy nightly scan (velero, main) (push) Failing after 5s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-aws, main) (push) Failing after 3s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-gcp, main) (push) Failing after 3s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-microsoft-azure, main) (push) Failing after 3s
Privileged fs backup pods
2025-09-26 10:31:59 +08:00
Scott Seago
4ade8cf8a2 Add option for privileged fs-backup pod
Signed-off-by: Scott Seago <sseago@redhat.com>
2025-09-25 15:38:39 -04:00
Joseph
75f1817cba Simplify
Signed-off-by: Joseph <jvaikath@redhat.com>
2025-09-25 09:28:20 -04:00
Lyndon-Li
78ddeef96c add cache dir to VGDP
Signed-off-by: Lyndon-Li <lyonghui@vmware.com>
2025-09-25 18:19:23 +08:00
lyndon-li
814db6541f Merge branch 'main' into backup-repo-cache-design 2025-09-25 16:50:22 +08:00
Joseph
cf7a9495c5 Leaner design
Signed-off-by: Joseph <jvaikath@redhat.com>
2025-09-25 04:40:13 -04:00
Joseph
14a6315667 update
Signed-off-by: Joseph <jvaikath@redhat.com>
2025-09-25 04:09:59 -04:00
Daniel Jiang
826c73131e Merge pull request #9233 from Lyndon-Li/backup-pvc-to-different-node
Some checks failed
Run the E2E test on kind / get-go-version (push) Failing after 59s
Run the E2E test on kind / build (push) Has been skipped
Run the E2E test on kind / setup-test-matrix (push) Successful in 2s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Main CI / get-go-version (push) Failing after 4s
Main CI / Build (push) Has been skipped
Close stale issues and PRs / stale (push) Failing after 4s
Trivy Nightly Scan / Trivy nightly scan (velero, main) (push) Failing after 4s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-aws, main) (push) Failing after 2s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-gcp, main) (push) Failing after 3s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-microsoft-azure, main) (push) Failing after 3s
backupPVC to different node
2025-09-24 22:16:36 +08:00
lyndon-li
c90856de65 Merge branch 'main' into dont-connect-repo-in-repo-controller 2025-09-23 16:12:09 +08:00
Lyndon-Li
a0aac09f0a don't connect repo in repo controller
Signed-off-by: Lyndon-Li <lyonghui@vmware.com>
2025-09-23 16:11:24 +08:00
lyndon-li
903caa9c02 Merge branch 'main' into backup-repo-cache-design 2025-09-23 11:47:22 +08:00
lyndon-li
031df8d5e0 Merge branch 'main' into deprecate-pvc-node-selection 2025-09-23 11:46:00 +08:00
lyndon-li
21691451e9 Merge branch 'main' into backup-pvc-to-different-node 2025-09-23 11:43:24 +08:00
lyndon-li
50d7b1cff1 Merge pull request #9248 from 0xLeo258/main
Some checks failed
Run the E2E test on kind / get-go-version (push) Failing after 1m0s
Run the E2E test on kind / build (push) Has been skipped
Run the E2E test on kind / setup-test-matrix (push) Successful in 3s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Main CI / get-go-version (push) Failing after 5s
Main CI / Build (push) Has been skipped
Close stale issues and PRs / stale (push) Failing after 5m54s
Trivy Nightly Scan / Trivy nightly scan (velero, main) (push) Failing after 2m33s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-aws, main) (push) Failing after 1m22s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-gcp, main) (push) Failing after 1m49s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-microsoft-azure, main) (push) Failing after 1m4s
Issue #9247: Protect VolumeSnapshot field from race condition
2025-09-23 11:39:57 +08:00
lyndon-li
37df853a99 Merge branch 'main' into deprecate-pvc-node-selection 2025-09-23 11:39:05 +08:00
lyndon-li
d545ad49ba Merge branch 'main' into main 2025-09-23 11:10:38 +08:00
lyndon-li
7831bf25b9 Merge pull request #9281 from 0xLeo258/issue9234
Issue #9234: Fix plugin reentry with safe VolumeSnapshotterCache
2025-09-23 11:06:21 +08:00
Joseph
0b40702900 Updates
Signed-off-by: Joseph <jvaikath@redhat.com>
2025-09-22 17:31:22 -04:00
Xun Jiang/Bruce Jiang
2abe91e08c Merge pull request #9250 from blackpiglet/distinguish_go_version_for_main_and_other_release
Some checks failed
Run the E2E test on kind / get-go-version (push) Failing after 42s
Run the E2E test on kind / build (push) Has been skipped
Run the E2E test on kind / setup-test-matrix (push) Successful in 3s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Main CI / get-go-version (push) Failing after 3s
Main CI / Build (push) Has been skipped
Close stale issues and PRs / stale (push) Failing after 9s
Trivy Nightly Scan / Trivy nightly scan (velero, main) (push) Failing after 7s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-aws, main) (push) Failing after 5s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-gcp, main) (push) Failing after 3s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-microsoft-azure, main) (push) Failing after 3s
Use different go version check logic for main and other branches.
2025-09-22 22:48:21 +08:00
0xLeo258
1ebe357d18 Add built-in mutex for SynchronizedVSList && Update unit tests
Signed-off-by: 0xLeo258 <noixe0312@gmail.com>
2025-09-20 09:13:07 +08:00
0xLeo258
9df17eb02b add changelog
Signed-off-by: 0xLeo258 <noixe0312@gmail.com>
2025-09-20 09:13:07 +08:00
0xLeo258
f2a27c3864 fix9247: Protect VolumeSnapshot field
Signed-off-by: 0xLeo258 <noixe0312@gmail.com>
2025-09-20 09:13:07 +08:00
lyndon-li
8ee3436f5c Merge branch 'main' into deprecate-pvc-node-selection 2025-09-19 17:24:38 +08:00
Xun Jiang
4847eeaf62 Use different go version check logic for main and other branches.
main branch will read go version from go.mod's go primitive, and
only keep major and minor version, because we want the actions to use
the lastest patch version automatically, even the go.mod specify version
like 1.24.0.
release branch can read the go version from go.mod file by setup-go
action's own logic.
Refactor the get Go version to reusable workflow.

Signed-off-by: Xun Jiang <xun.jiang@broadcom.com>
2025-09-19 16:58:18 +08:00
dependabot[bot]
1ec281a64e Bump actions/setup-go from 5 to 6 (#9231)
Some checks failed
Run the E2E test on kind / build (push) Failing after 10s
Run the E2E test on kind / setup-test-matrix (push) Successful in 3s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Main CI / Build (push) Failing after 5s
Close stale issues and PRs / stale (push) Failing after 2m15s
Trivy Nightly Scan / Trivy nightly scan (velero, main) (push) Failing after 7s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-aws, main) (push) Failing after 5s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-gcp, main) (push) Failing after 4s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-microsoft-azure, main) (push) Failing after 4s
Bumps [actions/setup-go](https://github.com/actions/setup-go) from 5 to 6.
- [Release notes](https://github.com/actions/setup-go/releases)
- [Commits](https://github.com/actions/setup-go/compare/v5...v6)

---
updated-dependencies:
- dependency-name: actions/setup-go
  dependency-version: '6'
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-09-18 12:29:45 -04:00
0xLeo258
25de1bb3b6 add changelog
Signed-off-by: 0xLeo258 <noixe0312@gmail.com>
2025-09-18 17:36:07 +08:00
0xLeo258
e21b21c19e fix 9234: Add safe VolumeSnapshotterCache
Signed-off-by: 0xLeo258 <noixe0312@gmail.com>
2025-09-18 17:21:25 +08:00
Xun Jiang/Bruce Jiang
b19cad9d01 Merge pull request #9280 from kaovilai/bitnamiminio
Some checks failed
Run the E2E test on kind / build (push) Failing after 3s
Run the E2E test on kind / setup-test-matrix (push) Successful in 3s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Main CI / Build (push) Failing after 3s
Fix E2E tests: Build MinIO from Bitnami Dockerfile to replace deprecated image
2025-09-18 14:25:41 +08:00
Tiger Kaovilai
9b6c4b1d47 Fix E2E tests: Build MinIO from Bitnami Dockerfile to replace deprecated image
The Bitnami MinIO image bitnami/minio:2021.6.17-debian-10-r7 is no longer
available on Docker Hub, causing E2E tests to fail.

This change implements a solution to build the MinIO image locally from
Bitnami's public Dockerfile and cache it for subsequent runs:
- Fetches the latest commit hash of the Bitnami MinIO Dockerfile
- Uses GitHub Actions cache to store/retrieve built images
- Only rebuilds when the upstream Dockerfile changes
- Maintains compatibility with existing environment variables

Fixes #9279

🤖 Generated with [Claude Code](https://claude.ai/code)

Update .github/workflows/e2e-test-kind.yaml

Signed-off-by: Tiger Kaovilai <passawit.kaovilai@gmail.com>
Co-Authored-By: Claude <noreply@anthropic.com>
Signed-off-by: Tiger Kaovilai <tkaovila@redhat.com>
2025-09-17 19:08:07 -04:00
Lyndon-Li
b9159c22ca deprecate pvc node selection
Signed-off-by: Lyndon-Li <lyonghui@vmware.com>
2025-09-17 15:22:42 +08:00
lyndon-li
112bea520e Merge branch 'main' into deprecate-pvc-node-selection 2025-09-17 15:19:40 +08:00
Lyndon-Li
7e15e9ba05 deprecate pvc node selection
Signed-off-by: Lyndon-Li <lyonghui@vmware.com>
2025-09-17 15:09:54 +08:00
Wenkai Yin(尹文开)
f50cafa472 Merge pull request #9264 from shubham-pampattiwar/fix-backup-q-accum
Some checks failed
Run the E2E test on kind / build (push) Failing after 7s
Run the E2E test on kind / setup-test-matrix (push) Successful in 3s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Main CI / Build (push) Failing after 4s
Close stale issues and PRs / stale (push) Failing after 10m30s
Trivy Nightly Scan / Trivy nightly scan (velero, main) (push) Failing after 11s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-aws, main) (push) Failing after 5s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-gcp, main) (push) Failing after 5s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-microsoft-azure, main) (push) Failing after 4s
Fix Schedule Backup Queue Accumulation During Extended Blocking Scenarios
2025-09-17 14:07:32 +08:00
Shubham Pampattiwar
a7b2985c83 add changelog file
Signed-off-by: Shubham Pampattiwar <spampatt@redhat.com>
2025-09-15 16:07:40 -07:00
Shubham Pampattiwar
59289fba76 Fix Schedule Backup Queue Accumulation During Extended Blocking Scenarios
Signed-off-by: Shubham Pampattiwar <spampatt@redhat.com>
2025-09-15 16:01:33 -07:00
Wenkai Yin(尹文开)
925479553a Merge pull request #9256 from shubham-pampattiwar/inhrerit-tolr-jobs
Some checks failed
Run the E2E test on kind / build (push) Failing after 9s
Run the E2E test on kind / setup-test-matrix (push) Successful in 2s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Main CI / Build (push) Failing after 3s
Close stale issues and PRs / stale (push) Failing after 4m11s
Trivy Nightly Scan / Trivy nightly scan (velero, main) (push) Failing after 1m1s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-aws, main) (push) Failing after 4s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-gcp, main) (push) Failing after 4s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-microsoft-azure, main) (push) Failing after 5s
Fix maintenance jobs toleration inheritance from Velero deployment
2025-09-15 14:49:21 +08:00
lyndon-li
47340e67af Merge branch 'main' into backup-pvc-to-different-node 2025-09-12 13:30:34 +08:00
Lyndon-Li
25a7ef0e87 backupPVC to different node
Signed-off-by: Lyndon-Li <lyonghui@vmware.com>
2025-09-12 13:27:58 +08:00
lyndon-li
799d596d5c Merge pull request #9226 from sseago/iba-perf
Some checks failed
Run the E2E test on kind / build (push) Failing after 5s
Run the E2E test on kind / setup-test-matrix (push) Successful in 2s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Main CI / Build (push) Failing after 4s
Close stale issues and PRs / stale (push) Failing after 2m10s
Trivy Nightly Scan / Trivy nightly scan (velero, main) (push) Failing after 39s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-aws, main) (push) Failing after 6s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-gcp, main) (push) Failing after 5s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-microsoft-azure, main) (push) Failing after 6s
Get pod list once per namespace in pvc IBA
2025-09-12 10:55:43 +08:00
Shubham Pampattiwar
5ba00dfb09 Fix maintenance jobs toleration inheritance from Velero deployment
Signed-off-by: Shubham Pampattiwar <spampatt@redhat.com>

fix codespell and add changelog file

Signed-off-by: Shubham Pampattiwar <spampatt@redhat.com>
2025-09-11 16:04:26 -07:00
Priyansh Choudhary
f1476defde Update AzureAD Microsoft Authentication Library to v1.5.0 (#9244)
Some checks failed
Run the E2E test on kind / build (push) Failing after 9s
Run the E2E test on kind / setup-test-matrix (push) Successful in 3s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Main CI / Build (push) Failing after 4s
Close stale issues and PRs / stale (push) Failing after 6m3s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-aws, main) (push) Failing after 38s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-gcp, main) (push) Failing after 2s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-microsoft-azure, main) (push) Failing after 6s
Trivy Nightly Scan / Trivy nightly scan (velero, main) (push) Failing after 11m22s
* Update AzureAD Microsoft Authentication Library to v1.5.0
Signed-off-by: Priyansh Choudhary <im1706@gmail.com>

* Added Changelog
Signed-off-by: Priyansh Choudhary <im1706@gmail.com>

---------

Signed-off-by: Priyansh Choudhary <im1706@gmail.com>
2025-09-11 14:07:46 -04:00
Xun Jiang/Bruce Jiang
67ff0dcbe0 Merge pull request #9240 from vmware-tanzu/update_velero_supported_k8s_versions
Some checks failed
Run the E2E test on kind / build (push) Failing after 8s
Run the E2E test on kind / setup-test-matrix (push) Successful in 3s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Main CI / Build (push) Failing after 4s
Add v1.34.0 for v1.17 compatible k8s versions.
2025-09-11 15:20:13 +08:00
lyndon-li
aad9dd9068 Merge branch 'main' into backup-pvc-to-different-node 2025-09-11 14:47:35 +08:00
lyndon-li
b636334079 Merge pull request #9241 from blackpiglet/bump_k8s_lib_to_1.33_for_main
Bump k8s library to v1.33.
2025-09-11 14:20:40 +08:00
lyndon-li
4d44705ed8 Merge branch 'main' into backup-pvc-to-different-node 2025-09-11 13:08:22 +08:00
Lyndon-Li
81c5b6692d backupPVC to different node
Signed-off-by: Lyndon-Li <lyonghui@vmware.com>
2025-09-11 13:04:24 +08:00
dependabot[bot]
02edbc0c65 Bump actions/stale from 9.1.0 to 10.0.0 (#9232)
Some checks failed
Run the E2E test on kind / build (push) Failing after 4s
Run the E2E test on kind / setup-test-matrix (push) Successful in 3s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Main CI / Build (push) Failing after 4s
Bumps [actions/stale](https://github.com/actions/stale) from 9.1.0 to 10.0.0.
- [Release notes](https://github.com/actions/stale/releases)
- [Changelog](https://github.com/actions/stale/blob/main/CHANGELOG.md)
- [Commits](https://github.com/actions/stale/compare/v9.1.0...v10.0.0)

---
updated-dependencies:
- dependency-name: actions/stale
  dependency-version: 10.0.0
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-09-10 16:44:18 -05:00
Xun Jiang
e8208097ba Bump k8s library to v1.33.
Replace deprecated EventExpansion method with WithContext methods.
Modify UTs.
Align the E2E ginkgo CLI version with go.mod

Signed-off-by: Xun Jiang <xun.jiang@broadcom.com>
2025-09-10 17:58:38 +08:00
Xun Jiang
4c30499340 Add v1.34.0 for v1.17 compatible k8s versions.
Signed-off-by: Xun Jiang <xun.jiang@broadcom.com>
2025-09-10 17:28:23 +08:00
Scott Seago
2a9203f1b2 Get pod list once per namespace in pvc IBA
Signed-off-by: Scott Seago <sseago@redhat.com>
2025-09-09 13:19:06 -04:00
lyndon-li
3be76da952 Merge pull request #8991 from sseago/concurrent-backup-design
Some checks failed
Run the E2E test on kind / build (push) Failing after 2m18s
Run the E2E test on kind / setup-test-matrix (push) Successful in 1m20s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Main CI / Build (push) Failing after 43s
Close stale issues and PRs / stale (push) Successful in 14s
Trivy Nightly Scan / Trivy nightly scan (velero, main) (push) Failing after 7s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-aws, main) (push) Failing after 4s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-gcp, main) (push) Failing after 3s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-microsoft-azure, main) (push) Failing after 4s
Concurrent backup design doc
2025-09-05 11:21:40 +08:00
Scott Seago
7132720a49 Concurrent backup design doc
Signed-off-by: Scott Seago <sseago@redhat.com>
2025-09-03 12:09:55 -04:00
Xun Jiang/Bruce Jiang
2dbfbc29e8 Merge pull request #9214 from weeix/patch-1
Some checks failed
Run the E2E test on kind / build (push) Failing after 7s
Run the E2E test on kind / setup-test-matrix (push) Successful in 2s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Main CI / Build (push) Failing after 3s
Close stale issues and PRs / stale (push) Successful in 26s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-aws, main) (push) Failing after 8s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-gcp, main) (push) Failing after 4s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-microsoft-azure, main) (push) Failing after 4s
Trivy Nightly Scan / Trivy nightly scan (velero, main) (push) Failing after 14m0s
clarify VolumeSnapshotClass error for mismatched driver/provisioner
2025-09-03 15:12:09 +08:00
weeix
80da461458 clarify VolumeSnapshotClass error for mismatched driver/provisioner
Signed-off-by: weeix <weeix@users.noreply.github.com>
2025-09-02 18:31:13 -05:00
Xun Jiang/Bruce Jiang
fdee2700a7 Merge pull request #9219 from blackpiglet/9157_e2e
Some checks failed
Run the E2E test on kind / build (push) Failing after 4s
Run the E2E test on kind / setup-test-matrix (push) Successful in 3s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Main CI / Build (push) Failing after 3s
Close stale issues and PRs / stale (push) Successful in 14s
Trivy Nightly Scan / Trivy nightly scan (velero, main) (push) Failing after 10s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-aws, main) (push) Failing after 5s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-gcp, main) (push) Failing after 4s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-microsoft-azure, main) (push) Failing after 5s
Add E2E auto case for node-agent-config validation.
2025-09-02 22:37:33 +08:00
Xun Jiang
8e1c4a7dc5 Add E2E cases for node-agent-configmap.
Some checks failed
Run the E2E test on kind / build (push) Failing after 11s
Run the E2E test on kind / setup-test-matrix (push) Successful in 2s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Fix the default BackupRepoConfig setting issue.
Delete PriorityClass in migration case clean stage.

Signed-off-by: Xun Jiang <xun.jiang@broadcom.com>
2025-09-02 15:03:20 +08:00
lyndon-li
09b5183fce Merge pull request #9173 from clementnuss/feat/backup-pvc-annotations
Some checks failed
Run the E2E test on kind / build (push) Failing after 8s
Run the E2E test on kind / setup-test-matrix (push) Successful in 3s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Main CI / Build (push) Failing after 3s
Close stale issues and PRs / stale (push) Successful in 13s
Trivy Nightly Scan / Trivy nightly scan (velero, main) (push) Failing after 3m0s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-aws, main) (push) Failing after 55s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-gcp, main) (push) Failing after 2s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-microsoft-azure, main) (push) Failing after 21s
feat: Permit specifying annotations for the BackupPVC
2025-08-29 16:46:30 +08:00
Clément Nussbaumer
c5b70b4a0d test: fix backuppvc annotations test case
Signed-off-by: Clément Nussbaumer <clement.nussbaumer@postfinance.ch>
2025-08-29 10:10:41 +02:00
Clément Nussbaumer
248a840918 feat: Permit specifying annotations for the BackupPVC
Signed-off-by: Clément Nussbaumer <clement.nussbaumer@postfinance.ch>
2025-08-29 10:10:41 +02:00
Xun Jiang/Bruce Jiang
04fb20676d Merge pull request #9215 from blackpiglet/9135_e2e
Add E2E test cases for repository maintenance job configuration.
2025-08-29 13:27:55 +08:00
Xun Jiang
996d2a025f Add E2E test cases for repository maintenance job configuration.
Signed-off-by: Xun Jiang <xun.jiang@broadcom.com>
2025-08-28 20:06:15 +08:00
Joseph
528392ac5b Added struct change
Signed-off-by: Joseph <jvaikath@redhat.com>
2025-08-19 17:44:45 -07:00
Joseph
56df64b625 Status fields are part of a struct
Signed-off-by: Joseph <jvaikath@redhat.com>
2025-08-19 17:44:45 -07:00
Joseph
eb8b382816 Update design
Signed-off-by: Joseph <jvaikath@redhat.com>
2025-08-19 17:44:45 -07:00
Joseph
571c9bd3ef Enhance wildcard namespace support design document
- Expanded the design to include detailed implementation steps for wildcard expansion in both backup and restore operations.
- Added new status fields to the backup and restore CRDs to track expanded wildcard namespaces.
- Clarified the approach to ensure backward compatibility with existing `*` behavior.
- Addressed limitations and provided insights on restore operations handling wildcard-expanded backups.

This update aims to provide a comprehensive and clear framework for implementing wildcard namespace support in Velero.

Signed-off-by: Joseph <jvaikath@redhat.com>
2025-08-19 17:44:45 -07:00
Joseph
037db22afe Refine wildcard namespace support design document
- Clarified the use of the standalone `*` character in namespace specifications.
- Ensured consistent formatting for `*` throughout the document.
- Maintained focus on backward compatibility and limitations regarding wildcard usage.

This update enhances the clarity and consistency of the design document for implementing wildcard namespace support in Velero.

Signed-off-by: Joseph <jvaikath@redhat.com>
2025-08-19 17:44:45 -07:00
Joseph
4c1457c318 Enhance wildcard namespace support in backup and restore design document
- Updated the abstract to clarify the current limitations of namespace specifications in Velero.
- Expanded the goals section to include specific objectives for implementing wildcard patterns in `--include-namespaces` and `--exclude-namespaces`.
- Detailed the high-level design and implementation steps, including the addition of new status fields in the backup CRD and the creation of a utility package for wildcard expansion.
- Addressed backward compatibility and known limitations regarding the use of wildcards alongside the existing "*" character.

This update aims to provide a comprehensive overview of the proposed changes for improved namespace selection flexibility.

Signed-off-by: Joseph <jvaikath@redhat.com>
2025-08-19 17:44:45 -07:00
Joseph
c0699c443b New design doc
Signed-off-by: Joseph <jvaikath@redhat.com>
2025-08-19 17:44:45 -07:00
Joseph
69e307918b Update design
Signed-off-by: Joseph <jvaikath@redhat.com>
2025-08-19 17:44:45 -07:00
Joseph
571a816a61 Add design doc
Signed-off-by: Joseph <jvaikath@redhat.com>
2025-08-19 17:44:45 -07:00
copilot-swe-agent[bot]
49b2851f08 Add cross-references between node-agent config documents
Co-authored-by: kaovilai <11228024+kaovilai@users.noreply.github.com>
2025-08-19 17:49:20 +00:00
copilot-swe-agent[bot]
cbadd9047f Add comprehensive node-agent-config documentation
Co-authored-by: kaovilai <11228024+kaovilai@users.noreply.github.com>
2025-08-19 17:43:31 +00:00
copilot-swe-agent[bot]
764975ba29 Initial plan 2025-08-19 17:35:12 +00:00
lyndon-li
2de5a5c1a7 Merge branch 'main' into backup-repo-cache-design 2025-08-15 18:00:11 +08:00
Lyndon-Li
133db854b2 backup repo cache design
Signed-off-by: Lyndon-Li <lyonghui@vmware.com>
2025-08-15 17:55:50 +08:00
254 changed files with 7381 additions and 1214 deletions

View File

@@ -8,16 +8,26 @@ on:
- "design/**"
- "**/*.md"
jobs:
get-go-version:
uses: ./.github/workflows/get-go-version.yaml
with:
ref: ${{ github.event.pull_request.base.ref }}
# Build the Velero CLI and image once for all Kubernetes versions, and cache it so the fan-out workers can get it.
build:
runs-on: ubuntu-latest
needs: get-go-version
outputs:
minio-dockerfile-sha: ${{ steps.minio-version.outputs.dockerfile_sha }}
steps:
- name: Check out the code
uses: actions/checkout@v5
- name: Set up Go
uses: actions/setup-go@v5
uses: actions/checkout@v6
- name: Set up Go version
uses: actions/setup-go@v6
with:
go-version-file: 'go.mod'
go-version: ${{ needs.get-go-version.outputs.version }}
# Look for a CLI that's made for this PR
- name: Fetch built CLI
id: cli-cache
@@ -44,6 +54,26 @@ jobs:
run: |
IMAGE=velero VERSION=pr-test BUILD_OUTPUT_TYPE=docker make container
docker save velero:pr-test-linux-amd64 -o ./velero.tar
# Check and build MinIO image once for all e2e tests
- name: Check Bitnami MinIO Dockerfile version
id: minio-version
run: |
DOCKERFILE_SHA=$(curl -s https://api.github.com/repos/bitnami/containers/commits?path=bitnami/minio/2025/debian-12/Dockerfile\&per_page=1 | jq -r '.[0].sha')
echo "dockerfile_sha=${DOCKERFILE_SHA}" >> $GITHUB_OUTPUT
- name: Cache MinIO Image
uses: actions/cache@v4
id: minio-cache
with:
path: ./minio-image.tar
key: minio-bitnami-${{ steps.minio-version.outputs.dockerfile_sha }}
- name: Build MinIO Image from Bitnami Dockerfile
if: steps.minio-cache.outputs.cache-hit != 'true'
run: |
echo "Building MinIO image from Bitnami Dockerfile..."
git clone --depth 1 https://github.com/bitnami/containers.git /tmp/bitnami-containers
cd /tmp/bitnami-containers/bitnami/minio/2025/debian-12
docker build -t bitnami/minio:local .
docker save bitnami/minio:local > ${{ github.workspace }}/minio-image.tar
# Create json of k8s versions to test
# from guide: https://stackoverflow.com/a/65094398/4590470
setup-test-matrix:
@@ -75,20 +105,34 @@ jobs:
needs:
- build
- setup-test-matrix
- get-go-version
runs-on: ubuntu-latest
strategy:
matrix: ${{fromJson(needs.setup-test-matrix.outputs.matrix)}}
fail-fast: false
steps:
- name: Check out the code
uses: actions/checkout@v5
- name: Set up Go
uses: actions/setup-go@v5
uses: actions/checkout@v6
- name: Set up Go version
uses: actions/setup-go@v6
with:
go-version-file: 'go.mod'
go-version: ${{ needs.get-go-version.outputs.version }}
# Fetch the pre-built MinIO image from the build job
- name: Fetch built MinIO Image
uses: actions/cache@v4
id: minio-cache
with:
path: ./minio-image.tar
key: minio-bitnami-${{ needs.build.outputs.minio-dockerfile-sha }}
- name: Load MinIO Image
run: |
echo "Loading MinIO image..."
docker load < ./minio-image.tar
- name: Install MinIO
run:
docker run -d --rm -p 9000:9000 -e "MINIO_ACCESS_KEY=minio" -e "MINIO_SECRET_KEY=minio123" -e "MINIO_DEFAULT_BUCKETS=bucket,additional-bucket" bitnami/minio:2021.6.17-debian-10-r7
run: |
docker run -d --rm -p 9000:9000 -e "MINIO_ROOT_USER=minio" -e "MINIO_ROOT_PASSWORD=minio123" -e "MINIO_DEFAULT_BUCKETS=bucket,additional-bucket" bitnami/minio:local
- uses: engineerd/setup-kind@v0.6.2
with:
skipClusterLogsExport: true
@@ -141,7 +185,7 @@ jobs:
timeout-minutes: 30
- name: Upload debug bundle
if: ${{ failure() }}
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@v5
with:
name: DebugBundle
name: DebugBundle-k8s-${{ matrix.k8s }}-job-${{ strategy.job-index }}
path: /home/runner/work/velero/velero/test/e2e/debug-bundle*

33
.github/workflows/get-go-version.yaml vendored Normal file
View File

@@ -0,0 +1,33 @@
on:
workflow_call:
inputs:
ref:
description: "The target branch's ref"
required: true
type: string
outputs:
version:
description: "The expected Go version"
value: ${{ jobs.extract.outputs.version }}
jobs:
extract:
runs-on: ubuntu-latest
outputs:
version: ${{ steps.pick-version.outputs.version }}
steps:
- name: Check out the code
uses: actions/checkout@v6
- id: pick-version
run: |
if [ "${{ inputs.ref }}" == "main" ]; then
version=$(grep '^go ' go.mod | awk '{print $2}' | cut -d. -f1-2)
else
goDirectiveVersion=$(grep '^go ' go.mod | awk '{print $2}')
toolChainVersion=$(grep '^toolchain ' go.mod | awk '{print $2}')
version=$(printf "%s\n%s\n" "$goDirectiveVersion" "$toolChainVersion" | sort -V | tail -n1)
fi
echo "version=$version"
echo "version=$version" >> $GITHUB_OUTPUT

View File

@@ -19,7 +19,7 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@v5
uses: actions/checkout@v6
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@master

View File

@@ -12,7 +12,7 @@ jobs:
steps:
- name: Check out the code
uses: actions/checkout@v5
uses: actions/checkout@v6
- name: Changelog check
if: ${{ !(contains(github.event.pull_request.labels.*.name, 'kind/changelog-not-required') || contains(github.event.pull_request.labels.*.name, 'Design') || contains(github.event.pull_request.labels.*.name, 'Website') || contains(github.event.pull_request.labels.*.name, 'Documentation'))}}

View File

@@ -1,18 +1,26 @@
name: Pull Request CI Check
on: [pull_request]
jobs:
get-go-version:
uses: ./.github/workflows/get-go-version.yaml
with:
ref: ${{ github.event.pull_request.base.ref }}
build:
name: Run CI
needs: get-go-version
runs-on: ubuntu-latest
strategy:
fail-fast: false
steps:
- name: Check out the code
uses: actions/checkout@v5
- name: Set up Go
uses: actions/setup-go@v5
uses: actions/checkout@v6
- name: Set up Go version
uses: actions/setup-go@v6
with:
go-version-file: 'go.mod'
go-version: ${{ needs.get-go-version.outputs.version }}
- name: Make ci
run: make ci
- name: Upload test coverage

View File

@@ -8,7 +8,7 @@ jobs:
steps:
- name: Check out the code
uses: actions/checkout@v5
uses: actions/checkout@v6
- name: Codespell
uses: codespell-project/actions-codespell@master

View File

@@ -13,7 +13,7 @@ jobs:
name: Build
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v6
name: Checkout
- name: Set up QEMU

View File

@@ -14,7 +14,7 @@ jobs:
name: Build
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v6
name: Checkout
- name: Verify .goreleaser.yml and try a dryrun release.

View File

@@ -7,18 +7,26 @@ on:
- "design/**"
- "**/*.md"
jobs:
get-go-version:
uses: ./.github/workflows/get-go-version.yaml
with:
ref: ${{ github.event.pull_request.base.ref }}
build:
name: Run Linter Check
runs-on: ubuntu-latest
needs: get-go-version
steps:
- name: Check out the code
uses: actions/checkout@v5
- name: Set up Go
uses: actions/setup-go@v5
uses: actions/checkout@v6
- name: Set up Go version
uses: actions/setup-go@v6
with:
go-version-file: 'go.mod'
go-version: ${{ needs.get-go-version.outputs.version }}
- name: Linter check
uses: golangci/golangci-lint-action@v8
uses: golangci/golangci-lint-action@v9
with:
version: v2.1.1
version: v2.5.0
args: --verbose

View File

@@ -12,7 +12,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v6
with:
# The default value is "1" which fetches only a single commit. If we merge PR without squash or rebase,
# there are at least two commits: the first one is the merge commit and the second one is the real commit

View File

@@ -9,17 +9,24 @@ on:
- '*'
jobs:
get-go-version:
uses: ./.github/workflows/get-go-version.yaml
with:
ref: ${{ github.ref_name }}
build:
name: Build
runs-on: ubuntu-latest
needs: get-go-version
steps:
- name: Check out the code
uses: actions/checkout@v5
- name: Set up Go
uses: actions/setup-go@v5
uses: actions/checkout@v6
- name: Set up Go version
uses: actions/setup-go@v6
with:
go-version-file: 'go.mod'
go-version: ${{ needs.get-go-version.outputs.version }}
- name: Set up QEMU
id: qemu
uses: docker/setup-qemu-action@v3

View File

@@ -9,7 +9,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout the latest code
uses: actions/checkout@v5
uses: actions/checkout@v6
with:
fetch-depth: 0
- name: Automatic Rebase

View File

@@ -7,7 +7,7 @@ jobs:
stale:
runs-on: ubuntu-latest
steps:
- uses: actions/stale@v9.1.0
- uses: actions/stale@v10.1.0
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
stale-issue-message: "This issue is stale because it has been open 60 days with no activity. Remove stale label or comment or this will be closed in 14 days. If a Velero team member has requested log or more information, please provide the output of the shared commands."

View File

@@ -13,7 +13,7 @@
# limitations under the License.
# Velero binary build section
FROM --platform=$BUILDPLATFORM golang:1.24.6-bookworm AS velero-builder
FROM --platform=$BUILDPLATFORM golang:1.25-bookworm AS velero-builder
ARG GOPROXY
ARG BIN
@@ -49,7 +49,7 @@ RUN mkdir -p /output/usr/bin && \
go clean -modcache -cache
# Restic binary build section
FROM --platform=$BUILDPLATFORM golang:1.24.6-bookworm AS restic-builder
FROM --platform=$BUILDPLATFORM golang:1.25-bookworm AS restic-builder
ARG GOPROXY
ARG BIN
@@ -73,7 +73,7 @@ RUN mkdir -p /output/usr/bin && \
go clean -modcache -cache
# Velero image packing section
FROM paketobuildpacks/run-jammy-tiny:0.2.73
FROM paketobuildpacks/run-jammy-tiny:latest
LABEL maintainer="Xun Jiang <jxun@vmware.com>"

View File

@@ -15,7 +15,7 @@
ARG OS_VERSION=1809
# Velero binary build section
FROM --platform=$BUILDPLATFORM golang:1.24.6-bookworm AS velero-builder
FROM --platform=$BUILDPLATFORM golang:1.25-bookworm AS velero-builder
ARG GOPROXY
ARG BIN

View File

@@ -42,7 +42,7 @@ The following is a list of the supported Kubernetes versions for each Velero ver
| Velero version | Expected Kubernetes version compatibility | Tested on Kubernetes version |
|----------------|-------------------------------------------|-------------------------------------|
| 1.17 | 1.18-latest | 1.31.7, 1.32.3, and 1.33.1 |
| 1.17 | 1.18-latest | 1.31.7, 1.32.3, 1.33.1, and 1.34.0 |
| 1.16 | 1.18-latest | 1.31.4, 1.32.3, and 1.33.0 |
| 1.15 | 1.18-latest | 1.28.8, 1.29.8, 1.30.4 and 1.31.1 |
| 1.14 | 1.18-latest | 1.27.9, 1.28.9, and 1.29.4 |

View File

@@ -52,7 +52,7 @@ git_sha = str(local("git rev-parse HEAD", quiet = True, echo_off = True)).strip(
tilt_helper_dockerfile_header = """
# Tilt image
FROM golang:1.24.6 as tilt-helper
FROM golang:1.25 as tilt-helper
# Support live reloading with Tilt
RUN wget --output-document /restart.sh --quiet https://raw.githubusercontent.com/windmilleng/rerun-process-wrapper/master/restart.sh && \

View File

@@ -0,0 +1 @@
Fix issue #7725, add design for backup repo cache configuration

View File

@@ -0,0 +1 @@
feat: Permit specifying annotations for the BackupPVC

View File

@@ -0,0 +1 @@
Get pod list once per namespace in pvc IBA

View File

@@ -0,0 +1 @@
Fix issue #9229, don't attach backupPVC to the source node

View File

@@ -0,0 +1 @@
Update AzureAD Microsoft Authentication Library to v1.5.0

View File

@@ -0,0 +1 @@
Protect VolumeSnapshot field from race condition during multi-thread backup

View File

@@ -0,0 +1 @@
Fix repository maintenance jobs to inherit allowlisted tolerations from Velero deployment

View File

@@ -0,0 +1 @@
Fix schedule controller to prevent backup queue accumulation during extended blocking scenarios by properly handling empty backup phases

View File

@@ -0,0 +1 @@
Fix issue #7904, remove the code and doc for PVC node selection

View File

@@ -0,0 +1 @@
Implement concurrency control for cache of native VolumeSnapshotter plugin.

View File

@@ -0,0 +1 @@
Fix issue #9193, don't connect repo in repo controller

View File

@@ -0,0 +1 @@
Add option for privileged fs-backup pod

View File

@@ -0,0 +1 @@
Fix issue #9267, add events to data mover prepare diagnostic

View File

@@ -0,0 +1 @@
VerifyJSONConfigs verify every elements in Data.

View File

@@ -0,0 +1 @@
Fix typos in documentation

View File

@@ -0,0 +1 @@
Fix issue #9332, add bytesDone for cache files

View File

@@ -0,0 +1 @@
Add cache configuration to VGDP

View File

@@ -0,0 +1 @@
Fix the Job build error when BackupReposiotry name longer than 63.

View File

@@ -0,0 +1 @@
Add cache dir configuration for udmrepo

View File

@@ -0,0 +1 @@
Add snapshotSize for DataDownload, PodVolumeRestore

View File

@@ -0,0 +1 @@
Add incrementalSize to DU/PVB for reporting new/changed size

View File

@@ -0,0 +1 @@
Support cache volume for generic restore exposer and pod volume exposer

View File

@@ -0,0 +1 @@
Fix managed fields patch for resources using GenerateName

View File

@@ -0,0 +1 @@
Track actual resource names for GenerateName in restore status

View File

@@ -0,0 +1 @@
Add cache volume configuration

View File

@@ -0,0 +1 @@
Fix issue #9365, prevent fake completion notification due to multiple update of single PVR

View File

@@ -0,0 +1 @@
Refactor repo provider interface for static configuration

View File

@@ -0,0 +1 @@
don't copy securitycontext from first container if configmap found

View File

@@ -0,0 +1 @@
Cache volume support for DataDownload

View File

@@ -0,0 +1 @@
Cache volume for PVR

View File

@@ -0,0 +1 @@
Fix issue #9400, connect repo first time after creation so that init params could be written

View File

@@ -0,0 +1 @@
Fix issue #9276, add doc for cache volume support

View File

@@ -0,0 +1 @@
Apply volume policies to VolumeGroupSnapshot PVC filtering

View File

@@ -0,0 +1 @@
Fix issue #9194, add doc for GOMAXPROCS behavior change

View File

@@ -33,6 +33,12 @@ spec:
jsonPath: .status.progress.totalBytes
name: Total Bytes
type: integer
- description: Incremental bytes
format: int64
jsonPath: .status.incrementalBytes
name: Incremental Bytes
priority: 10
type: integer
- description: Name of the Backup Storage Location where this backup should be
stored
jsonPath: .spec.backupStorageLocation
@@ -189,6 +195,11 @@ spec:
format: date-time
nullable: true
type: string
incrementalBytes:
description: IncrementalBytes holds the number of bytes new or changed
since the last backup
format: int64
type: integer
message:
description: Message is a message about the pod volume backup's status.
type: string

View File

@@ -133,6 +133,10 @@ spec:
snapshotID:
description: SnapshotID is the ID of the volume snapshot to be restored.
type: string
snapshotSize:
description: SnapshotSize is the logical size in Bytes of the snapshot.
format: int64
type: integer
sourceNamespace:
description: SourceNamespace is the original namespace for namaspace
mapping.

File diff suppressed because one or more lines are too long

View File

@@ -108,6 +108,10 @@ spec:
description: SnapshotID is the ID of the Velero backup snapshot to
be restored from.
type: string
snapshotSize:
description: SnapshotSize is the logical size in Bytes of the snapshot.
format: int64
type: integer
sourceNamespace:
description: |-
SourceNamespace is the original namespace where the volume is backed up from.

View File

@@ -33,6 +33,12 @@ spec:
jsonPath: .status.progress.totalBytes
name: Total Bytes
type: integer
- description: Incremental bytes
format: int64
jsonPath: .status.incrementalBytes
name: Incremental Bytes
priority: 10
type: integer
- description: Name of the Backup Storage Location where this backup should be
stored
jsonPath: .spec.backupStorageLocation
@@ -173,6 +179,11 @@ spec:
as a result of the DataUpload.
nullable: true
type: object
incrementalBytes:
description: IncrementalBytes holds the number of bytes new or changed
since the last backup
format: int64
type: integer
message:
description: Message is a message about the DataUpload's status.
type: string

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,231 @@
# Backup Repository Cache Volume Design
## Glossary & Abbreviation
**Backup Storage**: The storage to store the backup data. Check [Unified Repository design][1] for details.
**Backup Repository**: Backup repository is layered between BR data movers and Backup Storage to provide BR related features that is introduced in [Unified Repository design][1].
**Velero Generic Data Path (VGDP)**: VGDP is the collective of modules that is introduced in [Unified Repository design][1]. Velero uses these modules to finish data transfer for various purposes (i.e., PodVolume backup/restore, Volume Snapshot Data Movement). VGDP modules include uploaders and the backup repository.
**Data Mover Pods**: Intermediate pods which hold VGDP and complete the data transfer. See [VGDP Micro Service for Volume Snapshot Data Movement][2] and [VGDP Micro Service For fs-backup][3] for details.
**Repository Maintenance Pods**: Pods for [Repository Maintenance Jobs][4], which holds VGDP to run repository maintenance.
## Background
According to the [Unified Repository design][1] Velero uses selectable backup repositories for various backup/restore methods, i.e., fs-backup, volume snapshot data movement, etc. Some backup repositories may need to cache data on the client side for various repository operation, so as to accelerate the execution.
In the existing [Backup Repository Configuration][5], we allow users to configure the cache data size (`cacheLimitMB`). However, the cache data is still stored in the root file system of data mover pods/repository maintenance pods, so stored in the root file system of the node. This is not good enough, reasons:
- In many distributions, the node's system disk size is predefined, non configurable and limit, e.g., the system disk size may be 20G or less
- Velero supports concurrent data movements in each node. The cache in each of the concurrent data mover pods could quickly run out of the system disk and cause problems like pod eviction, failure of pod creation, degradation of Kubernetes QoS, etc.
We need to allow users to prepare a dedicated location, e.g., a dedictated volume, for the cache.
Not all backup repositories or not all backup repository operations require cache, we need to define the details when and how the cache is used.
## Goals
- Create a mechanism for users to configure cache volumes for various pods running VGDP
- Design the workflow to assign the cache volume pod path to backup repositories
- Describe when and how the cache volume is used
## Non-Goals
- The solution is based on [Unified Repository design][1], [VGDP Micro Service for Volume Snapshot Data Movement][2] and [VGDP Micro Service For fs-backup][3], legacy data paths are not supported. E.g., when a pod volume restore (PVR) runs with legacy Restic path, if any data is cached, the cache still resides in the root file system.
## Solution
### Cache Data
Varying on backup repositoires, cache data may include payload data or repository metadata, e.g., indexes to the payload data chunks.
Payload data is highly related to the backup data, and normally take the majority of the repository data as well as the cache data.
Repository metadata is related to the backup repository's chunking algorithm, data chunk mapping method, etc, and so the size is not proportional to the backup data size.
On the other hand for some backup repository, in extreme cases, the repository metadata may be significantly large. E.g., Kopia's indexes are per chunks, if there are huge number of small files in the repository, Kopia's index data may be in the same level of or even larger than the payload data.
However, in the cases that repository metadata data become the majority, other bottlenecks may emerge and concurrency of data movers may be significantly constrained, so the requirement to cache volumes may go away.
Therefore, for now we only consider the cache volume requirement for payload data, and leave the consideration for metadata as a future enhancement.
### Scenarios
Backup repository cache varies on backup repositories and backup repository operation during VGDP runs. Below are the scenarios when VGDP runs:
- Data Upload for Backup: this is the process to upload/write the backup data into the backup repository, e.g., DataUpload or PodVolumeBackup. The pieces of data is almost directly written to the repository, sometimes with a small group staying shortly in the local place. That is to say, there should not be large scale data cached for this scenario, so we don't prepare dedicated cache for this scenario.
- Repository Maintenance: Repository maintenance most often visits the backup repository's metadata and sometimes it needs to visit the file system directories from the backed up data. On the other hand, it is not practical to run concurrent maintenance jobs in one node. So the cache data is neither large nor affect the root file system too much. Therefore, we don't need to prepare dedicated cache for this scenario.
- Data Download for Restore: this is the process to download/read the backup data from the backup repository during restore, e.g., DataDownload or PodVolumeRestore. For backup repositories for which data are stored in remote backup storages (e.g., Kopia repository stores data in remote object stores), large scale of data are cached locally to accerlerate the restore. Therefore, we need dedicate cache volumes for this scenario.
- Backup Deletion: During this scenario, backup repository is connected, metadata is enumerated to find the repository snapshot representing the backup data. That is to say, only metadata is cached if any. Therefore, dedicated cache volumes are not required in this scenario.
The above analyses are based on the common behavior of backup repositories and they are not considering the case that backup repository metadata takes majority or siginficant proportion of the cache data.
As a conclusion of the analyses, we will create dedicated cache volumes for restore scenarios.
For other scenarios, we can add them regarded to the future changes/requirements. The mechanism to expose and connect the cache volumes should work for all scenarios. E.g., if we need to consider the backup repository metadata case, we may need cache volumes for backup and repository maintenance as well, then we can just reuse the same cache volume provision and connection mechanism to backup and repository maintenance scenarios.
### Cache Data and Lifecycle
If available, one cache volume is dedicately assigned to one data mover pod. That is, the cached data is destroyed when the data mover pod completes. Then the backup repository instance also closes.
Cache data are fully managed by the specific backup repository. So the backup repository may also have its own way to GC the cache data.
That is to say, cache data GC may be launched by the backup repository instance during the running of the data mover pod; then the left data are automatically destroyed when the data mover pod and the cache PVC are destroyed (cache PVC's `reclaimPolicy` is always `Deleted`, so once the cache PVC is destroyed, the volume will also be destroyed). So no specially logics are needed for cache data GC.
### Data Size
Cache volumes take storage space and cluster resources (PVC, PV), therefore, cache volumes should be created only when necessary and the volumes should be with reasonable size based on the cache data size:
- It is not a good bargain to have cache volumes for small backups, small backups will use resident cache location (the cache location in the root file system)
- The cache data size has a limit, the existing `cacheLimitMB` is used for this purpose. E.g., it could be set as 1024 for a 1TB backup, which means 1GB of data is cached and the old cache data exceeding this size will be cleared. Therefore, it is meaningless to set the cache volume size much larger than `cacheLimitMB`
### Cache Volume Size
The cache volume size is calculated from below factors (for Restore scenarios):
- **Limit**: The limit of the cache data, that is represented by `cacheLimitMB`, the default value is 5GB
- **backupSize**: The size of the backup as a reference to evaluate whether to create a cache volume. It doesn't mean the backup data really decides the cache data all the time, it is just a reference to evaluate the scale of the backup, small scale backups may need small cache data. Sometimes, backupSize is not irrelevant to the size of cache data, in this case, ResidentThreshold should not be set, Limit will be used directly. It is unlikely that backupSize is unavailable, but once that happens, ResidentThreshold is ignored, Limit will be used directly.
- **ResidentThreshold**: The minimum backup size that a cache volume is created
- **InflationPercentage**: Considering the overhead of the file system and the possible delay of the cache cleanup, there should be an inflation for the final volume size vs. the logical size, otherwise, the cache volume may be overrun. This inflation percentage is hardcoded, e.g., 20%.
A formula is as below:
```
cacheVolumeSize = ((backupSize != 0 ? (backupSize > residentThreshold ? limit : 0) : limit) * (100 + inflationPercentage)) / 100
```
Finally, the `cacheVolumeSize` will be rounded up to GiB considering the UX friendliness, storage friendliness and management friendliness.
### PVC/PV
The PVC for a cache volume is created in Velero namespace and a storage class is required for the cache PVC. The PVC's accessMode is `ReadWriteOnce` and volumeMode is `FileSystem`, so the storage class provided should support this specification. Otherwise, if the storageclass doesn't support either of the specifications, the data mover pod may be hang in `Pending` state until a timeout setting with the data movement (e.g. `prepareTimeout`) and the data movement will finally fail.
It is not expected that the cache volume is retained after data mover pod is deleted, so the `reclaimPolicy` for the storageclass must be `Delete`.
To detect the problems in the storageclass and fail earlier, a validation is applied to the storageclass and once the validation fails, the cache configuration will be ignored, so the data mover pod will be created without a cache volume.
### Cache Volume Configurations
Below configurations are introduced:
- **residentThresholdMB**: the minimum data size(in MB) to be processed (if available) that a cache volume is created
- **cacheStorageClass**: the name of the storage class to provision the cache PVC
Not like `cacheLimitMB` which is set to and affect the backup repository, the above two configurations are actually data mover configurations of how to create cache volumes to data mover pods; and the two configurations don't need to be per backup repository. So we add them to the node-agent Configuration.
### Sample
Below are some examples of the node-agent configMap with the configurations:
Sample-1:
```json
{
"cacheVolume": {
"storageClass": "sc-1",
"residentThresholdMB": 1024
}
}
```
Sample-2:
```json
{
"cacheVolume": {
"storageClass": "sc-1",
}
}
```
Sample-3:
```json
{
"cacheVolume": {
"residentThresholdMB": 1024
}
}
```
**sample-1**: This is a valid configuration. Restores with backup data size larger than 1G will be assigned a cache volume using storage class `sc-1`.
**sample-2**: This is a valid configuration. Data mover pods are always assigned a cache volume using storage class `sc-1`.
**sample-3**: This is not a valid configuration because the storage class is absent. Velero gives up creating a cache volume.
To create the configMap, users need to save something like the above sample to a json file and then run below command:
```
kubectl create cm <ConfigMap name> -n velero --from-file=<json file name>
```
The cache volume configurations will be visited by node-agent server, so they also need to specify the `--node-agent-configmap` to the `velero node-agent` parameters.
## Detailed Design
### Backup and Restore
The restore needs to know the backup size so as to calculate the cache volume size, some new fields are added to the DataDownload and PodVolumeRestore CRDs.
`snapshotSize` field is also added to DataDownload and PodVolumeRestore's `spec`:
```yaml
spec:
snapshotID:
description: SnapshotID is the ID of the Velero backup snapshot to
be restored from.
type: string
snapshotSize:
description: SnapshotSize is the logical size of the snapshot.
format: int64
type: integer
```
`snapshotSize` represents the total size of the backup; during restore, the value is transferred from DataUpload/PodVolumeBackup's `Status.Progress.TotalBytes` to DataDownload/PodVolumeRestore.
It is unlikely that `Status.Progress.TotalBytes` from DataUpload/PodVolumeBackup is unavailable, but once it happens, according to the above formula, `residentThresholdMB` is ignored, cache volume size is calculated directly from cache limit for the corresponding backup repository.
### Exposer
Cache volume configurations are retrieved by node-agent and passed through DataDownload/PodVolumeRestore to GenericRestore exposer/PodVolume exposer.
The exposers are responsible to calculate cache volume size, create cache PVCs and mount them to the restorePods.
If the calculated cache volume size is 0, or any of the critical parameters is missing (e.g., cache volume storage class), the exposers ignore the cache volume configuration and continue with creating restorePods without cache volumes, so no impact to the result of the restore.
Exposers mount the cache volume to a predefined directory and pass the directory to the data mover pods through the `cache-volume-path` parameter.
Below data structure is added to the exposers' expose parameters:
```go
type GenericRestoreExposeParam struct {
// RestoreSize specifies the data size for the volume to be restored
RestoreSize int64
// CacheVolume specifies the info for cache volumes
CacheVolume *CacheVolumeInfo
}
type PodVolumeExposeParam struct {
// RestoreSize specifies the data size for the volume to be restored
RestoreSize int64
// CacheVolume specifies the info for cache volumes
CacheVolume *repocache.CacheConfigs
}
type CacheConfigs struct {
// StorageClass specifies the storage class for cache volumes
StorageClass string
// Limit specifies the maximum size of the cache data
Limit int64
// ResidentThreshold specifies the minimum size of the cache data to create a cache volume
ResidentThreshold int64
}
```
### Data Mover Pods
Data mover pods retrieve the cache volume directory from `cache-volume-path` parameter and pass it to Unified Repository.
If the directory is empty, Unified Repository uses the resident location for data cache, that is, the root file system.
### Kopia Repository
Kopia repository supports cache directory configuration for both metadata and data. The existing `SetupConnectOptions` is modified to customize the `CacheDirectory`:
```go
func SetupConnectOptions(ctx context.Context, repoOptions udmrepo.RepoOptions) repo.ConnectOptions {
...
return repo.ConnectOptions{
CachingOptions: content.CachingOptions{
CacheDirectory: cacheDir,
...
},
...
}
}
```
[1]: Implemented/unified-repo-and-kopia-integration/unified-repo-and-kopia-integration.md
[2]: Implemented/vgdp-micro-service/vgdp-micro-service.md
[3]: Implemented/vgdp-micro-service-for-fs-backup/vgdp-micro-service-for-fs-backup.md
[4]: Implemented/repo_maintenance_job_config.md
[5]: Implemented/backup-repo-config.md

View File

@@ -0,0 +1,257 @@
# Concurrent Backup Processing
This enhancement will enable Velero to process multiple backups at the same time. This is largely a usability enhancement rather than a performance enhancement, since the overall backup throughput may not be significantly improved over the current implementation, since we are already processing individual backup items in parallel. It is a significant usability improvement, though, as with the current design, a user who submits a small backup may have to wait significantly longer than expected if the backup is submitted immediately after a large backup.
## Background
With the current implementation, only one backup may be `InProgress` at a time. A second backup created will not start processing until the first backup moves on to `WaitingForPluginOperations` or `Finalizing`. This is a usability concern, especially in clusters when multiple users are initiating backups. With this enhancement, we intend to allow multiple backups to be processed concurrently. This will allow backups to start processing immediately, even if a large backup was just submitted by another user. This enhancement will build on top of the prior parallel item processing feature by creating a dedicatede ItemBlock worker pool for each running backup. The pool will be created at the beginning of the backup reconcile, and the input channel will be passed to the Kubernetes backupper just like it is in the current release.
The primary challenge is to make sure that the same workload in multiple backups is not backed up concurrently. If that were to happen, we would risk data corruption, especially around the processing of pod hooks and volume backup. For this first release we will take a conservative, high-level approach to overlap detection. Two backups will not run concurrently if there is any overlap in included namespaces. For example, if a backup that includes `ns1` and `ns2` is running, then a second backup for `ns2` and `ns3` will not be started. If a backup which does not filter namespaces is running (either a whole cluster backup or a non-namespace-limited backup with a label selector) then no other backups will be started, since a backup across all namespaces overlaps with any other backup. Calculating item-level overlap for queued backups is problematic since we don't know which items are included in a backup until backup processing has begun. A future release may add ItemBlock overlap detection, where at the item block worker level, the same item will not be processed by two different workers at the same time. This works together with workload conflict detection to further detect conflicts in a more granular level for shared resources between backups. Eventually, with a more complete understanding of individual workloads (either via ItemBlocks or some higher level model), the namespace level overlap detection may be relaxed in future versions.
## Goals
- Process multiple backups concurrently
- Detect namespace overlap to avoid conflicts
- For queued backups (not yet runnable due to concurrency limits or overlap), indicate the queue position in status
## Non Goals
- Handling NFS PVs when more than one PV point to the same underlying NFS share
- Handling VGDP cancellation for failed backups on restart
- Mounting a PVC for scenarios in which /tmp is too small for the number of concurrent backups
- Providing a mechanism to identify high priority backups which get preferential treatment in terms of ItemBlock worker availability
- Item-level overlap detection (future feature)
- Providing the ability to disable namespace-level overlap detection once Item-level overlap detection is in place (although this may be supported in a future version).
## High-Level Design
### Backup CRD changes
Two new backup phases will be added: `Queued` and `ReadyToStart`. In the Backup workflow, new backups will be moved to the Queued phase when they are added to the backup queue. When a backup is removed from the queue because it is now able to run, it will be moved to the `ReadyToStart` phase, which will allow the backup controller to start processing it.
In addition, a new Status field, `QueuePosition`, will be added to track the backup's current position in the queue.
### New Controller: `backupQueueReconciler`
A new reconciler will be added, `backupQueueReconciler` which will use the current `backupReconciler` logic for reconciling `New` backups but instead of running the backup, it will move the Backup to the `Queued` phase and set `QueuePosition`.
In addition, this reconciler will periodically reconcile all queued backups (on some configurable time interval) and if there is a runnable backup, remove it from the queue, update `QueuePosition` for any queued backups behind it, and update its phase to `ReadyToStart`.
Queued backups will be reconciled in order based on `QueuePosition`, so the first runnable backup found will be processed. A backup is runnable if both of the following conditions are true:
1) The total number of backups either `InProgress` or `ReadyToStart` is less than the configured number of concurrent backups.
2) The backup has no overlap with any backups currently `InProgress` or `ReadyToStart` or with any `Queued` backups with a higher (i.e. closer to 1) queue position than this backup.
### Updates to Backup controller
The current `backupReconciler` will change its reconciling rules. Instead of watching and reconciling New backups, it will reconcile `ReadyToStart` backups. In addition, it will be configured to run in parallel by setting `MaxConcurrentReconciles` based on the `concurrent-backups` server arg.
The startup (and shutdown) of the ItemBlock worker pool will be moved from reconciler startup to the backup reconcile, which will give each running backup its own dedicated worker pool. The per-backup worker pool will will use the existing `--item-block-worker-count` installer/server arg. This means that the maximum number of ItemBlock workers for the entire Velero pod will be the ItemBlock worker count multiplied by concurrentBackups. For example, if concurrentBackups is 5, and itemBlockWorkerCount is 6, then there will be, at most, 30 worker threads active, 5 dedicated to each InProgress backup, but this maximum will only be achieved when the maximum number of backups are InProgress. This also means that each InProgress backup will have a dedicated ItemBlock input channel with the same fixed buffer size.
## Detailed Design
### New Install/Server configuration args
A new install/server arg, `concurrent-backups` will be added. This will be an int-valued field specifying the number of backups which may be processed concurrently (with phase `InProgress`). If not specified, the default value of 1 will be used.
### Consideration of backup overlap and concurrent backup processing
The primary consideration for running additional backups concurrently is the configured `concurrent-backups` parameter. If the total number of `InProgress` and `ReadyToStart` backups is equal to `concurrent-backups` then any `Queued` backups will remain in the queue.
The second consideration is backup overlap. In order to prevent interaction between running backups (particularly around volume backup and pod hooks), we cannot allow two overlapping backups to run at the same time. For now, we will define overlap broadly -- requiring that two concurrent backups don't include any of the same namespaces. A backup for `ns1` can run concurrently with a backup for `ns2`, but a backup for `[ns1,ns2]` cannot run concurrently with a backup for `ns1`. One consequence of this approach is that a backup which includes all namespaces (even if further filtered by resource or label) cannot run concurrently with *any other backup*.
When determining which queued backup to run next, velero will look for the next queued backup which has no overlap with any InProgress backup or any Queued backup ahead of it. The reason we need to consider queued as well as running backups for overlap detection is as follows.
Consider the following scenario. These are the current not-completed backups (ordered from oldest to newest)
1. backup1, includedNamespaces: [ns1, ns2], phase: InProgress
2. backup2, includedNamespaces: [ns2, ns3, ns5], phase: Queued, QueuePosition: 1
3. backup3, includedNamespaces: [ns4, ns3], phase: Queued, QueuePosition: 2
4. backup4, includedNamespaces: [ns5, ns6], phase: Queued, QueuePosition: 2
5. backup5, includedNamespaces: [ns8, ns9], phase: Queued, QueuePosition: 3
Assuming `concurrent-backups` is 2, on the next reconcile, Velero will be able to start a second backup if there is one with no overlap. `backup2` cannot run, since `ns2` overlaps between it and the running `backup1`. If we only considered running overlap (and not queued overlap), then `backup3` could run now. It conflicts with the queued `backup2` on `ns3` but it does not conflict with the running backup. However, if it runs now, then when `backup1` completes, then `backup2` still can't run (since it now overlaps with running `backup3`on `ns3`), so `backup4` starts instead. Now when `backup3` completes, `backup2` still can't run (since it now conflicts with `backup4` on `ns5`). This means that even though it was the second backup created, it's the fourth to run -- providing worse time to completion than without parallel backups. If a queued backup has a large number of namespaces (a full-cluster backup for example), it would never run as long as new single-namespace backups keep being added to the queue.
To resolve this problem we consider both running backups as well as backups ahead in the queue when resolving overlap conflicts. In the above scenario, `backup2` can't run yet since it overlaps with the running backup on `ns2`. In addition, `backup3` and `backup4` also can't run yet since they overlap with queued `backup2`. Therefore, `backup5` will run now. Once `backup1` completes, `backup2` will be free to run.
### Backup CRD changes
New Backup phases:
```go
const (
// BackupPhaseQueued means the backup has been added to the
// queue by the BackupQueueReconciler.
BackupPhaseQueued BackupPhase = "Queued"
// BackupPhaseReadyToStart means the backup has been removed from the
// queue by the BackupQueueReconciler and is ready to start.
BackupPhaseReadyToStart BackupPhase = "ReadyToStart"
)
```
In addition, a new Status field, `queuePosition`, will be added to track the backup's current position in the queue.
```go
// QueuePosition is the position held by the backup in the queue.
// QueuePosition=1 means this backup is the next to be considered.
// Only relevant when Phase is "Queued"
// +optional
QueuePosition int `json:"queuePosition,omitempty"`
```
### New Controller: `backupQueueReconciler`
A new reconciler will be added, `backupQueueReconciler` which will reconcile backups under these conditions:
1) Watching Create/Update for backups in `New` (or empty) phase
2) Watching for Backup phase transition from `InProgress` to something else to reconcile all `Queued` backups
2) Watching for Backup phase transition from `New` (or empty) to `Queued` to reconcile all `Queued` backups
2) Periodic reconcile of `Queued` backups to handle backups queued at server startup as well as to make sure we never have a situation where backups are queued indefinitely because of a race condition or was otherwise missed in the reconcile on prior backup completion.
The reconciler will be set up as follows -- note that New backups are reconciled on Create/Update, while Queued backups are reconciled when an InProgress backup moves on to another state or when a new backup moves to the Queued state. We also reconcile Queued backups periodically to handle the case of a Velero pod restart with Queued backups, as well as to handle possible edge cases where a queued backup doesn't get moved out of the queue at the point of backup completion or an error occurs during a prior Queued backup reconcile.
```go
func (c *backupOperationsReconciler) SetupWithManager(mgr ctrl.Manager) error {
// only consider Queued backups, order by QueuePosition
gp := kube.NewGenericEventPredicate(func(object client.Object) bool {
backup := object.(*velerov1api.Backup)
return (backup.Status.Phase == velerov1api.BackupPhaseQueued)
})
s := kube.NewPeriodicalEnqueueSource(c.logger.WithField("controller", constant.ControllerBackupOperations), mgr.GetClient(), &velerov1api.BackupList{}, c.frequency, kube.PeriodicalEnqueueSourceOption{
Predicates: []predicate.Predicate{gp},
OrderFunc: queuePositionOrderFunc,
})
return ctrl.NewControllerManagedBy(mgr).
For(&velerov1api.Backup{}, builder.WithPredicates(predicate.Funcs{
UpdateFunc: func(ue event.UpdateEvent) bool {
backup := ue.ObjectNew.(*velerov1api.Backup)
return backup.Status.Phase == "" || backup.status.Phase == velerov1api.BackupPhaseNew
},
CreateFunc: func(event.CreateEvent) bool {
return backup.Status.Phase == "" || backup.status.Phase == velerov1api.BackupPhaseNew
},
DeleteFunc: func(de event.DeleteEvent) bool {
return false
},
GenericFunc: func(ge event.GenericEvent) bool {
return false
},
})).
Watch(
&source.Kind{Type: &velerov1api.Backup{}},
&handler.EnqueueRequestsFromMapFunc{
ToRequests: handler.ToRequestsFunc(func(a handler.MapObject) []reconcile.Request {
backupList := velerov1api.BackupList{}
if err := p.List(ctx, backupList); err != nil {
p.logger.WithError(err).Error("error listing backups")
return
}
requests = []reconcile.request{}
// filter backup list by Phase=queued
// sort backup list by queuePosition
return requests
}),
},
builder.WithPredicates(predicate.Funcs{
UpdateFunc: func(ue event.UpdateEvent) bool {
oldBackup := ue.ObjectOld.(*velerov1api.Backup)
newBackup := ue.ObjectNew.(*velerov1api.Backup)
return oldBackup.Status.Phase == velerov1api.BackupPhaseInProgress &&
newBackup.Status.Phase != velerov1api.BackupPhaseInProgress ||
oldBackup.Status.Phase != velerov1api.BackupPhaseQueued &&
newBackup.Status.Phase == velerov1api.BackupPhaseQueued
},
CreateFunc: func(event.CreateEvent) bool {
return false
},
DeleteFunc: func(de event.DeleteEvent) bool {
return false
},
GenericFunc: func(ge event.GenericEvent) bool {
return false
},
}).
WatchesRawSource(s).
Named(constant.ControllerBackupQueue).
Complete(c)
}
```
New backups will be queued: Phase will be set to `Queued`, and `QueuePosition` will be set to a int value incremented from the highest current `QueuePosition` value among Queued backups.
Queued backups will be removed from the queue if runnable:
1) If the total number of backups either InProgress or ReadyToStart is greater than or equal to the concurrency limit, then exit without removing from the queue.
2) If the current backup overlaps with any InProgress, ReadyToStart, or Queued backup with `QueuePosition < currentBackup.QueuePosition` then exit without removing from the queue.
3) If we get here, the backup is runnable. To resolve a potential race condition where an InProgress backup completes between reconciling the backup with QueuePosition `n-1` and reconciling the current backup with QueuePosition `n`, we also check to see whether there are any runnable backups in the queue ahead of this one. The only time this will happen is if a backup completes immediately before reconcile starts which either frees up a concurrency slot or removes a namespace conflict. In this case, we don't want to run the current backup since the one ahead of this one in the queue (which was recently passed over before the InProgress backup completed) must run first. In this case, exit without removing from the queue.
4) If we get here, remove the backup from the queue by setting Phase to `ReadyToStart` and `QueuePosition` to zero. Decrement the `QueuePosition` of any other Queued backups with a `QueuePosition` higher than the current backup's queue position prior to dequeuing. At this point, the backup reconciler will start the backup.
`if len(inProgressBackups)+len(pendingStartBackups) >= concurrentBackups`
```
switch original.Status.Phase {
case "", velerov1api.BackupPhaseNew:
// enqueue backup -- set phase=Queued, set queuePosition=maxCurrentQueuePosition+1
}
// We should only ever get these events when added in order by the periodical enqueue source
// so as long as the current backup has not conflicts ahead of it or running, we should be good to
// dequeue
case "", velerov1api.BackupPhaseQueued:
// list backups, filter on Queued, ReadyToStart, and InProgress
// if number of InProgress backups + number of ReadyToStart backups >= concurrency limit, exit
// generate list of all namespaces included in InProgress, ReadyToStart, and Queued backups with
// queuePosition < backup.Status.QueuePosition
// if overlap found, exit
// check backups ahead of this one in the queue for runnability. If any are runnable, exit
// dequeue backup: set Phase to ReadyToStart, QueuePosition to 0, and decrement QueuePosition
// for all QueuedBackups behind this one in the queue
}
```
The queue controller will run as a single reconciler thread, so we will not need to deal with concurrency issues when moving backups from New to Queued or from Queued to ReadyToStart, and all of the updates to QueuePosition will be from a single thread.
### Updates to Backup controller
The Reconcile logic will be updated to respond to ReadyToStart backups instead of New backups:
```
@@ -234,8 +234,8 @@ func (b *backupReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr
// InProgress, we still need this check so we can return nil to indicate we've finished processing
// this key (even though it was a no-op).
switch original.Status.Phase {
- case "", velerov1api.BackupPhaseNew:
- // only process new backups
+ case velerov1api.BackupPhaseReadyToStart:
+ // only process ReadyToStart backups
default:
b.logger.WithFields(logrus.Fields{
"backup": kubeutil.NamespaceAndName(original),
```
In addition, it will be configured to run in parallel by setting `MaxConcurrentReconciles` based on the `concurrent-backups` server arg.
```
@@ -149,6 +149,9 @@ func NewBackupReconciler(
func (b *backupReconciler) SetupWithManager(mgr ctrl.Manager) error {
return ctrl.NewControllerManagedBy(mgr).
For(&velerov1api.Backup{}).
+ WithOptions(controller.Options{
+ MaxConcurrentReconciles: concurrentBackups,
+ }).
Named(constant.ControllerBackup).
Complete(b)
}
```
The controller-runtime core reconciler logic already prevents the same resource from being reconciled by two different reconciler threads, so we don't need to worry about concurrency issues at the controller level.
The workerPool reference will be moved from the backupReconciler to the backupRequest, since this will now be backup-specific, and the initialization code for the worker pool will be moved from the reconciler init into the backup reconcile. This worker pool will be shut down upon exiting the Reconcile method.
### Resilience to restart of velero pod
The new backup phases (`Queued` and `ReadyToStart`) will be resilient to velero pod restarts. If the velero pod crashes or is restarted, only backups in the `InProgress` phase will be failed, so there is no change to current behavior. Queued backups will retain their queue position on restart, and ReadyToStart backups will move to InProgress when reconciled.
### Observability
#### Logging
When a backup is dequeued, an info log message will also include the wait time, calculated as `now - creationTimestamp`. When a backup is passed over due to overlap, an info log message will indicate which namespaces were in conflict.
#### Velero CLI
The `velero backup describe` output will include the current queue position for queued backups.

View File

@@ -0,0 +1,115 @@
# Wildcard Namespace Support
## Abstract
Velero currently treats namespace patterns with glob characters as literal strings. This design adds wildcard expansion to support flexible namespace selection using patterns like `app-*` or `test-{dev,staging}`.
## Background
Requested in [#1874](https://github.com/vmware-tanzu/velero/issues/1874) for more flexible namespace selection.
## Goals
- Support glob pattern expansion in namespace includes/excludes
- Maintain backward compatibility with existing `*` behavior
## Non-Goals
- Complex regex patterns beyond basic globs
## High-Level Design
Wildcard expansion occurs early in both backup and restore flows, converting patterns to literal namespace lists before normal processing.
### Backup Flow
Expansion happens in `getResourceItems()` before namespace collection:
1. Check if wildcards exist using `ShouldExpandWildcards()`
2. Expand patterns against active cluster namespaces
3. Replace includes/excludes with expanded literal namespaces
4. Continue with normal backup processing
### Restore Flow
Expansion occurs in `execute()` after parsing backup contents:
1. Extract available namespaces from backup tar
2. Expand patterns against backup namespaces (not cluster namespaces)
3. Update restore context with expanded namespaces
4. Continue with normal restore processing
This ensures restore wildcards match actual backup contents, not current cluster state.
## Detailed Design
### Status Fields
Add wildcard expansion tracking to backup and restore CRDs:
```go
type WildcardNamespaceStatus struct {
// IncludeWildcardMatches records namespaces that matched include patterns
// +optional
IncludeWildcardMatches []string `json:"includeWildcardMatches,omitempty"`
// ExcludeWildcardMatches records namespaces that matched exclude patterns
// +optional
ExcludeWildcardMatches []string `json:"excludeWildcardMatches,omitempty"`
// WildcardResult records final namespaces after wildcard processing
// +optional
WildcardResult []string `json:"wildcardResult,omitempty"`
}
// Added to both BackupStatus and RestoreStatus
type BackupStatus struct {
// WildcardNamespaces contains wildcard expansion results
// +optional
WildcardNamespaces *WildcardNamespaceStatus `json:"wildcardNamespaces,omitempty"`
}
```
### Wildcard Expansion Package
New `pkg/util/wildcard/expand.go` package provides:
- `ShouldExpandWildcards()` - Skip expansion for simple "*" case
- `ExpandWildcards()` - Main expansion function using `github.com/gobwas/glob`
- Pattern validation rejecting unsupported regex symbols
**Supported patterns**: `*`, `?`, `[abc]`, `{a,b,c}`
**Unsupported**: `|()`, `**`
### Implementation Details
#### Backup Integration (`pkg/backup/item_collector.go`)
Expansion in `getResourceItems()`:
- Call `wildcard.ExpandWildcards()` with cluster namespaces
- Update `NamespaceIncludesExcludes` with expanded results
- Populate status fields with expansion results
#### Restore Integration (`pkg/restore/restore.go`)
Expansion in `execute()`:
```go
if wildcard.ShouldExpandWildcards(includes, excludes) {
availableNamespaces := extractNamespacesFromBackup(backupResources)
expandedIncludes, expandedExcludes, err := wildcard.ExpandWildcards(
availableNamespaces, includes, excludes)
// Update context and status
}
```
## Alternatives Considered
1. **Client-side expansion**: Rejected because it wouldn't work for scheduled backups
2. **Expansion in `collectNamespaces`**: Rejected because these functions expect literal namespaces
## Compatibility
Maintains full backward compatibility - existing "*" behavior unchanged.
## Implementation
Target: Velero 1.18

22
go.mod
View File

@@ -1,8 +1,6 @@
module github.com/vmware-tanzu/velero
go 1.24.0
toolchain go1.24.6
go 1.25.0
require (
cloud.google.com/go/storage v1.55.0
@@ -43,10 +41,9 @@ require (
github.com/stretchr/testify v1.10.0
github.com/vmware-tanzu/crash-diagnostics v0.3.7
go.uber.org/zap v1.27.0
golang.org/x/mod v0.26.0
golang.org/x/net v0.42.0
golang.org/x/mod v0.29.0
golang.org/x/oauth2 v0.30.0
golang.org/x/text v0.27.0
golang.org/x/text v0.31.0
google.golang.org/api v0.241.0
google.golang.org/grpc v1.73.0
google.golang.org/protobuf v1.36.6
@@ -75,7 +72,7 @@ require (
cloud.google.com/go/monitoring v1.24.2 // indirect
github.com/Azure/azure-sdk-for-go/sdk/internal v1.11.1 // indirect
github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 // indirect
github.com/AzureAD/microsoft-authentication-library-for-go v1.4.2 // indirect
github.com/AzureAD/microsoft-authentication-library-for-go v1.5.0 // indirect
github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.27.0 // indirect
github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.51.0 // indirect
github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.51.0 // indirect
@@ -182,13 +179,14 @@ require (
go.opentelemetry.io/otel/trace v1.37.0 // indirect
go.starlark.net v0.0.0-20230525235612-a134d8f9ddca // indirect
go.uber.org/multierr v1.11.0 // indirect
golang.org/x/crypto v0.40.0 // indirect
golang.org/x/crypto v0.45.0 // indirect
golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect
golang.org/x/sync v0.16.0 // indirect
golang.org/x/sys v0.34.0 // indirect
golang.org/x/term v0.33.0 // indirect
golang.org/x/net v0.47.0 // indirect
golang.org/x/sync v0.18.0 // indirect
golang.org/x/sys v0.38.0 // indirect
golang.org/x/term v0.37.0 // indirect
golang.org/x/time v0.12.0 // indirect
golang.org/x/tools v0.34.0 // indirect
golang.org/x/tools v0.38.0 // indirect
gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
google.golang.org/genproto v0.0.0-20250505200425-f936aa4a68b2 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20250603155806-513f23925822 // indirect

36
go.sum
View File

@@ -95,8 +95,8 @@ github.com/Azure/go-autorest/logger v0.2.1/go.mod h1:T9E3cAhj2VqvPOtCYAvby9aBXkZ
github.com/Azure/go-autorest/tracing v0.6.0/go.mod h1:+vhtPC754Xsa23ID7GlGsrdKBpUA79WCAKPPZVC2DeU=
github.com/AzureAD/microsoft-authentication-extensions-for-go/cache v0.1.1 h1:WJTmL004Abzc5wDB5VtZG2PJk5ndYDgVacGqfirKxjM=
github.com/AzureAD/microsoft-authentication-extensions-for-go/cache v0.1.1/go.mod h1:tCcJZ0uHAmvjsVYzEFivsRTN00oz5BEsRgQHu5JZ9WE=
github.com/AzureAD/microsoft-authentication-library-for-go v1.4.2 h1:oygO0locgZJe7PpYPXT5A29ZkwJaPqcva7BVeemZOZs=
github.com/AzureAD/microsoft-authentication-library-for-go v1.4.2/go.mod h1:wP83P5OoQ5p6ip3ScPr0BAq0BvuPAvacpEuSzyouqAI=
github.com/AzureAD/microsoft-authentication-library-for-go v1.5.0 h1:XkkQbfMyuH2jTSjQjSoihryI8GINRcs4xp8lNawg0FI=
github.com/AzureAD/microsoft-authentication-library-for-go v1.5.0/go.mod h1:HKpQxkWaGLJ+D/5H8QRpyQXA1eKjxkFlOMwck5+33Jk=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
github.com/GehirnInc/crypt v0.0.0-20230320061759-8cc1b52080c5 h1:IEjq88XO4PuBDcvmjQJcQGg+w+UaafSy8G5Kcb5tBhI=
@@ -794,8 +794,8 @@ golang.org/x/crypto v0.0.0-20201002170205-7f63de1d35b0/go.mod h1:LzIPMQfyMNhhGPh
golang.org/x/crypto v0.0.0-20210220033148-5ea612d1eb83/go.mod h1:jdWPYTVW3xRLrWPugEBEK3UY2ZEsg3UU495nc5E+M+I=
golang.org/x/crypto v0.0.0-20210421170649-83a5a9bb288b/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4=
golang.org/x/crypto v0.0.0-20220722155217-630584e8d5aa/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
golang.org/x/crypto v0.40.0 h1:r4x+VvoG5Fm+eJcxMaY8CQM7Lb0l1lsmjGBQ6s8BfKM=
golang.org/x/crypto v0.40.0/go.mod h1:Qr1vMER5WyS2dfPHAlsOj01wgLbsyWtFn/aY+5+ZdxY=
golang.org/x/crypto v0.45.0 h1:jMBrvKuj23MTlT0bQEOBcAE0mjg8mK9RXFhRH6nyF3Q=
golang.org/x/crypto v0.45.0/go.mod h1:XTGrrkGJve7CYK7J8PEww4aY7gM3qMCElcJQ8n8JdX4=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8=
@@ -833,8 +833,8 @@ golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.4.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.4.1/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.26.0 h1:EGMPT//Ezu+ylkCijjPc+f4Aih7sZvaAr+O3EHBxvZg=
golang.org/x/mod v0.26.0/go.mod h1:/j6NAhSk8iQ723BGAUyoAcn7SlD7s15Dp9Nd/SfeaFQ=
golang.org/x/mod v0.29.0 h1:HV8lRxZC4l2cr3Zq1LvtOsi/ThTgWnUk/y64QSs8GwA=
golang.org/x/mod v0.29.0/go.mod h1:NyhrlYXJ2H4eJiRy/WDBO6HMqZQ6q9nk4JzS3NuCK+w=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
@@ -880,8 +880,8 @@ golang.org/x/net v0.0.0-20210316092652-d523dce5a7f4/go.mod h1:RBQZq4jEuRlivfhVLd
golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM=
golang.org/x/net v0.0.0-20210520170846-37e1c6afe023/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.42.0 h1:jzkYrhi3YQWD6MLBJcsklgQsoAcw89EcZbJw8Z614hs=
golang.org/x/net v0.42.0/go.mod h1:FF1RA5d3u7nAYA4z2TkclSCKh68eSXtiFwcWQpPXdt8=
golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY=
golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
@@ -908,8 +908,8 @@ golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJ
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.16.0 h1:ycBJEhp9p4vXvUZNszeOq0kGTPghopOL8q0fq3vstxw=
golang.org/x/sync v0.16.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
golang.org/x/sync v0.18.0 h1:kr88TuHDroi+UVf+0hZnirlk8o8T+4MrK6mr60WkH/I=
golang.org/x/sync v0.18.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
@@ -973,14 +973,14 @@ golang.org/x/sys v0.0.0-20211019181941-9d821ace8654/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.34.0 h1:H5Y5sJ2L2JRdyv7ROF1he/lPdvFsd0mJHFw2ThKHxLA=
golang.org/x/sys v0.34.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc=
golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210220032956-6a3ed077a48d/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20220526004731-065cf7ba2467/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.33.0 h1:NuFncQrRcaRvVmgRkvM3j/F00gWIAlcmlB8ACEKmGIg=
golang.org/x/term v0.33.0/go.mod h1:s18+ql9tYWp1IfpV9DmCtQDDSRBUjKaw9M1eAv5UeF0=
golang.org/x/term v0.37.0 h1:8EGAD0qCmHYZg6J17DvsMy9/wJ7/D/4pV/wfnld5lTU=
golang.org/x/term v0.37.0/go.mod h1:5pB4lxRNYYVZuTLmy8oR2BH8dflOR+IbTYFD8fi3254=
golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
@@ -990,8 +990,8 @@ golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.27.0 h1:4fGWRpyh641NLlecmyl4LOe6yDdfaYNrGb2zdfo4JV4=
golang.org/x/text v0.27.0/go.mod h1:1D28KMCvyooCX9hBiosv5Tz/+YLxj0j7XhWjpSUF7CU=
golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM=
golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM=
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
@@ -1051,8 +1051,8 @@ golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4f
golang.org/x/tools v0.0.0-20210108195828-e2f9c7f1fc8e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0=
golang.org/x/tools v0.1.2/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
golang.org/x/tools v0.34.0 h1:qIpSLOxeCYGg9TrcJokLBG4KFA6d795g0xkBkiESGlo=
golang.org/x/tools v0.34.0/go.mod h1:pAP9OwEaY1CAW3HOmg3hLZC5Z0CCmzjAF2UQMSqNARg=
golang.org/x/tools v0.38.0 h1:Hx2Xv8hISq8Lm16jvBZ2VQf+RLmbd7wVUsALibYI/IQ=
golang.org/x/tools v0.38.0/go.mod h1:yEsQ/d/YK8cjh0L6rZlY8tgtlKiBNTL14pGDJPJpYQs=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=

View File

@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
FROM --platform=$TARGETPLATFORM golang:1.24.6-bookworm
FROM --platform=$TARGETPLATFORM golang:1.25-bookworm
ARG GOPROXY
@@ -94,7 +94,7 @@ RUN ARCH=$(go env GOARCH) && \
chmod +x /usr/bin/goreleaser
# get golangci-lint
RUN curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(go env GOPATH)/bin v2.1.1
RUN curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(go env GOPATH)/bin v2.5.0
# install kubectl
RUN curl -LO https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/linux/$(go env GOARCH)/kubectl

View File

@@ -170,6 +170,9 @@ type SnapshotDataMovementInfo struct {
// Moved snapshot data size.
Size int64 `json:"size"`
// Moved snapshot incremental size.
IncrementalSize int64 `json:"incrementalSize,omitempty"`
// The DataUpload's Status.Phase value
Phase velerov2alpha1.DataUploadPhase
}
@@ -217,6 +220,9 @@ type PodVolumeInfo struct {
// The snapshot corresponding volume size.
Size int64 `json:"size,omitempty"`
// The incremental snapshot size.
IncrementalSize int64 `json:"incrementalSize,omitempty"`
// The type of the uploader that uploads the data. The valid values are `kopia` and `restic`.
UploaderType string `json:"uploaderType"`
@@ -240,14 +246,15 @@ type PodVolumeInfo struct {
func newPodVolumeInfoFromPVB(pvb *velerov1api.PodVolumeBackup) *PodVolumeInfo {
return &PodVolumeInfo{
SnapshotHandle: pvb.Status.SnapshotID,
Size: pvb.Status.Progress.TotalBytes,
UploaderType: pvb.Spec.UploaderType,
VolumeName: pvb.Spec.Volume,
PodName: pvb.Spec.Pod.Name,
PodNamespace: pvb.Spec.Pod.Namespace,
NodeName: pvb.Spec.Node,
Phase: pvb.Status.Phase,
SnapshotHandle: pvb.Status.SnapshotID,
Size: pvb.Status.Progress.TotalBytes,
IncrementalSize: pvb.Status.IncrementalBytes,
UploaderType: pvb.Spec.UploaderType,
VolumeName: pvb.Spec.Volume,
PodName: pvb.Spec.Pod.Name,
PodNamespace: pvb.Spec.Pod.Namespace,
NodeName: pvb.Spec.Node,
Phase: pvb.Status.Phase,
}
}

View File

@@ -118,6 +118,10 @@ type PodVolumeBackupStatus struct {
// +optional
Progress shared.DataMoveOperationProgress `json:"progress,omitempty"`
// IncrementalBytes holds the number of bytes new or changed since the last backup
// +optional
IncrementalBytes int64 `json:"incrementalBytes,omitempty"`
// AcceptedTimestamp records the time the pod volume backup is to be prepared.
// The server's time is used for AcceptedTimestamp
// +optional
@@ -134,6 +138,7 @@ type PodVolumeBackupStatus struct {
// +kubebuilder:printcolumn:name="Started",type="date",JSONPath=".status.startTimestamp",description="Time duration since this PodVolumeBackup was started"
// +kubebuilder:printcolumn:name="Bytes Done",type="integer",format="int64",JSONPath=".status.progress.bytesDone",description="Completed bytes"
// +kubebuilder:printcolumn:name="Total Bytes",type="integer",format="int64",JSONPath=".status.progress.totalBytes",description="Total bytes"
// +kubebuilder:printcolumn:name="Incremental Bytes",type="integer",format="int64",JSONPath=".status.incrementalBytes",description="Incremental bytes",priority=10
// +kubebuilder:printcolumn:name="Storage Location",type="string",JSONPath=".spec.backupStorageLocation",description="Name of the Backup Storage Location where this backup should be stored"
// +kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp",description="Time duration since this PodVolumeBackup was created"
// +kubebuilder:printcolumn:name="Node",type="string",JSONPath=".status.node",description="Name of the node where the PodVolumeBackup is processed"

View File

@@ -58,6 +58,10 @@ type PodVolumeRestoreSpec struct {
// Cancel indicates request to cancel the ongoing PodVolumeRestore. It can be set
// when the PodVolumeRestore is in InProgress phase
Cancel bool `json:"cancel,omitempty"`
// SnapshotSize is the logical size in Bytes of the snapshot.
// +optional
SnapshotSize int64 `json:"snapshotSize,omitempty"`
}
// PodVolumeRestorePhase represents the lifecycle phase of a PodVolumeRestore.

View File

@@ -58,6 +58,10 @@ type DataDownloadSpec struct {
// NodeOS is OS of the node where the DataDownload is processed.
// +optional
NodeOS NodeOS `json:"nodeOS,omitempty"`
// SnapshotSize is the logical size in Bytes of the snapshot.
// +optional
SnapshotSize int64 `json:"snapshotSize,omitempty"`
}
// TargetVolumeSpec is the specification for a target PVC.

View File

@@ -155,6 +155,10 @@ type DataUploadStatus struct {
// +optional
Progress shared.DataMoveOperationProgress `json:"progress,omitempty"`
// IncrementalBytes holds the number of bytes new or changed since the last backup
// +optional
IncrementalBytes int64 `json:"incrementalBytes,omitempty"`
// Node is name of the node where the DataUpload is processed.
// +optional
Node string `json:"node,omitempty"`
@@ -185,6 +189,7 @@ type DataUploadStatus struct {
// +kubebuilder:printcolumn:name="Started",type="date",JSONPath=".status.startTimestamp",description="Time duration since this DataUpload was started"
// +kubebuilder:printcolumn:name="Bytes Done",type="integer",format="int64",JSONPath=".status.progress.bytesDone",description="Completed bytes"
// +kubebuilder:printcolumn:name="Total Bytes",type="integer",format="int64",JSONPath=".status.progress.totalBytes",description="Total bytes"
// +kubebuilder:printcolumn:name="Incremental Bytes",type="integer",format="int64",JSONPath=".status.incrementalBytes",description="Incremental bytes",priority=10
// +kubebuilder:printcolumn:name="Storage Location",type="string",JSONPath=".spec.backupStorageLocation",description="Name of the Backup Storage Location where this backup should be stored"
// +kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp",description="Time duration since this DataUpload was created"
// +kubebuilder:printcolumn:name="Node",type="string",JSONPath=".status.node",description="Name of the node where the DataUpload is processed"
@@ -244,4 +249,8 @@ type DataUploadResult struct {
// NodeOS is OS of the node where the DataUpload is processed.
// +optional
NodeOS NodeOS `json:"nodeOS,omitempty"`
// SnapshotSize is the logical size in Bytes of the snapshot.
// +optional
SnapshotSize int64 `json:"snapshotSize,omitempty"`
}

View File

@@ -621,8 +621,30 @@ func (p *pvcBackupItemAction) getVolumeSnapshotReference(
return nil, errors.Wrapf(err, "failed to list PVCs in VolumeGroupSnapshot group %q in namespace %q", group, pvc.Namespace)
}
// Filter PVCs by volume policy
filteredPVCs, err := p.filterPVCsByVolumePolicy(groupedPVCs, backup)
if err != nil {
return nil, errors.Wrapf(err, "failed to filter PVCs by volume policy for VolumeGroupSnapshot group %q", group)
}
// Warn if any PVCs were filtered out
if len(filteredPVCs) < len(groupedPVCs) {
for _, originalPVC := range groupedPVCs {
found := false
for _, filteredPVC := range filteredPVCs {
if originalPVC.Name == filteredPVC.Name {
found = true
break
}
}
if !found {
p.log.Warnf("PVC %s/%s has VolumeGroupSnapshot label %s=%s but is excluded by volume policy", originalPVC.Namespace, originalPVC.Name, vgsLabelKey, group)
}
}
}
// Determine the CSI driver for the grouped PVCs
driver, err := p.determineCSIDriver(groupedPVCs)
driver, err := p.determineCSIDriver(filteredPVCs)
if err != nil {
return nil, errors.Wrapf(err, "failed to determine CSI driver for PVCs in VolumeGroupSnapshot group %q", group)
}
@@ -643,7 +665,7 @@ func (p *pvcBackupItemAction) getVolumeSnapshotReference(
}
// Wait for all the VS objects associated with the VGS to have status and VGS Name (VS readiness is checked in legacy flow) and get the PVC-to-VS map
vsMap, err := p.waitForVGSAssociatedVS(ctx, groupedPVCs, newVGS, backup.Spec.CSISnapshotTimeout.Duration)
vsMap, err := p.waitForVGSAssociatedVS(ctx, filteredPVCs, newVGS, backup.Spec.CSISnapshotTimeout.Duration)
if err != nil {
return nil, errors.Wrapf(err, "timeout waiting for VolumeSnapshots to have status created via VolumeGroupSnapshot %s", newVGS.Name)
}
@@ -734,6 +756,40 @@ func (p *pvcBackupItemAction) listGroupedPVCs(ctx context.Context, namespace, la
return pvcList.Items, nil
}
func (p *pvcBackupItemAction) filterPVCsByVolumePolicy(
pvcs []corev1api.PersistentVolumeClaim,
backup *velerov1api.Backup,
) ([]corev1api.PersistentVolumeClaim, error) {
var filteredPVCs []corev1api.PersistentVolumeClaim
for _, pvc := range pvcs {
// Convert PVC to unstructured for ShouldPerformSnapshotWithBackup
pvcMap, err := runtime.DefaultUnstructuredConverter.ToUnstructured(&pvc)
if err != nil {
return nil, errors.Wrapf(err, "failed to convert PVC %s/%s to unstructured", pvc.Namespace, pvc.Name)
}
unstructuredPVC := &unstructured.Unstructured{Object: pvcMap}
// Check if this PVC should be snapshotted according to volume policies
shouldSnapshot, err := volumehelper.ShouldPerformSnapshotWithBackup(
unstructuredPVC,
kuberesource.PersistentVolumeClaims,
*backup,
p.crClient,
p.log,
)
if err != nil {
return nil, errors.Wrapf(err, "failed to check volume policy for PVC %s/%s", pvc.Namespace, pvc.Name)
}
if shouldSnapshot {
filteredPVCs = append(filteredPVCs, pvc)
}
}
return filteredPVCs, nil
}
func (p *pvcBackupItemAction) determineCSIDriver(
pvcs []corev1api.PersistentVolumeClaim,
) (string, error) {

View File

@@ -586,6 +586,280 @@ func TestListGroupedPVCs(t *testing.T) {
}
}
func TestFilterPVCsByVolumePolicy(t *testing.T) {
tests := []struct {
name string
pvcs []corev1api.PersistentVolumeClaim
pvs []corev1api.PersistentVolume
volumePolicyStr string
expectCount int
expectError bool
}{
{
name: "All PVCs should be included when no volume policy",
pvcs: []corev1api.PersistentVolumeClaim{
{
ObjectMeta: metav1.ObjectMeta{Name: "pvc-1", Namespace: "ns-1"},
Spec: corev1api.PersistentVolumeClaimSpec{
VolumeName: "pv-1",
StorageClassName: pointer.String("sc-1"),
},
Status: corev1api.PersistentVolumeClaimStatus{Phase: corev1api.ClaimBound},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "pvc-2", Namespace: "ns-1"},
Spec: corev1api.PersistentVolumeClaimSpec{
VolumeName: "pv-2",
StorageClassName: pointer.String("sc-1"),
},
Status: corev1api.PersistentVolumeClaimStatus{Phase: corev1api.ClaimBound},
},
},
pvs: []corev1api.PersistentVolume{
{
ObjectMeta: metav1.ObjectMeta{Name: "pv-1"},
Spec: corev1api.PersistentVolumeSpec{
PersistentVolumeSource: corev1api.PersistentVolumeSource{
CSI: &corev1api.CSIPersistentVolumeSource{Driver: "csi-driver-1"},
},
},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "pv-2"},
Spec: corev1api.PersistentVolumeSpec{
PersistentVolumeSource: corev1api.PersistentVolumeSource{
CSI: &corev1api.CSIPersistentVolumeSource{Driver: "csi-driver-1"},
},
},
},
},
expectCount: 2,
},
{
name: "Filter out NFS PVC by volume policy",
pvcs: []corev1api.PersistentVolumeClaim{
{
ObjectMeta: metav1.ObjectMeta{Name: "pvc-csi", Namespace: "ns-1"},
Spec: corev1api.PersistentVolumeClaimSpec{
VolumeName: "pv-csi",
StorageClassName: pointer.String("sc-1"),
},
Status: corev1api.PersistentVolumeClaimStatus{Phase: corev1api.ClaimBound},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "pvc-nfs", Namespace: "ns-1"},
Spec: corev1api.PersistentVolumeClaimSpec{
VolumeName: "pv-nfs",
StorageClassName: pointer.String("sc-nfs"),
},
Status: corev1api.PersistentVolumeClaimStatus{Phase: corev1api.ClaimBound},
},
},
pvs: []corev1api.PersistentVolume{
{
ObjectMeta: metav1.ObjectMeta{Name: "pv-csi"},
Spec: corev1api.PersistentVolumeSpec{
PersistentVolumeSource: corev1api.PersistentVolumeSource{
CSI: &corev1api.CSIPersistentVolumeSource{Driver: "csi-driver"},
},
},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "pv-nfs"},
Spec: corev1api.PersistentVolumeSpec{
PersistentVolumeSource: corev1api.PersistentVolumeSource{
NFS: &corev1api.NFSVolumeSource{
Server: "nfs-server",
Path: "/export",
},
},
},
},
},
volumePolicyStr: `
version: v1
volumePolicies:
- conditions:
nfs: {}
action:
type: skip
`,
expectCount: 1,
},
{
name: "All PVCs filtered out by volume policy",
pvcs: []corev1api.PersistentVolumeClaim{
{
ObjectMeta: metav1.ObjectMeta{Name: "pvc-nfs-1", Namespace: "ns-1"},
Spec: corev1api.PersistentVolumeClaimSpec{
VolumeName: "pv-nfs-1",
StorageClassName: pointer.String("sc-nfs"),
},
Status: corev1api.PersistentVolumeClaimStatus{Phase: corev1api.ClaimBound},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "pvc-nfs-2", Namespace: "ns-1"},
Spec: corev1api.PersistentVolumeClaimSpec{
VolumeName: "pv-nfs-2",
StorageClassName: pointer.String("sc-nfs"),
},
Status: corev1api.PersistentVolumeClaimStatus{Phase: corev1api.ClaimBound},
},
},
pvs: []corev1api.PersistentVolume{
{
ObjectMeta: metav1.ObjectMeta{Name: "pv-nfs-1"},
Spec: corev1api.PersistentVolumeSpec{
PersistentVolumeSource: corev1api.PersistentVolumeSource{
NFS: &corev1api.NFSVolumeSource{
Server: "nfs-server",
Path: "/export/1",
},
},
},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "pv-nfs-2"},
Spec: corev1api.PersistentVolumeSpec{
PersistentVolumeSource: corev1api.PersistentVolumeSource{
NFS: &corev1api.NFSVolumeSource{
Server: "nfs-server",
Path: "/export/2",
},
},
},
},
},
volumePolicyStr: `
version: v1
volumePolicies:
- conditions:
nfs: {}
action:
type: skip
`,
expectCount: 0,
},
{
name: "Filter out non-CSI PVCs from mixed driver group",
pvcs: []corev1api.PersistentVolumeClaim{
{
ObjectMeta: metav1.ObjectMeta{
Name: "pvc-linstor",
Namespace: "ns-1",
Labels: map[string]string{"app.kubernetes.io/instance": "myapp"},
},
Spec: corev1api.PersistentVolumeClaimSpec{
VolumeName: "pv-linstor",
StorageClassName: pointer.String("sc-linstor"),
},
Status: corev1api.PersistentVolumeClaimStatus{Phase: corev1api.ClaimBound},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: "pvc-nfs",
Namespace: "ns-1",
Labels: map[string]string{"app.kubernetes.io/instance": "myapp"},
},
Spec: corev1api.PersistentVolumeClaimSpec{
VolumeName: "pv-nfs",
StorageClassName: pointer.String("sc-nfs"),
},
Status: corev1api.PersistentVolumeClaimStatus{Phase: corev1api.ClaimBound},
},
},
pvs: []corev1api.PersistentVolume{
{
ObjectMeta: metav1.ObjectMeta{Name: "pv-linstor"},
Spec: corev1api.PersistentVolumeSpec{
PersistentVolumeSource: corev1api.PersistentVolumeSource{
CSI: &corev1api.CSIPersistentVolumeSource{Driver: "linstor.csi.linbit.com"},
},
},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "pv-nfs"},
Spec: corev1api.PersistentVolumeSpec{
PersistentVolumeSource: corev1api.PersistentVolumeSource{
NFS: &corev1api.NFSVolumeSource{
Server: "nfs-server",
Path: "/export",
},
},
},
},
},
volumePolicyStr: `
version: v1
volumePolicies:
- conditions:
nfs: {}
action:
type: skip
`,
expectCount: 1,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
objs := []runtime.Object{}
for i := range tt.pvs {
objs = append(objs, &tt.pvs[i])
}
client := velerotest.NewFakeControllerRuntimeClient(t, objs...)
backup := &velerov1api.Backup{
ObjectMeta: metav1.ObjectMeta{
Name: "test-backup",
Namespace: "velero",
},
Spec: velerov1api.BackupSpec{},
}
// Add volume policy ConfigMap if specified
if tt.volumePolicyStr != "" {
cm := &corev1api.ConfigMap{
ObjectMeta: metav1.ObjectMeta{
Name: "volume-policy",
Namespace: "velero",
},
Data: map[string]string{
"volume-policy": tt.volumePolicyStr,
},
}
require.NoError(t, client.Create(t.Context(), cm))
backup.Spec.ResourcePolicy = &corev1api.TypedLocalObjectReference{
Kind: "ConfigMap",
Name: "volume-policy",
}
}
action := &pvcBackupItemAction{
log: velerotest.NewLogger(),
crClient: client,
}
result, err := action.filterPVCsByVolumePolicy(tt.pvcs, backup)
if tt.expectError {
require.Error(t, err)
} else {
require.NoError(t, err)
require.Len(t, result, tt.expectCount)
// For mixed driver scenarios, verify filtered result can determine single CSI driver
if tt.name == "Filter out non-CSI PVCs from mixed driver group" && len(result) > 0 {
driver, err := action.determineCSIDriver(result)
require.NoError(t, err, "After filtering, determineCSIDriver should not fail with multiple drivers error")
require.Equal(t, "linstor.csi.linbit.com", driver, "Should have the Linstor driver after filtering out NFS")
}
}
})
}
}
func TestDetermineCSIDriver(t *testing.T) {
tests := []struct {
name string

View File

@@ -366,7 +366,7 @@ func (kb *kubernetesBackupper) BackupWithResolvers(
discoveryHelper: kb.discoveryHelper,
podVolumeBackupper: podVolumeBackupper,
podVolumeSnapshotTracker: podvolume.NewTracker(),
volumeSnapshotterGetter: volumeSnapshotterGetter,
volumeSnapshotterCache: NewVolumeSnapshotterCache(volumeSnapshotterGetter),
itemHookHandler: &hook.DefaultItemHookHandler{
PodCommandExecutor: kb.podCommandExecutor,
},
@@ -1198,6 +1198,7 @@ func updateVolumeInfos(
volumeInfos[index].SnapshotDataMovementInfo.SnapshotHandle = dataUpload.Status.SnapshotID
volumeInfos[index].SnapshotDataMovementInfo.RetainedSnapshot = dataUpload.Spec.CSISnapshot.VolumeSnapshot
volumeInfos[index].SnapshotDataMovementInfo.Size = dataUpload.Status.Progress.TotalBytes
volumeInfos[index].SnapshotDataMovementInfo.IncrementalSize = dataUpload.Status.IncrementalBytes
volumeInfos[index].SnapshotDataMovementInfo.Phase = dataUpload.Status.Phase
if dataUpload.Status.Phase == velerov2alpha1.DataUploadPhaseCompleted {

View File

@@ -3269,7 +3269,7 @@ func TestBackupWithSnapshots(t *testing.T) {
err := h.backupper.Backup(h.log, tc.req, backupFile, nil, nil, tc.snapshotterGetter)
require.NoError(t, err)
assert.Equal(t, tc.want, tc.req.VolumeSnapshots)
assert.Equal(t, tc.want, tc.req.VolumeSnapshots.Get())
})
}
}
@@ -4213,7 +4213,7 @@ func TestBackupWithPodVolume(t *testing.T) {
assert.Equal(t, tc.want, req.PodVolumeBackups)
// this assumes that we don't have any test cases where some PVs should be snapshotted using a VolumeSnapshotter
assert.Nil(t, req.VolumeSnapshots)
assert.Nil(t, req.VolumeSnapshots.Get())
})
}
}
@@ -5578,6 +5578,7 @@ func TestUpdateVolumeInfos(t *testing.T) {
CSISnapshot(&velerov2alpha1.CSISnapshotSpec{VolumeSnapshot: "vs-1"}).
SnapshotID("snapshot-id").
Progress(shared.DataMoveOperationProgress{TotalBytes: 1000}).
IncrementalBytes(500).
Phase(velerov2alpha1.DataUploadPhaseFailed).
SourceNamespace("ns-1").
SourcePVC("pvc-1").
@@ -5603,6 +5604,7 @@ func TestUpdateVolumeInfos(t *testing.T) {
RetainedSnapshot: "vs-1",
SnapshotHandle: "snapshot-id",
Size: 1000,
IncrementalSize: 500,
Phase: velerov2alpha1.DataUploadPhaseFailed,
},
},
@@ -5616,6 +5618,7 @@ func TestUpdateVolumeInfos(t *testing.T) {
CSISnapshot(&velerov2alpha1.CSISnapshotSpec{VolumeSnapshot: "vs-1"}).
SnapshotID("snapshot-id").
Progress(shared.DataMoveOperationProgress{TotalBytes: 1000}).
IncrementalBytes(500).
Phase(velerov2alpha1.DataUploadPhaseCompleted).
SourceNamespace("ns-1").
SourcePVC("pvc-1").
@@ -5641,6 +5644,7 @@ func TestUpdateVolumeInfos(t *testing.T) {
RetainedSnapshot: "vs-1",
SnapshotHandle: "snapshot-id",
Size: 1000,
IncrementalSize: 500,
Phase: velerov2alpha1.DataUploadPhaseCompleted,
},
},
@@ -5655,6 +5659,7 @@ func TestUpdateVolumeInfos(t *testing.T) {
CSISnapshot(&velerov2alpha1.CSISnapshotSpec{VolumeSnapshot: "vs-1"}).
SnapshotID("snapshot-id").
Progress(shared.DataMoveOperationProgress{TotalBytes: 1000}).
IncrementalBytes(500).
Phase(velerov2alpha1.DataUploadPhaseCompleted).
SourceNamespace("ns-1").
SourcePVC("pvc-1").

View File

@@ -70,13 +70,11 @@ type itemBackupper struct {
discoveryHelper discovery.Helper
podVolumeBackupper podvolume.Backupper
podVolumeSnapshotTracker *podvolume.Tracker
volumeSnapshotterGetter VolumeSnapshotterGetter
kubernetesBackupper *kubernetesBackupper
itemHookHandler hook.ItemHookHandler
snapshotLocationVolumeSnapshotters map[string]vsv1.VolumeSnapshotter
hookTracker *hook.HookTracker
volumeHelperImpl volumehelper.VolumeHelper
volumeSnapshotterCache *VolumeSnapshotterCache
itemHookHandler hook.ItemHookHandler
hookTracker *hook.HookTracker
volumeHelperImpl volumehelper.VolumeHelper
}
type FileForArchive struct {
@@ -502,30 +500,6 @@ func (ib *itemBackupper) executeActions(
return obj, itemFiles, nil
}
// volumeSnapshotter instantiates and initializes a VolumeSnapshotter given a VolumeSnapshotLocation,
// or returns an existing one if one's already been initialized for the location.
func (ib *itemBackupper) volumeSnapshotter(snapshotLocation *velerov1api.VolumeSnapshotLocation) (vsv1.VolumeSnapshotter, error) {
if bs, ok := ib.snapshotLocationVolumeSnapshotters[snapshotLocation.Name]; ok {
return bs, nil
}
bs, err := ib.volumeSnapshotterGetter.GetVolumeSnapshotter(snapshotLocation.Spec.Provider)
if err != nil {
return nil, err
}
if err := bs.Init(snapshotLocation.Spec.Config); err != nil {
return nil, err
}
if ib.snapshotLocationVolumeSnapshotters == nil {
ib.snapshotLocationVolumeSnapshotters = make(map[string]vsv1.VolumeSnapshotter)
}
ib.snapshotLocationVolumeSnapshotters[snapshotLocation.Name] = bs
return bs, nil
}
// zoneLabelDeprecated is the label that stores availability-zone info
// on PVs this is deprecated on Kubernetes >= 1.17.0
// zoneLabel is the label that stores availability-zone info
@@ -641,7 +615,7 @@ func (ib *itemBackupper) takePVSnapshot(obj runtime.Unstructured, log logrus.Fie
for _, snapshotLocation := range ib.backupRequest.SnapshotLocations {
log := log.WithField("volumeSnapshotLocation", snapshotLocation.Name)
bs, err := ib.volumeSnapshotter(snapshotLocation)
bs, err := ib.volumeSnapshotterCache.SetNX(snapshotLocation)
if err != nil {
log.WithError(err).Error("Error getting volume snapshotter for volume snapshot location")
continue
@@ -699,7 +673,7 @@ func (ib *itemBackupper) takePVSnapshot(obj runtime.Unstructured, log logrus.Fie
snapshot.Status.Phase = volume.SnapshotPhaseCompleted
snapshot.Status.ProviderSnapshotID = snapshotID
}
ib.backupRequest.VolumeSnapshots = append(ib.backupRequest.VolumeSnapshots, snapshot)
ib.backupRequest.VolumeSnapshots.Add(snapshot)
// nil errors are automatically removed
return kubeerrs.NewAggregate(errs)

View File

@@ -17,6 +17,8 @@ limitations under the License.
package backup
import (
"sync"
"github.com/vmware-tanzu/velero/internal/hook"
"github.com/vmware-tanzu/velero/internal/resourcepolicies"
"github.com/vmware-tanzu/velero/internal/volume"
@@ -32,11 +34,27 @@ type itemKey struct {
name string
}
type SynchronizedVSList struct {
sync.Mutex
VolumeSnapshotList []*volume.Snapshot
}
func (s *SynchronizedVSList) Add(vs *volume.Snapshot) {
s.Lock()
defer s.Unlock()
s.VolumeSnapshotList = append(s.VolumeSnapshotList, vs)
}
func (s *SynchronizedVSList) Get() []*volume.Snapshot {
s.Lock()
defer s.Unlock()
return s.VolumeSnapshotList
}
// Request is a request for a backup, with all references to other objects
// materialized (e.g. backup/snapshot locations, includes/excludes, etc.)
type Request struct {
*velerov1api.Backup
StorageLocation *velerov1api.BackupStorageLocation
SnapshotLocations []*velerov1api.VolumeSnapshotLocation
NamespaceIncludesExcludes *collections.IncludesExcludes
@@ -44,7 +62,7 @@ type Request struct {
ResourceHooks []hook.ResourceHook
ResolvedActions []framework.BackupItemResolvedActionV2
ResolvedItemBlockActions []framework.ItemBlockResolvedAction
VolumeSnapshots []*volume.Snapshot
VolumeSnapshots SynchronizedVSList
PodVolumeBackups []*velerov1api.PodVolumeBackup
BackedUpItems *backedUpItemsMap
itemOperationsList *[]*itemoperation.BackupOperation
@@ -80,7 +98,7 @@ func (r *Request) FillVolumesInformation() {
}
r.VolumesInformation.SkippedPVs = skippedPVMap
r.VolumesInformation.NativeSnapshots = r.VolumeSnapshots
r.VolumesInformation.NativeSnapshots = r.VolumeSnapshots.Get()
r.VolumesInformation.PodVolumeBackups = r.PodVolumeBackups
r.VolumesInformation.BackupOperations = *r.GetItemOperationsList()
r.VolumesInformation.BackupName = r.Backup.Name

View File

@@ -0,0 +1,42 @@
package backup
import (
"sync"
velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
vsv1 "github.com/vmware-tanzu/velero/pkg/plugin/velero/volumesnapshotter/v1"
)
type VolumeSnapshotterCache struct {
cache map[string]vsv1.VolumeSnapshotter
mutex sync.Mutex
getter VolumeSnapshotterGetter
}
func NewVolumeSnapshotterCache(getter VolumeSnapshotterGetter) *VolumeSnapshotterCache {
return &VolumeSnapshotterCache{
cache: make(map[string]vsv1.VolumeSnapshotter),
getter: getter,
}
}
func (c *VolumeSnapshotterCache) SetNX(location *velerov1api.VolumeSnapshotLocation) (vsv1.VolumeSnapshotter, error) {
c.mutex.Lock()
defer c.mutex.Unlock()
if snapshotter, exists := c.cache[location.Name]; exists {
return snapshotter, nil
}
snapshotter, err := c.getter.GetVolumeSnapshotter(location.Spec.Provider)
if err != nil {
return nil, err
}
if err := snapshotter.Init(location.Spec.Config); err != nil {
return nil, err
}
c.cache[location.Name] = snapshotter
return snapshotter, nil
}

View File

@@ -145,6 +145,12 @@ func (d *DataUploadBuilder) Progress(progress shared.DataMoveOperationProgress)
return d
}
// IncrementalBytes sets the DataUpload's IncrementalBytes.
func (d *DataUploadBuilder) IncrementalBytes(incrementalBytes int64) *DataUploadBuilder {
d.object.Status.IncrementalBytes = incrementalBytes
return d
}
// Node sets the DataUpload's Node.
func (d *DataUploadBuilder) Node(node string) *DataUploadBuilder {
d.object.Status.Node = node
@@ -180,3 +186,9 @@ func (d *DataUploadBuilder) Message(msg string) *DataUploadBuilder {
d.object.Status.Message = msg
return d
}
// TotalBytes sets the DataUpload's TotalBytes.
func (d *DataUploadBuilder) TotalBytes(size int64) *DataUploadBuilder {
d.object.Status.Progress.TotalBytes = size
return d
}

View File

@@ -0,0 +1,52 @@
/*
Copyright 2019 the Velero contributors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package builder
import (
corev1api "k8s.io/api/core/v1"
schedulingv1api "k8s.io/api/scheduling/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
type PriorityClassBuilder struct {
object *schedulingv1api.PriorityClass
}
func ForPriorityClass(name string) *PriorityClassBuilder {
return &PriorityClassBuilder{
object: &schedulingv1api.PriorityClass{
ObjectMeta: metav1.ObjectMeta{
Name: name,
},
},
}
}
func (p *PriorityClassBuilder) Value(value int) *PriorityClassBuilder {
p.object.Value = int32(value)
return p
}
func (p *PriorityClassBuilder) PreemptionPolicy(policy string) *PriorityClassBuilder {
preemptionPolicy := corev1api.PreemptionPolicy(policy)
p.object.PreemptionPolicy = &preemptionPolicy
return p
}
func (p *PriorityClassBuilder) Result() *schedulingv1api.PriorityClass {
return p.object
}

View File

@@ -17,6 +17,7 @@ limitations under the License.
package builder
import (
corev1api "k8s.io/api/core/v1"
storagev1api "k8s.io/api/storage/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
@@ -87,3 +88,9 @@ func (b *StorageClassBuilder) Provisioner(provisioner string) *StorageClassBuild
b.object.Provisioner = provisioner
return b
}
// ReclaimPolicy sets StorageClass's reclaimPolicy.
func (b *StorageClassBuilder) ReclaimPolicy(policy corev1api.PersistentVolumeReclaimPolicy) *StorageClassBuilder {
b.object.ReclaimPolicy = &policy
return b
}

View File

@@ -75,7 +75,7 @@ func TestDeleteCommand(t *testing.T) {
return
}
cmd := exec.Command(os.Args[0], []string{"-test.run=TestDeleteCommand"}...)
cmd := exec.CommandContext(t.Context(), os.Args[0], []string{"-test.run=TestDeleteCommand"}...)
cmd.Env = append(os.Environ(), fmt.Sprintf("%s=1", cmdtest.CaptureFlag))
stdout, _, err := veleroexec.RunCommand(cmd)
if err != nil {

View File

@@ -63,7 +63,7 @@ func TestNewDescribeCommand(t *testing.T) {
if os.Getenv(cmdtest.CaptureFlag) == "1" {
return
}
cmd := exec.Command(os.Args[0], []string{"-test.run=TestNewDescribeCommand"}...)
cmd := exec.CommandContext(t.Context(), os.Args[0], []string{"-test.run=TestNewDescribeCommand"}...)
cmd.Env = append(os.Environ(), fmt.Sprintf("%s=1", cmdtest.CaptureFlag))
stdout, _, err := veleroexec.RunCommand(cmd)

View File

@@ -91,7 +91,7 @@ func TestNewDownloadCommand(t *testing.T) {
assert.NoError(t, e)
return
}
cmd := exec.Command(os.Args[0], []string{"-test.run=TestNewDownloadCommand"}...)
cmd := exec.CommandContext(t.Context(), os.Args[0], []string{"-test.run=TestNewDownloadCommand"}...)
cmd.Env = append(os.Environ(), fmt.Sprintf("%s=1", cmdtest.CaptureFlag))
_, stderr, err := veleroexec.RunCommand(cmd)

View File

@@ -63,7 +63,7 @@ func TestNewGetCommand(t *testing.T) {
return
}
cmd := exec.Command(os.Args[0], []string{"-test.run=TestNewGetCommand"}...)
cmd := exec.CommandContext(t.Context(), os.Args[0], []string{"-test.run=TestNewGetCommand"}...)
cmd.Env = append(os.Environ(), fmt.Sprintf("%s=1", cmdtest.CaptureFlag))
stdout, _, err := veleroexec.RunCommand(cmd)
require.NoError(t, err)
@@ -84,7 +84,7 @@ func TestNewGetCommand(t *testing.T) {
e = d.Execute()
require.NoError(t, e)
cmd = exec.Command(os.Args[0], []string{"-test.run=TestNewGetCommand"}...)
cmd = exec.CommandContext(t.Context(), os.Args[0], []string{"-test.run=TestNewGetCommand"}...)
cmd.Env = append(os.Environ(), fmt.Sprintf("%s=1", cmdtest.CaptureFlag))
stdout, _, err = veleroexec.RunCommand(cmd)
require.NoError(t, err)

View File

@@ -66,7 +66,7 @@ func TestNewDeleteCommand(t *testing.T) {
return
}
cmd := exec.Command(os.Args[0], []string{"-test.run=TestNewDeleteCommand"}...)
cmd := exec.CommandContext(t.Context(), os.Args[0], []string{"-test.run=TestNewDeleteCommand"}...)
cmd.Env = append(os.Environ(), fmt.Sprintf("%s=1", cmdtest.CaptureFlag))
stdout, _, err := veleroexec.RunCommand(cmd)

View File

@@ -50,7 +50,7 @@ func TestNewGetCommand(t *testing.T) {
c.Execute()
return
}
cmd := exec.Command(os.Args[0], []string{"-test.run=TestNewGetCommand"}...)
cmd := exec.CommandContext(t.Context(), os.Args[0], []string{"-test.run=TestNewGetCommand"}...)
cmd.Env = append(os.Environ(), fmt.Sprintf("%s=1", cmdtest.CaptureFlag))
_, stderr, err := veleroexec.RunCommand(cmd)

View File

@@ -99,7 +99,7 @@ func TestSetCommand_Execute(t *testing.T) {
return
}
cmd := exec.Command(os.Args[0], []string{"-test.run=TestSetCommand_Execute"}...)
cmd := exec.CommandContext(t.Context(), os.Args[0], []string{"-test.run=TestSetCommand_Execute"}...)
cmd.Env = append(os.Environ(), fmt.Sprintf("%s=1", cmdtest.CaptureFlag))
_, stderr, err := veleroexec.RunCommand(cmd)

View File

@@ -18,6 +18,7 @@ package bug
import (
"bytes"
"context"
"errors"
"fmt"
"net/url"
@@ -147,7 +148,7 @@ func getKubectlVersion() (string, error) {
return "", errors.New("kubectl not found on PATH")
}
kubectlCmd := exec.Command("kubectl", "version")
kubectlCmd := exec.CommandContext(context.Background(), "kubectl", "version")
var outbuf bytes.Buffer
kubectlCmd.Stdout = &outbuf
if err := kubectlCmd.Start(); err != nil {
@@ -207,16 +208,17 @@ func renderToString(bugInfo *VeleroBugInfo) (string, error) {
// a platform specific binary.
func showIssueInBrowser(body string) error {
url := issueURL + "?body=" + url.QueryEscape(body)
ctx := context.Background()
switch runtime.GOOS {
case "darwin":
return exec.Command("open", url).Start()
return exec.CommandContext(ctx, "open", url).Start()
case "linux":
if cmdExistsOnPath("xdg-open") {
return exec.Command("xdg-open", url).Start()
return exec.CommandContext(ctx, "xdg-open", url).Start()
}
return fmt.Errorf("velero can't open a browser window using the command '%s'", "xdg-open")
case "windows":
return exec.Command("rundll32", "url.dll,FileProtocolHandler", url).Start()
return exec.CommandContext(ctx, "rundll32", "url.dll,FileProtocolHandler", url).Start()
default:
return fmt.Errorf("velero can't open a browser window on platform %s", runtime.GOOS)
}

View File

@@ -53,6 +53,7 @@ type dataMoverRestoreConfig struct {
volumePath string
volumeMode string
ddName string
cacheDir string
resourceTimeout time.Duration
}
@@ -89,6 +90,7 @@ func NewRestoreCommand(f client.Factory) *cobra.Command {
command.Flags().StringVar(&config.volumePath, "volume-path", config.volumePath, "The full path of the volume to be restored")
command.Flags().StringVar(&config.volumeMode, "volume-mode", config.volumeMode, "The mode of the volume to be restored")
command.Flags().StringVar(&config.ddName, "data-download", config.ddName, "The data download name")
command.Flags().StringVar(&config.cacheDir, "cache-volume-path", config.cacheDir, "The full path of the cache volume")
command.Flags().DurationVar(&config.resourceTimeout, "resource-timeout", config.resourceTimeout, "How long to wait for resource processes which are not covered by other specific timeout parameters.")
_ = command.MarkFlagRequired("volume-path")
@@ -288,5 +290,5 @@ func (s *dataMoverRestore) createDataPathService() (dataPathService, error) {
return datamover.NewRestoreMicroService(s.ctx, s.client, s.kubeClient, s.config.ddName, s.namespace, s.nodeName, datapath.AccessPoint{
ByPath: s.config.volumePath,
VolMode: uploader.PersistentVolumeMode(s.config.volumeMode),
}, s.dataPathMgr, repoEnsurer, credGetter, duInformer, s.logger), nil
}, s.dataPathMgr, repoEnsurer, credGetter, duInformer, s.config.cacheDir, s.logger), nil
}

View File

@@ -545,24 +545,22 @@ func (o *Options) Validate(c *cobra.Command, args []string, f client.Factory) er
return fmt.Errorf("fail to create go-client %w", err)
}
// If either Linux or Windows node-agent is installed, and the node-agent-configmap
// is specified, need to validate the ConfigMap.
if (o.UseNodeAgent || o.UseNodeAgentWindows) && len(o.NodeAgentConfigMap) > 0 {
if len(o.NodeAgentConfigMap) > 0 {
if err := kubeutil.VerifyJSONConfigs(c.Context(), o.Namespace, crClient, o.NodeAgentConfigMap, &velerotypes.NodeAgentConfigs{}); err != nil {
return fmt.Errorf("--node-agent-configmap specified ConfigMap %s is invalid", o.NodeAgentConfigMap)
return fmt.Errorf("--node-agent-configmap specified ConfigMap %s is invalid: %w", o.NodeAgentConfigMap, err)
}
}
if len(o.RepoMaintenanceJobConfigMap) > 0 {
if err := kubeutil.VerifyJSONConfigs(c.Context(), o.Namespace, crClient, o.RepoMaintenanceJobConfigMap, &velerotypes.JobConfigs{}); err != nil {
return fmt.Errorf("--repo-maintenance-job-configmap specified ConfigMap %s is invalid", o.RepoMaintenanceJobConfigMap)
return fmt.Errorf("--repo-maintenance-job-configmap specified ConfigMap %s is invalid: %w", o.RepoMaintenanceJobConfigMap, err)
}
}
if len(o.BackupRepoConfigMap) > 0 {
config := make(map[string]any)
if err := kubeutil.VerifyJSONConfigs(c.Context(), o.Namespace, crClient, o.BackupRepoConfigMap, &config); err != nil {
return fmt.Errorf("--backup-repository-configmap specified ConfigMap %s is invalid", o.BackupRepoConfigMap)
return fmt.Errorf("--backup-repository-configmap specified ConfigMap %s is invalid: %w", o.BackupRepoConfigMap, err)
}
}

View File

@@ -60,6 +60,7 @@ import (
"github.com/vmware-tanzu/velero/pkg/exposer"
"github.com/vmware-tanzu/velero/pkg/metrics"
"github.com/vmware-tanzu/velero/pkg/nodeagent"
repository "github.com/vmware-tanzu/velero/pkg/repository/manager"
velerotypes "github.com/vmware-tanzu/velero/pkg/types"
"github.com/vmware-tanzu/velero/pkg/util/filesystem"
"github.com/vmware-tanzu/velero/pkg/util/kube"
@@ -84,6 +85,7 @@ type nodeAgentServerConfig struct {
resourceTimeout time.Duration
dataMoverPrepareTimeout time.Duration
nodeAgentConfig string
backupRepoConfig string
}
func NewServerCommand(f client.Factory) *cobra.Command {
@@ -121,6 +123,7 @@ func NewServerCommand(f client.Factory) *cobra.Command {
command.Flags().DurationVar(&config.dataMoverPrepareTimeout, "data-mover-prepare-timeout", config.dataMoverPrepareTimeout, "How long to wait for preparing a DataUpload/DataDownload. Default is 30 minutes.")
command.Flags().StringVar(&config.metricsAddress, "metrics-address", config.metricsAddress, "The address to expose prometheus metrics")
command.Flags().StringVar(&config.nodeAgentConfig, "node-agent-configmap", config.nodeAgentConfig, "The name of ConfigMap containing node-agent configurations.")
command.Flags().StringVar(&config.backupRepoConfig, "backup-repository-configmap", config.backupRepoConfig, "The name of ConfigMap containing backup repository configurations.")
return command
}
@@ -140,7 +143,9 @@ type nodeAgentServer struct {
csiSnapshotClient *snapshotv1client.Clientset
dataPathMgr *datapath.Manager
dataPathConfigs *velerotypes.NodeAgentConfigs
backupRepoConfigs map[string]string
vgdpCounter *exposer.VgdpCounter
repoConfigMgr repository.ConfigManager
}
func newNodeAgentServer(logger logrus.FieldLogger, factory client.Factory, config nodeAgentServerConfig) (*nodeAgentServer, error) {
@@ -234,6 +239,7 @@ func newNodeAgentServer(logger logrus.FieldLogger, factory client.Factory, confi
namespace: factory.Namespace(),
nodeName: nodeName,
metricsAddress: config.metricsAddress,
repoConfigMgr: repository.NewConfigManager(logger),
}
// the cache isn't initialized yet when "validatePodVolumesHostPath" is called, the client returned by the manager cannot
@@ -254,6 +260,11 @@ func newNodeAgentServer(logger logrus.FieldLogger, factory client.Factory, confi
if err := s.getDataPathConfigs(); err != nil {
return nil, err
}
if err := s.getBackupRepoConfigs(); err != nil {
return nil, err
}
s.dataPathMgr = datapath.NewManager(s.getDataPathConcurrentNum(defaultDataPathConcurrentNum))
return s, nil
@@ -308,6 +319,8 @@ func (s *nodeAgentServer) run() {
s.logger.Infof("Using customized backupPVC config %v", backupPVCConfig)
}
privilegedFsBackup := s.dataPathConfigs != nil && s.dataPathConfigs.PrivilegedFsBackup
podResources := corev1api.ResourceRequirements{}
if s.dataPathConfigs != nil && s.dataPathConfigs.PodResources != nil {
if res, err := kube.ParseResourceRequirements(s.dataPathConfigs.PodResources.CPURequest, s.dataPathConfigs.PodResources.MemoryRequest, s.dataPathConfigs.PodResources.CPULimit, s.dataPathConfigs.PodResources.MemoryLimit); err != nil {
@@ -327,12 +340,30 @@ func (s *nodeAgentServer) run() {
}
}
pvbReconciler := controller.NewPodVolumeBackupReconciler(s.mgr.GetClient(), s.mgr, s.kubeClient, s.dataPathMgr, s.vgdpCounter, s.nodeName, s.config.dataMoverPrepareTimeout, s.config.resourceTimeout, podResources, s.metrics, s.logger, dataMovePriorityClass)
if s.dataPathConfigs != nil && s.dataPathConfigs.CachePVCConfig != nil {
if err := s.validateCachePVCConfig(*s.dataPathConfigs.CachePVCConfig); err != nil {
s.logger.WithError(err).Warnf("Ignore cache config %v", s.dataPathConfigs.CachePVCConfig)
} else {
s.logger.Infof("Using cache volume configs %v", s.dataPathConfigs.CachePVCConfig)
}
}
var cachePVCConfig *velerotypes.CachePVC
if s.dataPathConfigs != nil && s.dataPathConfigs.CachePVCConfig != nil {
cachePVCConfig = s.dataPathConfigs.CachePVCConfig
s.logger.Infof("Using customized cachePVC config %v", cachePVCConfig)
}
if s.backupRepoConfigs != nil {
s.logger.Infof("Using backup repo config %v", s.backupRepoConfigs)
}
pvbReconciler := controller.NewPodVolumeBackupReconciler(s.mgr.GetClient(), s.mgr, s.kubeClient, s.dataPathMgr, s.vgdpCounter, s.nodeName, s.config.dataMoverPrepareTimeout, s.config.resourceTimeout, podResources, s.metrics, s.logger, dataMovePriorityClass, privilegedFsBackup)
if err := pvbReconciler.SetupWithManager(s.mgr); err != nil {
s.logger.Fatal(err, "unable to create controller", "controller", constant.ControllerPodVolumeBackup)
}
pvrReconciler := controller.NewPodVolumeRestoreReconciler(s.mgr.GetClient(), s.mgr, s.kubeClient, s.dataPathMgr, s.vgdpCounter, s.nodeName, s.config.dataMoverPrepareTimeout, s.config.resourceTimeout, podResources, s.logger, dataMovePriorityClass)
pvrReconciler := controller.NewPodVolumeRestoreReconciler(s.mgr.GetClient(), s.mgr, s.kubeClient, s.dataPathMgr, s.vgdpCounter, s.nodeName, s.config.dataMoverPrepareTimeout, s.config.resourceTimeout, s.backupRepoConfigs, cachePVCConfig, podResources, s.logger, dataMovePriorityClass, privilegedFsBackup, s.repoConfigMgr)
if err := pvrReconciler.SetupWithManager(s.mgr); err != nil {
s.logger.WithError(err).Fatal("Unable to create the pod volume restore controller")
}
@@ -376,12 +407,15 @@ func (s *nodeAgentServer) run() {
s.vgdpCounter,
loadAffinity,
restorePVCConfig,
s.backupRepoConfigs,
cachePVCConfig,
podResources,
s.nodeName,
s.config.dataMoverPrepareTimeout,
s.logger,
s.metrics,
dataMovePriorityClass,
s.repoConfigMgr,
)
if err := dataDownloadReconciler.SetupWithManager(s.mgr); err != nil {
@@ -555,14 +589,32 @@ func (s *nodeAgentServer) getDataPathConfigs() error {
configs, err := getConfigsFunc(s.ctx, s.namespace, s.kubeClient, s.config.nodeAgentConfig)
if err != nil {
s.logger.WithError(err).Errorf("Failed to get node agent configs from configMap %s, ignore it", s.config.nodeAgentConfig)
return err
return errors.Wrapf(err, "error getting node agent configs from configMap %s", s.config.nodeAgentConfig)
}
s.dataPathConfigs = configs
return nil
}
func (s *nodeAgentServer) getBackupRepoConfigs() error {
if s.config.backupRepoConfig == "" {
s.logger.Info("No backup repo configMap is specified")
return nil
}
cm, err := s.kubeClient.CoreV1().ConfigMaps(s.namespace).Get(s.ctx, s.config.backupRepoConfig, metav1.GetOptions{})
if err != nil {
return errors.Wrapf(err, "error getting backup repo configs from configMap %s", s.config.backupRepoConfig)
}
if cm.Data == nil {
return errors.Errorf("no data is in the backup repo configMap %s", s.config.backupRepoConfig)
}
s.backupRepoConfigs = cm.Data
return nil
}
func (s *nodeAgentServer) getDataPathConcurrentNum(defaultNum int) int {
configs := s.dataPathConfigs
@@ -618,3 +670,20 @@ func (s *nodeAgentServer) getDataPathConcurrentNum(defaultNum int) int {
return concurrentNum
}
func (s *nodeAgentServer) validateCachePVCConfig(config velerotypes.CachePVC) error {
if config.StorageClass == "" {
return errors.New("storage class is absent")
}
sc, err := s.kubeClient.StorageV1().StorageClasses().Get(s.ctx, config.StorageClass, metav1.GetOptions{})
if err != nil {
return errors.Wrapf(err, "error getting storage class %s", config.StorageClass)
}
if sc.ReclaimPolicy != nil && *sc.ReclaimPolicy != corev1api.PersistentVolumeReclaimDelete {
return errors.Errorf("unexpected storage class reclaim policy %v", *sc.ReclaimPolicy)
}
return nil
}

View File

@@ -24,6 +24,7 @@ import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
corev1api "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
@@ -34,6 +35,8 @@ import (
"github.com/vmware-tanzu/velero/pkg/nodeagent"
testutil "github.com/vmware-tanzu/velero/pkg/test"
velerotypes "github.com/vmware-tanzu/velero/pkg/types"
velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
)
func Test_validatePodVolumesHostPath(t *testing.T) {
@@ -142,11 +145,10 @@ func Test_getDataPathConfigs(t *testing.T) {
getFunc func(context.Context, string, kubernetes.Interface, string) (*velerotypes.NodeAgentConfigs, error)
configMapName string
expectConfigs *velerotypes.NodeAgentConfigs
expectLog string
expectedErr string
}{
{
name: "no config specified",
expectLog: "No node-agent configMap is specified",
name: "no config specified",
},
{
name: "failed to get configs",
@@ -154,7 +156,7 @@ func Test_getDataPathConfigs(t *testing.T) {
getFunc: func(context.Context, string, kubernetes.Interface, string) (*velerotypes.NodeAgentConfigs, error) {
return nil, errors.New("fake-get-error")
},
expectLog: "Failed to get node agent configs from configMap node-agent-config, ignore it",
expectedErr: "error getting node agent configs from configMap node-agent-config: fake-get-error",
},
{
name: "configs cm not found",
@@ -162,7 +164,7 @@ func Test_getDataPathConfigs(t *testing.T) {
getFunc: func(context.Context, string, kubernetes.Interface, string) (*velerotypes.NodeAgentConfigs, error) {
return nil, errors.New("fake-not-found-error")
},
expectLog: "Failed to get node agent configs from configMap node-agent-config, ignore it",
expectedErr: "error getting node agent configs from configMap node-agent-config: fake-not-found-error",
},
{
@@ -177,23 +179,21 @@ func Test_getDataPathConfigs(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
logBuffer := ""
s := &nodeAgentServer{
config: nodeAgentServerConfig{
nodeAgentConfig: test.configMapName,
},
logger: testutil.NewSingleLogger(&logBuffer),
logger: testutil.NewLogger(),
}
getConfigsFunc = test.getFunc
s.getDataPathConfigs()
assert.Equal(t, test.expectConfigs, s.dataPathConfigs)
if test.expectLog == "" {
assert.Empty(t, logBuffer)
err := s.getDataPathConfigs()
if test.expectedErr == "" {
require.NoError(t, err)
assert.Equal(t, test.expectConfigs, s.dataPathConfigs)
} else {
assert.Contains(t, logBuffer, test.expectLog)
require.EqualError(t, err, test.expectedErr)
}
})
}
@@ -416,3 +416,117 @@ func Test_getDataPathConcurrentNum(t *testing.T) {
})
}
}
func TestGetBackupRepoConfigs(t *testing.T) {
cmNoData := builder.ForConfigMap(velerov1api.DefaultNamespace, "backup-repo-config").Result()
cmWithData := builder.ForConfigMap(velerov1api.DefaultNamespace, "backup-repo-config").Data("cacheLimit", "100").Result()
tests := []struct {
name string
configMapName string
kubeClientObj []runtime.Object
expectConfigs map[string]string
expectedErr string
}{
{
name: "no config specified",
},
{
name: "failed to get configs",
configMapName: "backup-repo-config",
expectedErr: "error getting backup repo configs from configMap backup-repo-config: configmaps \"backup-repo-config\" not found",
},
{
name: "configs data not found",
kubeClientObj: []runtime.Object{cmNoData},
configMapName: "backup-repo-config",
expectedErr: "no data is in the backup repo configMap backup-repo-config",
},
{
name: "succeed",
configMapName: "backup-repo-config",
kubeClientObj: []runtime.Object{cmWithData},
expectConfigs: map[string]string{"cacheLimit": "100"},
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
fakeKubeClient := fake.NewSimpleClientset(test.kubeClientObj...)
s := &nodeAgentServer{
namespace: velerov1api.DefaultNamespace,
kubeClient: fakeKubeClient,
config: nodeAgentServerConfig{
backupRepoConfig: test.configMapName,
},
logger: testutil.NewLogger(),
}
err := s.getBackupRepoConfigs()
if test.expectedErr == "" {
require.NoError(t, err)
require.Equal(t, test.expectConfigs, s.backupRepoConfigs)
} else {
require.EqualError(t, err, test.expectedErr)
}
})
}
}
func TestValidateCachePVCConfig(t *testing.T) {
scWithRetainPolicy := builder.ForStorageClass("fake-storage-class").ReclaimPolicy(corev1api.PersistentVolumeReclaimRetain).Result()
scWithDeletePolicy := builder.ForStorageClass("fake-storage-class").ReclaimPolicy(corev1api.PersistentVolumeReclaimDelete).Result()
scWithNoPolicy := builder.ForStorageClass("fake-storage-class").Result()
tests := []struct {
name string
config velerotypes.CachePVC
kubeClientObj []runtime.Object
expectedErr string
}{
{
name: "no storage class",
expectedErr: "storage class is absent",
},
{
name: "failed to get storage class",
config: velerotypes.CachePVC{StorageClass: "fake-storage-class"},
expectedErr: "error getting storage class fake-storage-class: storageclasses.storage.k8s.io \"fake-storage-class\" not found",
},
{
name: "storage class reclaim policy is not expected",
config: velerotypes.CachePVC{StorageClass: "fake-storage-class"},
kubeClientObj: []runtime.Object{scWithRetainPolicy},
expectedErr: "unexpected storage class reclaim policy Retain",
},
{
name: "storage class reclaim policy is delete",
config: velerotypes.CachePVC{StorageClass: "fake-storage-class"},
kubeClientObj: []runtime.Object{scWithDeletePolicy},
},
{
name: "storage class with no reclaim policy",
config: velerotypes.CachePVC{StorageClass: "fake-storage-class"},
kubeClientObj: []runtime.Object{scWithNoPolicy},
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
fakeKubeClient := fake.NewSimpleClientset(test.kubeClientObj...)
s := &nodeAgentServer{
kubeClient: fakeKubeClient,
}
err := s.validateCachePVCConfig(test.config)
if test.expectedErr == "" {
require.NoError(t, err)
} else {
require.EqualError(t, err, test.expectedErr)
}
})
}
}

View File

@@ -51,6 +51,7 @@ import (
type podVolumeRestoreConfig struct {
volumePath string
pvrName string
cacheDir string
resourceTimeout time.Duration
}
@@ -86,6 +87,7 @@ func NewRestoreCommand(f client.Factory) *cobra.Command {
command.Flags().Var(formatFlag, "log-format", fmt.Sprintf("The format for log output. Valid values are %s.", strings.Join(formatFlag.AllowedValues(), ", ")))
command.Flags().StringVar(&config.volumePath, "volume-path", config.volumePath, "The full path of the volume to be restored")
command.Flags().StringVar(&config.pvrName, "pod-volume-restore", config.pvrName, "The PVR name")
command.Flags().StringVar(&config.cacheDir, "cache-volume-path", config.cacheDir, "The full path of the cache volume")
command.Flags().DurationVar(&config.resourceTimeout, "resource-timeout", config.resourceTimeout, "How long to wait for resource processes which are not covered by other specific timeout parameters.")
_ = command.MarkFlagRequired("volume-path")
@@ -294,5 +296,5 @@ func (s *podVolumeRestore) createDataPathService() (dataPathService, error) {
return podvolume.NewRestoreMicroService(s.ctx, s.client, s.kubeClient, s.config.pvrName, s.namespace, s.nodeName, datapath.AccessPoint{
ByPath: s.config.volumePath,
VolMode: uploader.PersistentVolumeFilesystem,
}, s.dataPathMgr, repoEnsurer, credGetter, pvrInformer, s.logger), nil
}, s.dataPathMgr, repoEnsurer, credGetter, pvrInformer, s.config.cacheDir, s.logger), nil
}

View File

@@ -75,7 +75,7 @@ func TestDeleteCommand(t *testing.T) {
return
}
cmd := exec.Command(os.Args[0], []string{"-test.run=TestDeleteCommand"}...)
cmd := exec.CommandContext(t.Context(), os.Args[0], []string{"-test.run=TestDeleteCommand"}...)
cmd.Env = append(os.Environ(), fmt.Sprintf("%s=1", cmdtest.CaptureFlag))
stdout, _, err := veleroexec.RunCommand(cmd)
if err != nil {

View File

@@ -63,7 +63,7 @@ func TestNewDescribeCommand(t *testing.T) {
if os.Getenv(cmdtest.CaptureFlag) == "1" {
return
}
cmd := exec.Command(os.Args[0], []string{"-test.run=TestNewDescribeCommand"}...)
cmd := exec.CommandContext(t.Context(), os.Args[0], []string{"-test.run=TestNewDescribeCommand"}...)
cmd.Env = append(os.Environ(), fmt.Sprintf("%s=1", cmdtest.CaptureFlag))
stdout, _, err := veleroexec.RunCommand(cmd)

View File

@@ -62,7 +62,7 @@ func TestNewGetCommand(t *testing.T) {
return
}
cmd := exec.Command(os.Args[0], []string{"-test.run=TestNewGetCommand"}...)
cmd := exec.CommandContext(t.Context(), os.Args[0], []string{"-test.run=TestNewGetCommand"}...)
cmd.Env = append(os.Environ(), fmt.Sprintf("%s=1", cmdtest.CaptureFlag))
stdout, _, err := veleroexec.RunCommand(cmd)
require.NoError(t, err)

View File

@@ -713,6 +713,9 @@ func describeDataMovement(d *Describer, details bool, info *volume.BackupVolumeI
d.Printf("\t\t\t\tData Mover: %s\n", dataMover)
d.Printf("\t\t\t\tUploader Type: %s\n", info.SnapshotDataMovementInfo.UploaderType)
d.Printf("\t\t\t\tMoved data Size (bytes): %d\n", info.SnapshotDataMovementInfo.Size)
if info.SnapshotDataMovementInfo.IncrementalSize > 0 {
d.Printf("\t\t\t\tIncremental data Size (bytes): %d\n", info.SnapshotDataMovementInfo.IncrementalSize)
}
d.Printf("\t\t\t\tResult: %s\n", info.Result)
} else {
d.Printf("\t\t\tData Movement: %s\n", "included, specify --details for more information")
@@ -835,7 +838,7 @@ func describePodVolumeBackups(d *Describer, details bool, podVolumeBackups []vel
backupsByPod := new(volumesByPod)
for _, backup := range backupsByPhase[phase] {
backupsByPod.Add(backup.Spec.Pod.Namespace, backup.Spec.Pod.Name, backup.Spec.Volume, phase, backup.Status.Progress)
backupsByPod.Add(backup.Spec.Pod.Namespace, backup.Spec.Pod.Name, backup.Spec.Volume, phase, backup.Status.Progress, backup.Status.IncrementalBytes)
}
d.Printf("\t\t%s:\n", phase)
@@ -885,7 +888,8 @@ type volumesByPod struct {
// Add adds a pod volume with the specified pod namespace, name
// and volume to the appropriate group.
func (v *volumesByPod) Add(namespace, name, volume, phase string, progress veleroapishared.DataMoveOperationProgress) {
// Used for both backup and restore
func (v *volumesByPod) Add(namespace, name, volume, phase string, progress veleroapishared.DataMoveOperationProgress, incrementalBytes int64) {
if v.volumesByPodMap == nil {
v.volumesByPodMap = make(map[string]*podVolumeGroup)
}
@@ -895,6 +899,12 @@ func (v *volumesByPod) Add(namespace, name, volume, phase string, progress veler
// append backup progress percentage if backup is in progress
if phase == "In Progress" && progress.TotalBytes != 0 {
volume = fmt.Sprintf("%s (%.2f%%)", volume, float64(progress.BytesDone)/float64(progress.TotalBytes)*100)
} else if phase == string(velerov1api.PodVolumeBackupPhaseCompleted) && incrementalBytes > 0 {
volume = fmt.Sprintf("%s (size: %v, incremental size: %v)", volume, progress.TotalBytes, incrementalBytes)
} else if (phase == string(velerov1api.PodVolumeBackupPhaseCompleted) ||
phase == string(velerov1api.PodVolumeRestorePhaseCompleted)) &&
progress.TotalBytes > 0 {
volume = fmt.Sprintf("%s (size: %v)", volume, progress.TotalBytes)
}
if group, ok := v.volumesByPodMap[key]; !ok {

View File

@@ -597,11 +597,12 @@ func TestCSISnapshots(t *testing.T) {
Result: volume.VolumeResultFailed,
SnapshotDataMoved: true,
SnapshotDataMovementInfo: &volume.SnapshotDataMovementInfo{
UploaderType: "fake-uploader",
SnapshotHandle: "fake-repo-id-5",
OperationID: "fake-operation-5",
Size: 100,
Phase: velerov2alpha1.DataUploadPhaseFailed,
UploaderType: "fake-uploader",
SnapshotHandle: "fake-repo-id-5",
OperationID: "fake-operation-5",
Size: 100,
IncrementalSize: 50,
Phase: velerov2alpha1.DataUploadPhaseFailed,
},
},
},
@@ -613,6 +614,7 @@ func TestCSISnapshots(t *testing.T) {
Data Mover: velero
Uploader Type: fake-uploader
Moved data Size (bytes): 100
Incremental data Size (bytes): 50
Result: failed
`,
},

View File

@@ -464,6 +464,10 @@ func describeDataMovementInSF(details bool, info *volume.BackupVolumeInfo, snaps
dataMovement["uploaderType"] = info.SnapshotDataMovementInfo.UploaderType
dataMovement["result"] = string(info.Result)
if info.SnapshotDataMovementInfo.Size > 0 || info.SnapshotDataMovementInfo.IncrementalSize > 0 {
dataMovement["size"] = info.SnapshotDataMovementInfo.Size
dataMovement["incrementalSize"] = info.SnapshotDataMovementInfo.IncrementalSize
}
snapshotDetail["dataMovement"] = dataMovement
} else {
@@ -534,7 +538,7 @@ func describePodVolumeBackupsInSF(backups []velerov1api.PodVolumeBackup, details
// group the backups in the current phase by pod (i.e. "ns/name")
backupsByPod := new(volumesByPod)
for _, backup := range backupsByPhase[phase] {
backupsByPod.Add(backup.Spec.Pod.Namespace, backup.Spec.Pod.Name, backup.Spec.Volume, phase, backup.Status.Progress)
backupsByPod.Add(backup.Spec.Pod.Namespace, backup.Spec.Pod.Name, backup.Spec.Volume, phase, backup.Status.Progress, backup.Status.IncrementalBytes)
}
backupsByPods := make([]map[string]string, 0)

View File

@@ -408,7 +408,7 @@ func describePodVolumeRestores(d *Describer, restores []velerov1api.PodVolumeRes
restoresByPod := new(volumesByPod)
for _, restore := range restoresByPhase[phase] {
restoresByPod.Add(restore.Spec.Pod.Namespace, restore.Spec.Pod.Name, restore.Spec.Volume, phase, restore.Status.Progress)
restoresByPod.Add(restore.Spec.Pod.Namespace, restore.Spec.Pod.Name, restore.Spec.Volume, phase, restore.Status.Progress, 0)
}
d.Printf("\t%s:\n", phase)

View File

@@ -734,8 +734,8 @@ func (b *backupReconciler) runBackup(backup *pkgbackup.Request) error {
// native snapshots phase will either be failed or completed right away
// https://github.com/vmware-tanzu/velero/blob/de3ea52f0cc478e99efa7b9524c7f353514261a4/pkg/backup/item_backupper.go#L632-L639
backup.Status.VolumeSnapshotsAttempted = len(backup.VolumeSnapshots)
for _, snap := range backup.VolumeSnapshots {
backup.Status.VolumeSnapshotsAttempted = len(backup.VolumeSnapshots.Get())
for _, snap := range backup.VolumeSnapshots.Get() {
if snap.Status.Phase == volume.SnapshotPhaseCompleted {
backup.Status.VolumeSnapshotsCompleted++
}
@@ -882,7 +882,7 @@ func persistBackup(backup *pkgbackup.Request,
}
// Velero-native volume snapshots (as opposed to CSI ones)
nativeVolumeSnapshots, errs := encode.ToJSONGzip(backup.VolumeSnapshots, "native volumesnapshots list")
nativeVolumeSnapshots, errs := encode.ToJSONGzip(backup.VolumeSnapshots.Get(), "native volumesnapshots list")
if errs != nil {
persistErrs = append(persistErrs, errs...)
}

View File

@@ -275,7 +275,7 @@ func (r *BackupRepoReconciler) Reconcile(ctx context.Context, req ctrl.Request)
log.WithError(err).Warn("Failed to get keepLatestMaintenanceJobs from ConfigMap, using CLI parameter value")
}
if err := maintenance.DeleteOldJobs(r.Client, req.Name, keepJobs, log); err != nil {
if err := maintenance.DeleteOldJobs(r.Client, *backupRepo, keepJobs, log); err != nil {
log.WithError(err).Warn("Failed to delete old maintenance jobs")
}
}
@@ -608,7 +608,7 @@ func getBackupRepositoryConfig(ctx context.Context, ctrlClient client.Client, co
jsonData, found := loc.Data[repoType]
if !found {
log.Info("No data for repo type %s in config map %s", repoType, configName)
log.Infof("No data for repo type %s in config map %s", repoType, configName)
return nil, nil
}

Some files were not shown because too many files have changed in this diff Show More