Compare commits

...

55 Commits

Author SHA1 Message Date
Xun Jiang/Bruce Jiang
7112c62e49 Merge pull request #6842 from allenxu404/release-1.12
Modify changelogs for v1.12
2023-09-19 17:53:28 +08:00
allenxu404
dcb891a307 Modify changelogs for v1.12
Signed-off-by: allenxu404 <qix2@vmware.com>
2023-09-19 17:36:36 +08:00
lyndon
21353f00a8 Merge pull request #6841 from Lyndon-Li/release-1.12
[1.12] Doc for multiple snapshot class
2023-09-19 16:45:26 +08:00
Lyndon-Li
5e7114899b doc for multiple snapshot class
Signed-off-by: Lyndon-Li <lyonghui@vmware.com>
2023-09-19 16:28:28 +08:00
Wenkai Yin(尹文开)
b035680ce6 Set data mover related properties for schedule (#6823)
Set data mover related properties for schedule

Fixes #6820

Signed-off-by: Wenkai Yin(尹文开) <yinw@vmware.com>
2023-09-14 18:04:48 +08:00
lyndon
9eb133e635 Merge pull request #6821 from reasonerjt/update-kopia-repo-1.12
[cherrypick-1.12]Switch the kopia repo to new org
2023-09-14 11:53:12 +08:00
Daniel Jiang
6f1262d4c6 Switch the kopia repo to new org
Signed-off-by: Daniel Jiang <jiangd@vmware.com>
2023-09-14 11:19:43 +08:00
lyndon
48e3278c6c Merge pull request #6814 from allenxu404/release-1.12
[Cherry-pick]Add doc changes after rc1 to v1.12 docs
2023-09-14 10:25:23 +08:00
Qi Xu
acfc6e474f Add doc changes after rc1 to v1.12 docs (#6812)
Signed-off-by: allenxu404 <qix2@vmware.com>
2023-09-13 18:43:00 +08:00
qiuming
993d2c775f Merge pull request #6803 from qiuming-best/finalizer-optimize
Optimize of removing dataupload datadownload finalizer
2023-09-12 15:44:21 +08:00
qiuming
b70b01cde9 Merge branch 'release-1.12' into finalizer-optimize 2023-09-12 15:00:58 +08:00
Anshul Ahuja
8b8a5a2bcc use old namespace in resource modifier (#6724) (#6794)
* use old namespace in resource modifier



* add changelog



* update docs



* updated after review



---------

Signed-off-by: lou <alex1988@outlook.com>
Signed-off-by: Anshul Ahuja <anshulahuja@microsoft.com>
Co-authored-by: Guang Jiong Lou <7991675+27149chen@users.noreply.github.com>
Co-authored-by: Daniel Jiang <jiangd@vmware.com>
2023-09-12 14:54:03 +08:00
qiuming
5b36cd7e83 Merge branch 'release-1.12' into finalizer-optimize 2023-09-12 11:57:07 +08:00
Ming Qiu
3240fb196c Optimize of removing finalizer no matter the dataupload datadownload cr is been deleted or not
Signed-off-by: Ming Qiu <mqiu@vmware.com>
2023-09-12 11:55:44 +08:00
Daniel Jiang
d9859d99ba Merge pull request #6793 from Lyndon-Li/release-1.12
[1.12] Add csi snapshot data mover doc
2023-09-08 18:09:31 +08:00
Lyndon-Li
18d4fe45e8 add csi snapshot data movement doc
Signed-off-by: Lyndon-Li <lyonghui@vmware.com>
2023-09-08 17:51:49 +08:00
qiuming
60d5bb22f7 Merge pull request #6791 from Lyndon-Li/release-1.12
[1.12] Fix issue 6785
2023-09-08 16:32:39 +08:00
Lyndon-Li
9468b8cfa9 fix issue 6785
Signed-off-by: Lyndon-Li <lyonghui@vmware.com>
2023-09-08 15:18:02 +08:00
lyndon
420562111b Merge pull request #6789 from Lyndon-Li/release-1.12
[1.12] Fix issue 6748 [2]
2023-09-08 15:15:19 +08:00
lyndon
cf0b2e9139 Merge branch 'release-1.12' into release-1.12 2023-09-08 14:57:03 +08:00
lyndon
506415e60c Merge pull request #6787 from kaovilai/fsb-yaml-example-r1.12
cherry-pick: r1.12: Show yaml example of repository password: file-system-backup.md #6783
2023-09-08 12:53:42 +08:00
Lyndon-Li
3733a40637 fix issue 6748
Signed-off-by: Lyndon-Li <lyonghui@vmware.com>
2023-09-08 09:53:00 +08:00
Tiger Kaovilai
fe1ade0226 Show yaml example of repository password: file-system-backup.md
Signed-off-by: Tiger Kaovilai <tkaovila@redhat.com>
2023-09-07 11:54:10 -04:00
Xun Jiang/Bruce Jiang
86e1a74937 Merge pull request #6778 from allenxu404/release-1.12
[cherry-pick] add note for backup repository password configuration
2023-09-07 10:25:05 +08:00
Shubham Pampattiwar
6260a44e62 add note for backup repository password configuration
Signed-off-by: Shubham Pampattiwar <spampatt@redhat.com>

address PR feedback

Signed-off-by: Shubham Pampattiwar <spampatt@redhat.com>

reword the note

Signed-off-by: Shubham Pampattiwar <spampatt@redhat.com>

change FS backups to normal backups in the note

Signed-off-by: Shubham Pampattiwar <spampatt@redhat.com>
2023-09-06 17:58:57 +08:00
Xun Jiang/Bruce Jiang
06d9bfae8d Merge pull request #6762 from blackpiglet/6750_fix_1.12
[cherry-pick][release-1.12]Fix #6752: add namespace exclude check.
2023-09-06 15:44:09 +08:00
Xun Jiang
4d1617470f Fix #6752: add namespace exclude check.
Add PSA audit and warn labels.

Signed-off-by: Xun Jiang <jxun@vmware.com>
2023-09-06 15:28:17 +08:00
lyndon
1b2c82c9eb Merge pull request #6772 from Lyndon-Li/release-1.12
[1.12] Fix issue 6748
2023-09-06 11:21:19 +08:00
Lyndon-Li
040060082a fix issue 6748
Signed-off-by: Lyndon-Li <lyonghui@vmware.com>
2023-09-06 10:37:47 +08:00
Wenkai Yin(尹文开)
fc653bdfbe Update restore controller logic for restore deletion (#6761)
1. Skip deleting the restore files from storage if the backup/BSL is not found
2. Allow deleting the restore files from storage even though the BSL is readonly

Signed-off-by: Wenkai Yin(尹文开) <yinw@vmware.com>
2023-09-06 08:38:45 +08:00
lyndon
6790a18814 Merge pull request #6758 from Lyndon-Li/release-1.12
[1.12] Fix issue 6753
2023-09-05 11:06:55 +08:00
Lyndon-Li
93995bfd00 fix issue 6753
Signed-off-by: Lyndon-Li <lyonghui@vmware.com>
2023-09-05 10:44:58 +08:00
lyndon
80572934dc Merge pull request #6743 from Lyndon-Li/release-1.12-1
[1.12] Fix issue 6733
2023-09-01 18:40:11 +08:00
lyndon
41d9b67945 Merge branch 'release-1.12' into release-1.12-1 2023-09-01 18:22:05 +08:00
lyndon
a06107ac70 Merge pull request #6742 from Lyndon-Li/release-1.12
[1.12] Fix issue 6709
2023-09-01 18:18:27 +08:00
lyndon
40a94e39ad Merge branch 'release-1.12' into release-1.12-1 2023-09-01 18:07:13 +08:00
lyndon
7ea0d434d6 Merge branch 'release-1.12' into release-1.12 2023-09-01 17:53:16 +08:00
qiuming
6b884ecc39 Merge pull request #6740 from qiuming-best/release-1.12
Fix kopia snapshot policy not work
2023-09-01 17:43:36 +08:00
Lyndon-Li
183f7ac154 fix issue 6733
Signed-off-by: Lyndon-Li <lyonghui@vmware.com>
2023-09-01 17:30:52 +08:00
lyndon
75bda412a1 fix issue 6709 (#6741)
Signed-off-by: Lyndon-Li <lyonghui@vmware.com>
2023-09-01 17:12:31 +08:00
Ming Qiu
a2eb10df8f Fix kopia snapshot policy not work
Signed-off-by: Ming Qiu <mqiu@vmware.com>
2023-09-01 16:34:02 +08:00
Daniel Jiang
90bc1abd21 Merge pull request #6725 from anshulahuja98/cherrypick-6704
[1.12] Cherry Pick - add label selector in Resource Modifiers  (#6704)
2023-08-31 15:59:34 +08:00
Guang Jiong Lou
45165503ba add label selector in Resource Modifiers (#6704)
* add label selector in resource modifier

Signed-off-by: lou <alex1988@outlook.com>

* add ut

Signed-off-by: lou <alex1988@outlook.com>

* update after review

Signed-off-by: lou <alex1988@outlook.com>

* update after review

Signed-off-by: lou <alex1988@outlook.com>

---------

Signed-off-by: lou <alex1988@outlook.com>
Signed-off-by: Anshul Ahuja <anshulahuja@microsoft.com>
2023-08-31 10:52:03 +05:30
qiuming
53530130a5 Fix velero uninstall bug (#6720)
Signed-off-by: Ming <mqiu@vmware.com>
2023-08-30 17:54:30 +08:00
danfengliu
ed256d74dd Merge pull request #6708 from danfengliu/cherry-pick-fix-nightly-issues
[cherry-pick to v1.12] monitor velero logs and fix E2E issues
2023-08-29 15:40:18 +08:00
Xun Jiang/Bruce Jiang
ab28a09a07 Merge branch 'release-1.12' into cherry-pick-fix-nightly-issues 2023-08-29 10:17:53 +08:00
qiuming
90f4cc5497 Merge pull request #6707 from qiuming-best/mqiu-release-1.12
make Velero uninstall backward compatible
2023-08-29 09:03:37 +08:00
qiuming
f505ed709b Merge branch 'release-1.12' into mqiu-release-1.12 2023-08-28 17:31:48 +08:00
Ming
28074e3f37 make velero uninstall backward compatible
Signed-off-by: Ming <mqiu@vmware.com>
2023-08-28 09:31:29 +00:00
danfengl
240f33c09d monitor velero logs and fix E2E issues
1. Capture Velero pod log and K8S cluster event;
2. Fix wrong path of storageclass yaml file issue caused by pert test;
3. Fix change storageclass test issue that no sc named 'default' in EKS cluster;
4. Support AWS credential as config format;
5. Support more E2E script input parameters like standy cluster plugins and provider.

Signed-off-by: danfengl <danfengl@vmware.com>
2023-08-28 06:04:09 +00:00
lyndon
fd08848471 fix issue 6391 (#6703)
Signed-off-by: Lyndon-Li <lyonghui@vmware.com>
2023-08-25 16:36:45 +08:00
qiuming
5f585be24b Merge pull request #6701 from qiuming-best/mqiu-release-1.12
[CherryPick v1.12] Fix delete dataupload datadownload CR failure
2023-08-25 11:45:33 +08:00
Ming
5480acf0a0 [CherryPick v1.12] Fix delete dataupload datadownload failure when Velero uninstall
Signed-off-by: Ming <mqiu@vmware.com>
2023-08-25 03:24:40 +00:00
Daniel Jiang
e2d3e84bab skip subresource in resource discovery (#6688)
Signed-off-by: lou <alex1988@outlook.com>
Co-authored-by: lou <alex1988@outlook.com>
2023-08-23 15:13:34 +08:00
Qi Xu
0c0ccf949b Ping golang/distroless image to latest version (#6679)
Signed-off-by: allenxu404 <qix2@vmware.com>
2023-08-18 19:17:50 +08:00
78 changed files with 2198 additions and 439 deletions

View File

@@ -14,7 +14,7 @@ jobs:
- name: Set up Go
uses: actions/setup-go@v4
with:
go-version: '1.20'
go-version: '1.20.7'
id: go
# Look for a CLI that's made for this PR
- name: Fetch built CLI

View File

@@ -14,7 +14,7 @@ jobs:
- name: Set up Go
uses: actions/setup-go@v4
with:
go-version: '1.20'
go-version: '1.20.7'
id: go
# Look for a CLI that's made for this PR
- name: Fetch built CLI
@@ -72,7 +72,7 @@ jobs:
- name: Set up Go
uses: actions/setup-go@v4
with:
go-version: '1.20'
go-version: '1.20.7'
id: go
- name: Check out the code
uses: actions/checkout@v2

View File

@@ -10,7 +10,7 @@ jobs:
- name: Set up Go
uses: actions/setup-go@v4
with:
go-version: '1.20'
go-version: '1.20.7'
id: go
- name: Check out the code
uses: actions/checkout@v2

View File

@@ -18,7 +18,7 @@ jobs:
- name: Set up Go
uses: actions/setup-go@v4
with:
go-version: '1.20'
go-version: '1.20.7'
id: go
- uses: actions/checkout@v3

View File

@@ -13,7 +13,7 @@
# limitations under the License.
# Velero binary build section
FROM --platform=$BUILDPLATFORM golang:1.20-bullseye as velero-builder
FROM --platform=$BUILDPLATFORM golang:1.20.7-bullseye as velero-builder
ARG GOPROXY
ARG BIN
@@ -46,7 +46,7 @@ RUN mkdir -p /output/usr/bin && \
-ldflags "${LDFLAGS}" ${PKG}/cmd/velero-helper
# Restic binary build section
FROM --platform=$BUILDPLATFORM golang:1.20-bullseye as restic-builder
FROM --platform=$BUILDPLATFORM golang:1.20.7-bullseye as restic-builder
ARG BIN
ARG TARGETOS
@@ -68,7 +68,7 @@ RUN mkdir -p /output/usr/bin && \
/go/src/github.com/vmware-tanzu/velero/hack/build-restic.sh
# Velero image packing section
FROM gcr.io/distroless/base-nossl-debian11:nonroot
FROM gcr.io/distroless/base-nossl-debian11@sha256:f10e1fbf558c630a4b74a987e6c754d45bf59f9ddcefce090f6b111925996767
LABEL maintainer="Xun Jiang <jxun@vmware.com>"

View File

@@ -52,7 +52,7 @@ git_sha = str(local("git rev-parse HEAD", quiet = True, echo_off = True)).strip(
tilt_helper_dockerfile_header = """
# Tilt image
FROM golang:1.20 as tilt-helper
FROM golang:1.20.7 as tilt-helper
# Support live reloading with Tilt
RUN wget --output-document /restart.sh --quiet https://raw.githubusercontent.com/windmilleng/rerun-process-wrapper/master/restart.sh && \

View File

@@ -51,6 +51,7 @@ To fix CVEs and keep pace with Golang, Velero made changes as follows:
* Prior to v1.12, the parameter `uploader-type` for Velero installation had a default value of "restic". However, starting from this version, the default value has been changed to "kopia". This means that Velero will now use Kopia as the default path for file system backup.
* The ways of setting CSI snapshot time have changed in v1.12. First, the sync waiting time for creating a snapshot handle in the CSI plugin is changed from the fixed 10 minutes into backup.Spec.CSISnapshotTimeout. The second, the async waiting time for VolumeSnapshot and VolumeSnapshotContent's status turning into `ReadyToUse` in operation uses the operation's timeout. The default value is 4 hours.
* As from [Velero helm chart v4.0.0](https://github.com/vmware-tanzu/helm-charts/releases/tag/velero-4.0.0), it supports multiple BSL and VSL, and the BSL and VSL have changed from the map into a slice, and[ this breaking change](https://github.com/vmware-tanzu/helm-charts/pull/413) is not backward compatible. So it would be best to change the BSL and VSL configuration into slices before the Upgrade.
* Prior to v1.12, deleting the Velero namespace would easily remove all the resources within it. However, with the introduction of finalizers attached to the Velero CR including `restore`, `dataupload`, and `datadownload` in this version, directly deleting Velero namespace may get stuck indefinitely because the pods responsible for handling the finalizers might be deleted before the resources attached to the finalizers. To avoid this issue, please use the command `velero uninstall` to delete all the Velero resources or ensure that you handle the finalizer appropriately before deleting the Velero namespace.
### Limitations/Known issues
@@ -132,3 +133,10 @@ prior PVC restores with CSI (#6111, @eemcmullan)
* Make GetPluginConfig accessible from other packages. (#6151, @tkaovila)
* Ignore not found error during patching managedFields (#6136, @ywk253100)
* Fix the goreleaser issues and add a new goreleaser action (#6109, @blackpiglet)
* Add CSI snapshot data movement doc (#6793, @Lyndon-Li)
* Use old(origin) namespace in resource modifier conditions in case namespace may change during restore (#6724, @27149chen)
* Fix #6752: add namespace exclude check. (#6762, @blackpiglet)
* Update restore controller logic for restore deletion (#6761, @ywk253100)
* Fix issue #6753, remove the check for read-only BSL in restore async operation controller since Velero cannot fully support read-only mode BSL in restore at present (#6758, @Lyndon-Li)
* Fixes #6636, skip subresource in resource discovery (#6688, @27149chen)
* This pr made some improvements in Resource Modifiers:1. add label selector 2. change the field name from groupKind to groupResource (#6704, @27149chen)

View File

@@ -26,7 +26,7 @@ Currently velero supports substituting certain values in the K8s resources durin
<!-- ## Background -->
## Goals
- Allow the user to specify a GroupKind, Name(optional), JSON patch for modification.
- Allow the user to specify a GroupResource, Name(optional), JSON patch for modification.
- Allow the user to specify multiple JSON patch.
## Non Goals
@@ -74,7 +74,7 @@ velero restore create --from-backup backup-1 --resource-modifier-configmap resou
### Resource Modifier ConfigMap Structure
- User first needs to provide details on which resources the JSON Substitutions need to be applied.
- For this the user will provide 4 inputs - Namespaces(for NS Scoped resources), GroupKind (kind.group format similar to includeResources field in velero) and Name Regex(optional).
- For this the user will provide 4 inputs - Namespaces(for NS Scoped resources), GroupResource (resource.group format similar to includeResources field in velero) and Name Regex(optional).
- If the user does not provide the Name, the JSON Substitutions will be applied to all the resources of the given Group and Kind under the given namespaces.
- Further the use will specify the JSON Patch using the structure of kubectl's "JSON Patch" based inputs.
@@ -83,7 +83,7 @@ velero restore create --from-backup backup-1 --resource-modifier-configmap resou
version: v1
resourceModifierRules:
- conditions:
groupKind: persistentvolumeclaims
groupResource: persistentvolumeclaims
resourceNameRegex: "mysql.*"
namespaces:
- bar
@@ -96,6 +96,7 @@ resourceModifierRules:
path: "/metadata/labels/test"
```
- The above configmap will apply the JSON Patch to all the PVCs in the namespaces bar and foo with name starting with mysql. The JSON Patch will replace the storageClassName with "premium" and remove the label "test" from the PVCs.
- Note that the Namespace here is the original namespace of the backed up resource, not the new namespace where the resource is going to be restored.
- The user can specify multiple JSON Patches for a particular resource. The patches will be applied in the order specified in the configmap. A subsequent patch is applied in order and if multiple patches are specified for the same path, the last patch will override the previous patches.
- The user can specify multiple resourceModifierRules in the configmap. The rules will be applied in the order specified in the configmap.
@@ -119,7 +120,7 @@ kubectl create cm <configmap-name> --from-file <yaml-file> -n velero
version: v1
resourceModifierRules:
- conditions:
groupKind: persistentvolumeclaims.storage.k8s.io
groupResource: persistentvolumeclaims.storage.k8s.io
resourceNameRegex: ".*"
namespaces:
- bar

View File

@@ -67,12 +67,12 @@ The Velero CSI plugin chooses the VolumeSnapshotClass in the cluster that has th
metadata:
name: backup-1
annotations:
velero.io/csi-volumesnapshot-class/csi.cloud.disk.driver: csi-diskdriver-snapclass
velero.io/csi-volumesnapshot-class/csi.cloud.file.driver: csi-filedriver-snapclass
velero.io/csi-volumesnapshot-class/<driver name>: csi-snapclass
velero.io/csi-volumesnapshot-class_csi.cloud.disk.driver: csi-diskdriver-snapclass
velero.io/csi-volumesnapshot-class_csi.cloud.file.driver: csi-filedriver-snapclass
velero.io/csi-volumesnapshot-class_<driver name>: csi-snapclass
```
To query the annotations on a backup: "velero.io/csi-volumesnapshot-class/'driver name'" - where driver names comes from the PVC's driver.
To query the annotations on a backup: "velero.io/csi-volumesnapshot-class_'driver name'" - where driver names comes from the PVC's driver.
2. **Support VolumeSnapshotClass selection at PVC level**
The user can annotate the PVCs with driver and VolumeSnapshotClass name. The CSI plugin will use the VolumeSnapshotClass specified in the annotation. If the annotation is not present, the CSI plugin will use the default VolumeSnapshotClass for the driver. If the VolumeSnapshotClass provided is of a different driver, the CSI plugin will use the default VolumeSnapshotClass for the driver.

2
go.mod
View File

@@ -158,3 +158,5 @@ require (
sigs.k8s.io/json v0.0.0-20220713155537-f223a00ba0e2 // indirect
sigs.k8s.io/structured-merge-diff/v4 v4.2.3 // indirect
)
replace github.com/kopia/kopia => github.com/project-velero/kopia v0.13.0-velero.1

4
go.sum
View File

@@ -493,8 +493,6 @@ github.com/klauspost/reedsolomon v1.11.7/go.mod h1:4bXRN+cVzMdml6ti7qLouuYi32KHJ
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/kopia/htmluibuild v0.0.0-20230326183719-f482ef17e2c9 h1:s5Wa89s8RlPjuwqd8K8kuf+T9Kz4+NsbKwR/pJ3PAT0=
github.com/kopia/kopia v0.13.0 h1:efxs/vw1cS9HldlHcQ8TPxlsYz+cWCkiS4IWMbR3D1s=
github.com/kopia/kopia v0.13.0/go.mod h1:Iic7CcKhsq+A7MLR9hh6VJfgpcJhLx3Kn+BgjY+azvI=
github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg=
github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
@@ -617,6 +615,8 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI=
github.com/pquerna/cachecontrol v0.0.0-20171018203845-0dec1b30a021/go.mod h1:prYjPmNq4d1NPVmpShWobRqXY3q7Vp+80DqgxxUrUIA=
github.com/project-velero/kopia v0.13.0-velero.1 h1:rjiP7os4Eaek/gbyIKqzwkp2XLbJHl0NkczSgJ7AOEo=
github.com/project-velero/kopia v0.13.0-velero.1/go.mod h1:Iic7CcKhsq+A7MLR9hh6VJfgpcJhLx3Kn+BgjY+azvI=
github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=
github.com/prometheus/client_golang v0.9.3/go.mod h1:/TN21ttK/J9q6uSwhBd54HahCDft0ttaMvbicHlPoso=
github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo=

View File

@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
FROM --platform=linux/amd64 golang:1.20-bullseye
FROM --platform=linux/amd64 golang:1.20.7-bullseye
ARG GOPROXY

View File

@@ -2,7 +2,6 @@ package resourcemodifiers
import (
"fmt"
"io"
"regexp"
"strconv"
"strings"
@@ -10,9 +9,11 @@ import (
jsonpatch "github.com/evanphx/json-patch"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"gopkg.in/yaml.v3"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/labels"
"sigs.k8s.io/yaml"
"github.com/vmware-tanzu/velero/pkg/util/collections"
)
@@ -23,26 +24,27 @@ const (
)
type JSONPatch struct {
Operation string `yaml:"operation"`
From string `yaml:"from,omitempty"`
Path string `yaml:"path"`
Value string `yaml:"value,omitempty"`
Operation string `json:"operation"`
From string `json:"from,omitempty"`
Path string `json:"path"`
Value string `json:"value,omitempty"`
}
type Conditions struct {
Namespaces []string `yaml:"namespaces,omitempty"`
GroupKind string `yaml:"groupKind"`
ResourceNameRegex string `yaml:"resourceNameRegex"`
Namespaces []string `json:"namespaces,omitempty"`
GroupResource string `json:"groupResource"`
ResourceNameRegex string `json:"resourceNameRegex,omitempty"`
LabelSelector *metav1.LabelSelector `json:"labelSelector,omitempty"`
}
type ResourceModifierRule struct {
Conditions Conditions `yaml:"conditions"`
Patches []JSONPatch `yaml:"patches"`
Conditions Conditions `json:"conditions"`
Patches []JSONPatch `json:"patches"`
}
type ResourceModifiers struct {
Version string `yaml:"version"`
ResourceModifierRules []ResourceModifierRule `yaml:"resourceModifierRules"`
Version string `json:"version"`
ResourceModifierRules []ResourceModifierRule `json:"resourceModifierRules"`
}
func GetResourceModifiersFromConfig(cm *v1.ConfigMap) (*ResourceModifiers, error) {
@@ -58,7 +60,7 @@ func GetResourceModifiersFromConfig(cm *v1.ConfigMap) (*ResourceModifiers, error
yamlData = v
}
resModifiers, err := unmarshalResourceModifiers(&yamlData)
resModifiers, err := unmarshalResourceModifiers([]byte(yamlData))
if err != nil {
return nil, errors.WithStack(err)
}
@@ -83,9 +85,11 @@ func (r *ResourceModifierRule) Apply(obj *unstructured.Unstructured, groupResour
if !namespaceInclusion.ShouldInclude(obj.GetNamespace()) {
return nil
}
if !strings.EqualFold(groupResource, r.Conditions.GroupKind) {
if r.Conditions.GroupResource != groupResource {
return nil
}
if r.Conditions.ResourceNameRegex != "" {
match, err := regexp.MatchString(r.Conditions.ResourceNameRegex, obj.GetName())
if err != nil {
@@ -95,6 +99,17 @@ func (r *ResourceModifierRule) Apply(obj *unstructured.Unstructured, groupResour
return nil
}
}
if r.Conditions.LabelSelector != nil {
selector, err := metav1.LabelSelectorAsSelector(r.Conditions.LabelSelector)
if err != nil {
return errors.Errorf("error in creating label selector %s", err.Error())
}
if !selector.Matches(labels.Set(obj.GetLabels())) {
return nil
}
}
patches, err := r.PatchArrayToByteArray()
if err != nil {
return err
@@ -107,7 +122,7 @@ func (r *ResourceModifierRule) Apply(obj *unstructured.Unstructured, groupResour
return nil
}
// convert all JsonPatch to string array with the format of jsonpatch.Patch and then convert it to byte array
// PatchArrayToByteArray converts all JsonPatch to string array with the format of jsonpatch.Patch and then convert it to byte array
func (r *ResourceModifierRule) PatchArrayToByteArray() ([]byte, error) {
var patches []string
for _, patch := range r.Patches {
@@ -148,22 +163,15 @@ func ApplyPatch(patch []byte, obj *unstructured.Unstructured, log logrus.FieldLo
return nil
}
func unmarshalResourceModifiers(yamlData *string) (*ResourceModifiers, error) {
func unmarshalResourceModifiers(yamlData []byte) (*ResourceModifiers, error) {
resModifiers := &ResourceModifiers{}
err := decodeStruct(strings.NewReader(*yamlData), resModifiers)
err := yaml.UnmarshalStrict(yamlData, resModifiers)
if err != nil {
return nil, fmt.Errorf("failed to decode yaml data into resource modifiers %v", err)
}
return resModifiers, nil
}
// decodeStruct restrict validate the keys in decoded mappings to exist as fields in the struct being decoded into
func decodeStruct(r io.Reader, s interface{}) error {
dec := yaml.NewDecoder(r)
dec.KnownFields(true)
return dec.Decode(s)
}
func addQuotes(value string) bool {
if value == "" {
return true

View File

@@ -18,7 +18,7 @@ func TestGetResourceModifiersFromConfig(t *testing.T) {
Namespace: "test-namespace",
},
Data: map[string]string{
"sub.yml": "version: v1\nresourceModifierRules:\n- conditions:\n groupKind: persistentvolumeclaims\n resourceNameRegex: \".*\"\n namespaces:\n - bar\n - foo\n patches:\n - operation: replace\n path: \"/spec/storageClassName\"\n value: \"premium\"\n - operation: remove\n path: \"/metadata/labels/test\"\n\n\n",
"sub.yml": "version: v1\nresourceModifierRules:\n- conditions:\n groupResource: persistentvolumeclaims\n resourceNameRegex: \".*\"\n namespaces:\n - bar\n - foo\n patches:\n - operation: replace\n path: \"/spec/storageClassName\"\n value: \"premium\"\n - operation: remove\n path: \"/metadata/labels/test\"\n\n\n",
},
}
@@ -27,7 +27,7 @@ func TestGetResourceModifiersFromConfig(t *testing.T) {
ResourceModifierRules: []ResourceModifierRule{
{
Conditions: Conditions{
GroupKind: "persistentvolumeclaims",
GroupResource: "persistentvolumeclaims",
ResourceNameRegex: ".*",
Namespaces: []string{"bar", "foo"},
},
@@ -51,7 +51,7 @@ func TestGetResourceModifiersFromConfig(t *testing.T) {
Namespace: "test-namespace",
},
Data: map[string]string{
"sub.yml": "version: v1\nresourceModifierRules:\n- conditions:\n groupKind: deployments.apps\n resourceNameRegex: \"^test-.*$\"\n namespaces:\n - bar\n - foo\n patches:\n - operation: add\n path: \"/spec/template/spec/containers/0\"\n value: \"{\\\"name\\\": \\\"nginx\\\", \\\"image\\\": \\\"nginx:1.14.2\\\", \\\"ports\\\": [{\\\"containerPort\\\": 80}]}\"\n - operation: copy\n from: \"/spec/template/spec/containers/0\"\n path: \"/spec/template/spec/containers/1\"\n\n\n",
"sub.yml": "version: v1\nresourceModifierRules:\n- conditions:\n groupResource: deployments.apps\n resourceNameRegex: \"^test-.*$\"\n namespaces:\n - bar\n - foo\n patches:\n - operation: add\n path: \"/spec/template/spec/containers/0\"\n value: \"{\\\"name\\\": \\\"nginx\\\", \\\"image\\\": \\\"nginx:1.14.2\\\", \\\"ports\\\": [{\\\"containerPort\\\": 80}]}\"\n - operation: copy\n from: \"/spec/template/spec/containers/0\"\n path: \"/spec/template/spec/containers/1\"\n\n\n",
},
}
@@ -60,7 +60,7 @@ func TestGetResourceModifiersFromConfig(t *testing.T) {
ResourceModifierRules: []ResourceModifierRule{
{
Conditions: Conditions{
GroupKind: "deployments.apps",
GroupResource: "deployments.apps",
ResourceNameRegex: "^test-.*$",
Namespaces: []string{"bar", "foo"},
},
@@ -86,7 +86,33 @@ func TestGetResourceModifiersFromConfig(t *testing.T) {
Namespace: "test-namespace",
},
Data: map[string]string{
"sub.yml": "version1: v1\nresourceModifierRules:\n- conditions:\n groupKind: deployments.apps\n resourceNameRegex: \"^test-.*$\"\n namespaces:\n - bar\n - foo\n patches:\n - operation: add\n path: \"/spec/template/spec/containers/0\"\n value: \"{\\\"name\\\": \\\"nginx\\\", \\\"image\\\": \\\"nginx:1.14.2\\\", \\\"ports\\\": [{\\\"containerPort\\\": 80}]}\"\n - operation: copy\n from: \"/spec/template/spec/containers/0\"\n path: \"/spec/template/spec/containers/1\"\n\n\n",
"sub.yml": "version1: v1\nresourceModifierRules:\n- conditions:\n groupResource: deployments.apps\n resourceNameRegex: \"^test-.*$\"\n namespaces:\n - bar\n - foo\n patches:\n - operation: add\n path: \"/spec/template/spec/containers/0\"\n value: \"{\\\"name\\\": \\\"nginx\\\", \\\"image\\\": \\\"nginx:1.14.2\\\", \\\"ports\\\": [{\\\"containerPort\\\": 80}]}\"\n - operation: copy\n from: \"/spec/template/spec/containers/0\"\n path: \"/spec/template/spec/containers/1\"\n\n\n",
},
}
cm4 := &v1.ConfigMap{
ObjectMeta: metav1.ObjectMeta{
Name: "test-configmap",
Namespace: "test-namespace",
},
Data: map[string]string{
"sub.yml": "version: v1\nresourceModifierRules:\n- conditions:\n groupResource: deployments.apps\n labelSelector:\n matchLabels:\n a: b\n",
},
}
rules4 := &ResourceModifiers{
Version: "v1",
ResourceModifierRules: []ResourceModifierRule{
{
Conditions: Conditions{
GroupResource: "deployments.apps",
LabelSelector: &metav1.LabelSelector{
MatchLabels: map[string]string{
"a": "b",
},
},
},
},
},
}
@@ -123,6 +149,14 @@ func TestGetResourceModifiersFromConfig(t *testing.T) {
want: nil,
wantErr: true,
},
{
name: "match labels",
args: args{
cm: cm4,
},
want: rules4,
wantErr: false,
},
{
name: "nil configmap",
args: args{
@@ -183,6 +217,9 @@ func TestResourceModifiers_ApplyResourceModifierRules(t *testing.T) {
"metadata": map[string]interface{}{
"name": "test-deployment",
"namespace": "foo",
"labels": map[string]interface{}{
"app": "nginx",
},
},
"spec": map[string]interface{}{
"replicas": int64(1),
@@ -211,6 +248,9 @@ func TestResourceModifiers_ApplyResourceModifierRules(t *testing.T) {
"metadata": map[string]interface{}{
"name": "test-deployment",
"namespace": "foo",
"labels": map[string]interface{}{
"app": "nginx",
},
},
"spec": map[string]interface{}{
"replicas": int64(2),
@@ -239,6 +279,9 @@ func TestResourceModifiers_ApplyResourceModifierRules(t *testing.T) {
"metadata": map[string]interface{}{
"name": "test-deployment",
"namespace": "foo",
"labels": map[string]interface{}{
"app": "nginx",
},
},
"spec": map[string]interface{}{
"replicas": int64(1),
@@ -287,7 +330,7 @@ func TestResourceModifiers_ApplyResourceModifierRules(t *testing.T) {
ResourceModifierRules: []ResourceModifierRule{
{
Conditions: Conditions{
GroupKind: "persistentvolumeclaims",
GroupResource: "persistentvolumeclaims",
ResourceNameRegex: "[a-z",
Namespaces: []string{"foo"},
},
@@ -320,7 +363,7 @@ func TestResourceModifiers_ApplyResourceModifierRules(t *testing.T) {
ResourceModifierRules: []ResourceModifierRule{
{
Conditions: Conditions{
GroupKind: "persistentvolumeclaims",
GroupResource: "persistentvolumeclaims",
ResourceNameRegex: ".*",
Namespaces: []string{"foo"},
},
@@ -353,7 +396,7 @@ func TestResourceModifiers_ApplyResourceModifierRules(t *testing.T) {
ResourceModifierRules: []ResourceModifierRule{
{
Conditions: Conditions{
GroupKind: "deployments.apps",
GroupResource: "deployments.apps",
ResourceNameRegex: "^test-.*$",
Namespaces: []string{"foo"},
},
@@ -386,7 +429,7 @@ func TestResourceModifiers_ApplyResourceModifierRules(t *testing.T) {
ResourceModifierRules: []ResourceModifierRule{
{
Conditions: Conditions{
GroupKind: "deployments.apps",
GroupResource: "deployments.apps",
ResourceNameRegex: "^test-.*$",
Namespaces: []string{"foo"},
},
@@ -419,8 +462,8 @@ func TestResourceModifiers_ApplyResourceModifierRules(t *testing.T) {
ResourceModifierRules: []ResourceModifierRule{
{
Conditions: Conditions{
GroupKind: "deployments.apps",
Namespaces: []string{"foo"},
GroupResource: "deployments.apps",
Namespaces: []string{"foo"},
},
Patches: []JSONPatch{
{
@@ -451,7 +494,7 @@ func TestResourceModifiers_ApplyResourceModifierRules(t *testing.T) {
ResourceModifierRules: []ResourceModifierRule{
{
Conditions: Conditions{
GroupKind: "deployments.apps",
GroupResource: "deployments.apps",
},
Patches: []JSONPatch{
{
@@ -482,7 +525,7 @@ func TestResourceModifiers_ApplyResourceModifierRules(t *testing.T) {
ResourceModifierRules: []ResourceModifierRule{
{
Conditions: Conditions{
GroupKind: "deployments.apps",
GroupResource: "deployments.apps",
ResourceNameRegex: ".*",
Namespaces: []string{"bar"},
},
@@ -515,7 +558,7 @@ func TestResourceModifiers_ApplyResourceModifierRules(t *testing.T) {
ResourceModifierRules: []ResourceModifierRule{
{
Conditions: Conditions{
GroupKind: "deployments.apps",
GroupResource: "deployments.apps",
ResourceNameRegex: "^test-.*$",
Namespaces: []string{"foo"},
},
@@ -543,7 +586,7 @@ func TestResourceModifiers_ApplyResourceModifierRules(t *testing.T) {
ResourceModifierRules: []ResourceModifierRule{
{
Conditions: Conditions{
GroupKind: "deployments.apps",
GroupResource: "deployments.apps",
ResourceNameRegex: "^test-.*$",
Namespaces: []string{"foo"},
},
@@ -579,6 +622,80 @@ func TestResourceModifiers_ApplyResourceModifierRules(t *testing.T) {
wantErr: false,
wantObj: deployNginxMysql.DeepCopy(),
},
{
name: "nginx deployment: match label selector",
fields: fields{
Version: "v1",
ResourceModifierRules: []ResourceModifierRule{
{
Conditions: Conditions{
GroupResource: "deployments.apps",
Namespaces: []string{"foo"},
LabelSelector: &metav1.LabelSelector{
MatchLabels: map[string]string{
"app": "nginx",
},
},
},
Patches: []JSONPatch{
{
Operation: "test",
Path: "/spec/replicas",
Value: "1",
},
{
Operation: "replace",
Path: "/spec/replicas",
Value: "2",
},
},
},
},
},
args: args{
obj: deployNginxOneReplica.DeepCopy(),
groupResource: "deployments.apps",
},
wantErr: false,
wantObj: deployNginxTwoReplica.DeepCopy(),
},
{
name: "nginx deployment: mismatch label selector",
fields: fields{
Version: "v1",
ResourceModifierRules: []ResourceModifierRule{
{
Conditions: Conditions{
GroupResource: "deployments.apps",
Namespaces: []string{"foo"},
LabelSelector: &metav1.LabelSelector{
MatchLabels: map[string]string{
"app": "nginx-mismatch",
},
},
},
Patches: []JSONPatch{
{
Operation: "test",
Path: "/spec/replicas",
Value: "1",
},
{
Operation: "replace",
Path: "/spec/replicas",
Value: "2",
},
},
},
},
},
args: args{
obj: deployNginxOneReplica.DeepCopy(),
groupResource: "deployments.apps",
},
wantErr: false,
wantObj: deployNginxOneReplica.DeepCopy(),
},
}
for _, tt := range tests {

View File

@@ -1,9 +1,8 @@
package resourcemodifiers
import (
"strings"
"fmt"
"strings"
)
func (r *ResourceModifierRule) Validate() error {
@@ -48,8 +47,8 @@ func (p *JSONPatch) Validate() error {
}
func (c *Conditions) Validate() error {
if c.GroupKind == "" {
return fmt.Errorf("groupkind cannot be empty")
if c.GroupResource == "" {
return fmt.Errorf("groupkResource cannot be empty")
}
return nil
}

View File

@@ -21,7 +21,7 @@ func TestResourceModifiers_Validate(t *testing.T) {
ResourceModifierRules: []ResourceModifierRule{
{
Conditions: Conditions{
GroupKind: "persistentvolumeclaims",
GroupResource: "persistentvolumeclaims",
ResourceNameRegex: ".*",
Namespaces: []string{"bar", "foo"},
},
@@ -44,7 +44,7 @@ func TestResourceModifiers_Validate(t *testing.T) {
ResourceModifierRules: []ResourceModifierRule{
{
Conditions: Conditions{
GroupKind: "persistentvolumeclaims",
GroupResource: "persistentvolumeclaims",
ResourceNameRegex: ".*",
Namespaces: []string{"bar", "foo"},
},
@@ -75,7 +75,7 @@ func TestResourceModifiers_Validate(t *testing.T) {
ResourceModifierRules: []ResourceModifierRule{
{
Conditions: Conditions{
GroupKind: "persistentvolumeclaims",
GroupResource: "persistentvolumeclaims",
ResourceNameRegex: ".*",
Namespaces: []string{"bar", "foo"},
},
@@ -92,13 +92,13 @@ func TestResourceModifiers_Validate(t *testing.T) {
wantErr: true,
},
{
name: "Condition has empty GroupKind",
name: "Condition has empty GroupResource",
fields: fields{
Version: "v1",
ResourceModifierRules: []ResourceModifierRule{
{
Conditions: Conditions{
GroupKind: "",
GroupResource: "",
ResourceNameRegex: ".*",
Namespaces: []string{"bar", "foo"},
},

View File

@@ -196,9 +196,8 @@ func (r *itemCollector) getResourceItems(log logrus.FieldLogger, gv schema.Group
log.Info("Getting items for resource")
var (
gvr = gv.WithResource(resource.Name)
gr = gvr.GroupResource()
clusterScoped = !resource.Namespaced
gvr = gv.WithResource(resource.Name)
gr = gvr.GroupResource()
)
orders := getOrderedResourcesForType(r.backupRequest.Backup.Spec.OrderedResources, resource.Name)
@@ -272,8 +271,6 @@ func (r *itemCollector) getResourceItems(log logrus.FieldLogger, gv schema.Group
}
}
namespacesToList := getNamespacesToList(r.backupRequest.NamespaceIncludesExcludes)
// Handle namespace resource here.
// Namespace are only filtered by namespace include/exclude filters.
// Label selectors are not checked.
@@ -289,10 +286,12 @@ func (r *itemCollector) getResourceItems(log logrus.FieldLogger, gv schema.Group
return nil, errors.WithStack(err)
}
items := r.backupNamespaces(unstructuredList, namespacesToList, gr, preferredGVR, log)
items := r.backupNamespaces(unstructuredList, r.backupRequest.NamespaceIncludesExcludes, gr, preferredGVR, log)
return items, nil
}
clusterScoped := !resource.Namespaced
namespacesToList := getNamespacesToList(r.backupRequest.NamespaceIncludesExcludes)
// If we get here, we're backing up something other than namespaces
if clusterScoped {
@@ -533,31 +532,13 @@ func (r *itemCollector) listItemsForLabel(unstructuredItems []unstructured.Unstr
// backupNamespaces process namespace resource according to namespace filters.
func (r *itemCollector) backupNamespaces(unstructuredList *unstructured.UnstructuredList,
namespacesToList []string, gr schema.GroupResource, preferredGVR schema.GroupVersionResource,
ie *collections.IncludesExcludes, gr schema.GroupResource, preferredGVR schema.GroupVersionResource,
log logrus.FieldLogger) []*kubernetesResource {
var items []*kubernetesResource
for index, unstructured := range unstructuredList.Items {
found := false
if len(namespacesToList) == 0 {
// No namespace found. By far, this condition cannot be triggered. Either way,
// namespacesToList is not empty.
log.Debug("Skip namespace resource, because no item found by namespace filters.")
break
} else if len(namespacesToList) == 1 && namespacesToList[0] == "" {
// All namespaces are included.
log.Debugf("Backup namespace %s due to full cluster backup.", unstructured.GetName())
found = true
} else {
for _, ns := range namespacesToList {
if unstructured.GetName() == ns {
log.Debugf("Backup namespace %s due to namespace filters setting.", unstructured.GetName())
found = true
break
}
}
}
if ie.ShouldInclude(unstructured.GetName()) {
log.Debugf("Backup namespace %s due to namespace filters setting.", unstructured.GetName())
if found {
path, err := r.writeToFile(&unstructuredList.Items[index])
if err != nil {
log.WithError(err).Error("Error writing item to file")

View File

@@ -153,6 +153,8 @@ func (o *CreateOptions) Run(c *cobra.Command, f client.Factory) error {
OrderedResources: orders,
CSISnapshotTimeout: metav1.Duration{Duration: o.BackupOptions.CSISnapshotTimeout},
ItemOperationTimeout: metav1.Duration{Duration: o.BackupOptions.ItemOperationTimeout},
DataMover: o.BackupOptions.DataMover,
SnapshotMoveData: o.BackupOptions.SnapshotMoveData.Value,
},
Schedule: o.Schedule,
UseOwnerReferencesInBackup: &o.UseOwnerReferencesInBackup,

View File

@@ -19,6 +19,8 @@ package uninstall
import (
"context"
"fmt"
"reflect"
"sync"
"time"
"github.com/pkg/errors"
@@ -40,6 +42,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
velerov2alpha1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v2alpha1"
"github.com/vmware-tanzu/velero/pkg/client"
"github.com/vmware-tanzu/velero/pkg/cmd"
"github.com/vmware-tanzu/velero/pkg/cmd/cli"
@@ -50,6 +53,8 @@ import (
var gracefulDeletionMaximumDuration = 1 * time.Minute
var resToDelete = []kbclient.ObjectList{}
// uninstallOptions collects all the options for uninstalling Velero from a Kubernetes cluster.
type uninstallOptions struct {
wait bool // deprecated
@@ -167,11 +172,7 @@ func Run(ctx context.Context, kbClient kbclient.Client, namespace string) error
}
func deleteNamespace(ctx context.Context, kbClient kbclient.Client, namespace string) error {
// Deal with resources with attached finalizers to ensure proper handling of those finalizers.
if err := deleteResourcesWithFinalizer(ctx, kbClient, namespace); err != nil {
return errors.Wrap(err, "Fail to remove finalizer from restores")
}
// First check if it's already been deleted
ns := &corev1.Namespace{}
key := kbclient.ObjectKey{Name: namespace}
if err := kbClient.Get(ctx, key, ns); err != nil {
@@ -182,6 +183,11 @@ func deleteNamespace(ctx context.Context, kbClient kbclient.Client, namespace st
return err
}
// Deal with resources with attached finalizers to ensure proper handling of those finalizers.
if err := deleteResourcesWithFinalizer(ctx, kbClient, namespace); err != nil {
return errors.Wrap(err, "Fail to remove finalizer from restores")
}
if err := kbClient.Delete(ctx, ns); err != nil {
if apierrors.IsNotFound(err) {
fmt.Printf("Velero namespace %q does not exist, skipping.\n", namespace)
@@ -223,80 +229,151 @@ func deleteNamespace(ctx context.Context, kbClient kbclient.Client, namespace st
// 2. The controller may encounter errors while handling the finalizer, in such case, the controller will keep trying until it succeeds.
// So it is important to set a timeout, once the process exceed the timeout, we will forcedly delete the resources by removing the finalizer from them,
// otherwise the deletion process may get stuck indefinitely.
// 3. There is only restore finalizer supported as of v1.12. If any new finalizers are added in the future, the corresponding deletion logic can be
// 3. There is only resources finalizer supported as of v1.12. If any new finalizers are added in the future, the corresponding deletion logic can be
// incorporated into this function.
func deleteResourcesWithFinalizer(ctx context.Context, kbClient kbclient.Client, namespace string) error {
fmt.Println("Waiting for resource with attached finalizer to be deleted")
return deleteRestore(ctx, kbClient, namespace)
return deleteResources(ctx, kbClient, namespace)
}
func deleteRestore(ctx context.Context, kbClient kbclient.Client, namespace string) error {
// Check if restore crd exists, if it does not exist, return immediately.
func checkResources(ctx context.Context, kbClient kbclient.Client) error {
checkCRDs := []string{"restores.velero.io", "datauploads.velero.io", "datadownloads.velero.io"}
var err error
v1crd := &apiextv1.CustomResourceDefinition{}
key := kbclient.ObjectKey{Name: "restores.velero.io"}
if err = kbClient.Get(ctx, key, v1crd); err != nil {
if apierrors.IsNotFound(err) {
return nil
for _, crd := range checkCRDs {
key := kbclient.ObjectKey{Name: crd}
if err = kbClient.Get(ctx, key, v1crd); err != nil {
if !apierrors.IsNotFound(err) {
return errors.Wrapf(err, "Error getting %s crd", crd)
}
} else {
return errors.Wrap(err, "Error getting restore crd")
// no error with found CRD that we should delete
switch crd {
case "restores.velero.io":
resToDelete = append(resToDelete, &velerov1api.RestoreList{})
case "datauploads.velero.io":
resToDelete = append(resToDelete, &velerov2alpha1api.DataUploadList{})
case "datadownloads.velero.io":
resToDelete = append(resToDelete, &velerov2alpha1api.DataDownloadList{})
default:
fmt.Printf("Unsupported type %s to be cleared\n", crd)
}
}
}
return nil
}
// First attempt to gracefully delete all the restore within a specified time frame, If the process exceeds the timeout limit,
// it is likely that there may be errors during the finalization of restores. In such cases, we should proceed with forcefully deleting the restores.
err = gracefullyDeleteRestore(ctx, kbClient, namespace)
if err != nil && err != wait.ErrWaitTimeout {
return errors.Wrap(err, "Error deleting restores")
func deleteResources(ctx context.Context, kbClient kbclient.Client, namespace string) error {
// Check if resources crd exists, if it does not exist, return immediately.
err := checkResources(ctx, kbClient)
if err != nil {
return err
}
// First attempt to gracefully delete all the resources within a specified time frame, If the process exceeds the timeout limit,
// it is likely that there may be errors during the finalization of restores. In such cases, we should proceed with forcefully deleting the restores.
err = gracefullyDeleteResources(ctx, kbClient, namespace)
if err != nil && err != wait.ErrWaitTimeout {
return errors.Wrap(err, "Error deleting resources")
}
if err == wait.ErrWaitTimeout {
err = forcedlyDeleteRestore(ctx, kbClient, namespace)
err = forcedlyDeleteResources(ctx, kbClient, namespace)
if err != nil {
return errors.Wrap(err, "Error deleting restores")
return errors.Wrap(err, "Error deleting resources forcedly")
}
}
return nil
}
func gracefullyDeleteRestore(ctx context.Context, kbClient kbclient.Client, namespace string) error {
var err error
restoreList := &velerov1api.RestoreList{}
if err = kbClient.List(ctx, restoreList, &kbclient.ListOptions{Namespace: namespace}); err != nil {
return errors.Wrap(err, "Error getting restores during graceful deletion")
func gracefullyDeleteResources(ctx context.Context, kbClient kbclient.Client, namespace string) error {
errorChan := make(chan error)
var wg sync.WaitGroup
wg.Add(len(resToDelete))
for i := range resToDelete {
go func(index int) {
defer wg.Done()
errorChan <- gracefullyDeleteResource(ctx, kbClient, namespace, resToDelete[index])
}(i)
}
for i := range restoreList.Items {
if err = kbClient.Delete(ctx, &restoreList.Items[i]); err != nil {
go func() {
wg.Wait()
close(errorChan)
}()
for err := range errorChan {
if err != nil {
return err
}
}
return waitDeletingResources(ctx, kbClient, namespace)
}
func gracefullyDeleteResource(ctx context.Context, kbClient kbclient.Client, namespace string, list kbclient.ObjectList) error {
if err := kbClient.List(ctx, list, &kbclient.ListOptions{Namespace: namespace}); err != nil {
return errors.Wrap(err, "Error getting resources during graceful deletion")
}
var objectsToDelete []kbclient.Object
items := reflect.ValueOf(list).Elem().FieldByName("Items")
for i := 0; i < items.Len(); i++ {
item := items.Index(i).Addr().Interface()
// Type assertion to cast item to the appropriate type
switch typedItem := item.(type) {
case *velerov1api.Restore:
objectsToDelete = append(objectsToDelete, typedItem)
case *velerov2alpha1api.DataUpload:
objectsToDelete = append(objectsToDelete, typedItem)
case *velerov2alpha1api.DataDownload:
objectsToDelete = append(objectsToDelete, typedItem)
default:
return errors.New("Unsupported resource type")
}
}
// Delete collected resources in a batch
for _, resource := range objectsToDelete {
if err := kbClient.Delete(ctx, resource); err != nil {
if apierrors.IsNotFound(err) {
continue
}
return errors.Wrap(err, "Error deleting restores during graceful deletion")
return errors.Wrap(err, "Error deleting resources during graceful deletion")
}
}
return nil
}
func waitDeletingResources(ctx context.Context, kbClient kbclient.Client, namespace string) error {
// Wait for the deletion of all the restores within a specified time frame
err = wait.PollImmediate(time.Second, gracefulDeletionMaximumDuration, func() (bool, error) {
restoreList := &velerov1api.RestoreList{}
if errList := kbClient.List(ctx, restoreList, &kbclient.ListOptions{Namespace: namespace}); errList != nil {
return false, errList
err := wait.PollImmediate(time.Second, gracefulDeletionMaximumDuration, func() (bool, error) {
itemsCount := 0
for i := range resToDelete {
if errList := kbClient.List(ctx, resToDelete[i], &kbclient.ListOptions{Namespace: namespace}); errList != nil {
return false, errList
}
itemsCount += reflect.ValueOf(resToDelete[i]).Elem().FieldByName("Items").Len()
if itemsCount > 0 {
fmt.Print(".")
return false, nil
}
}
if len(restoreList.Items) > 0 {
fmt.Print(".")
return false, nil
} else {
return true, nil
}
return true, nil
})
return err
}
func forcedlyDeleteRestore(ctx context.Context, kbClient kbclient.Client, namespace string) error {
func forcedlyDeleteResources(ctx context.Context, kbClient kbclient.Client, namespace string) error {
// Delete velero deployment first in case:
// 1. finalizers will be added back by restore controller after they are removed at next step;
// 2. new restores attached with finalizer will be created by restore controller after we remove all the restores' finalizer at next step;
// 1. finalizers will be added back by resources related controller after they are removed at next step;
// 2. new resources attached with finalizer will be created by controller after we remove all the resources' finalizer at next step;
deploy := &appsv1api.Deployment{
ObjectMeta: metav1.ObjectMeta{
Namespace: "velero",
@@ -330,23 +407,56 @@ func forcedlyDeleteRestore(ctx context.Context, kbClient kbclient.Client, namesp
if err != nil {
return errors.Wrap(err, "Error deleting velero deployment during force deletion")
}
return removeResourcesFinalizer(ctx, kbClient, namespace)
}
// Remove all the restores' finalizer so they can be deleted during the deletion of velero namespace.
restoreList := &velerov1api.RestoreList{}
if err := kbClient.List(ctx, restoreList, &kbclient.ListOptions{Namespace: namespace}); err != nil {
return errors.Wrap(err, "Error getting restores during force deletion")
func removeResourcesFinalizer(ctx context.Context, kbClient kbclient.Client, namespace string) error {
for i := range resToDelete {
if err := removeResourceFinalizer(ctx, kbClient, namespace, resToDelete[i]); err != nil {
return err
}
}
return nil
}
func removeResourceFinalizer(ctx context.Context, kbClient kbclient.Client, namespace string, resourceList kbclient.ObjectList) error {
listOptions := &kbclient.ListOptions{Namespace: namespace}
if err := kbClient.List(ctx, resourceList, listOptions); err != nil {
return errors.Wrap(err, fmt.Sprintf("Error getting resources of type %T during force deletion", resourceList))
}
for i := range restoreList.Items {
if controllerutil.ContainsFinalizer(&restoreList.Items[i], controller.ExternalResourcesFinalizer) {
update := &restoreList.Items[i]
original := update.DeepCopy()
controllerutil.RemoveFinalizer(update, controller.ExternalResourcesFinalizer)
if err := kubeutil.PatchResource(original, update, kbClient); err != nil {
return errors.Wrap(err, "Error removing restore finalizer during force deletion")
}
items := reflect.ValueOf(resourceList).Elem().FieldByName("Items")
var err error
for i := 0; i < items.Len(); i++ {
item := items.Index(i).Addr().Interface()
// Type assertion to cast item to the appropriate type
switch typedItem := item.(type) {
case *velerov1api.Restore:
err = removeFinalizerForObject(typedItem, controller.ExternalResourcesFinalizer, kbClient)
case *velerov2alpha1api.DataUpload:
err = removeFinalizerForObject(typedItem, controller.DataUploadDownloadFinalizer, kbClient)
case *velerov2alpha1api.DataDownload:
err = removeFinalizerForObject(typedItem, controller.DataUploadDownloadFinalizer, kbClient)
default:
err = errors.Errorf("Unsupported resource type %T", typedItem)
}
if err != nil {
return err
}
}
return nil
}
func removeFinalizerForObject(obj kbclient.Object, finalizer string, kbClient kbclient.Client) error {
if controllerutil.ContainsFinalizer(obj, finalizer) {
update := obj.DeepCopyObject().(kbclient.Object)
original := obj.DeepCopyObject().(kbclient.Object)
controllerutil.RemoveFinalizer(update, finalizer)
if err := kubeutil.PatchResource(original, update, kbClient); err != nil {
return errors.Wrap(err, fmt.Sprintf("Error removing finalizer %q during force deletion", finalizer))
}
}
return nil
}

View File

@@ -123,9 +123,9 @@ func (r *DataDownloadReconciler) Reconcile(ctx context.Context, req ctrl.Request
// Add finalizer
// Logic for clear resources when datadownload been deleted
if dd.DeletionTimestamp.IsZero() { // add finalizer for all cr at beginning
if !isDataDownloadInFinalState(dd) && !controllerutil.ContainsFinalizer(dd, dataUploadDownloadFinalizer) {
if !isDataDownloadInFinalState(dd) && !controllerutil.ContainsFinalizer(dd, DataUploadDownloadFinalizer) {
succeeded, err := r.exclusiveUpdateDataDownload(ctx, dd, func(dd *velerov2alpha1api.DataDownload) {
controllerutil.AddFinalizer(dd, dataUploadDownloadFinalizer)
controllerutil.AddFinalizer(dd, DataUploadDownloadFinalizer)
})
if err != nil {
log.Errorf("failed to add finalizer with error %s for %s/%s", err.Error(), dd.Namespace, dd.Name)
@@ -135,7 +135,7 @@ func (r *DataDownloadReconciler) Reconcile(ctx context.Context, req ctrl.Request
return ctrl.Result{Requeue: true}, nil
}
}
} else if controllerutil.ContainsFinalizer(dd, dataUploadDownloadFinalizer) && !dd.Spec.Cancel && !isDataDownloadInFinalState(dd) {
} else if controllerutil.ContainsFinalizer(dd, DataUploadDownloadFinalizer) && !dd.Spec.Cancel && !isDataDownloadInFinalState(dd) {
// when delete cr we need to clear up internal resources created by Velero, here we use the cancel mechanism
// to help clear up resources instead of clear them directly in case of some conflict with Expose action
if err := UpdateDataDownloadWithRetry(ctx, r.client, req.NamespacedName, log, func(dataDownload *velerov2alpha1api.DataDownload) {
@@ -309,9 +309,11 @@ func (r *DataDownloadReconciler) Reconcile(ctx context.Context, req ctrl.Request
} else {
// put the finilizer remove action here for all cr will goes to the final status, we could check finalizer and do remove action in final status
// instead of intermediate state
if isDataDownloadInFinalState(dd) && !dd.DeletionTimestamp.IsZero() && controllerutil.ContainsFinalizer(dd, dataUploadDownloadFinalizer) {
// remove finalizer no matter whether the cr is being deleted or not for it is no longer needed when internal resources are all cleaned up
// also in final status cr won't block the direct delete of the velero namespace
if isDataDownloadInFinalState(dd) && controllerutil.ContainsFinalizer(dd, DataUploadDownloadFinalizer) {
original := dd.DeepCopy()
controllerutil.RemoveFinalizer(dd, dataUploadDownloadFinalizer)
controllerutil.RemoveFinalizer(dd, DataUploadDownloadFinalizer)
if err := r.client.Patch(ctx, dd, client.MergeFrom(original)); err != nil {
log.WithError(err).Error("error to remove finalizer")
}

View File

@@ -299,7 +299,7 @@ func TestDataDownloadReconcile(t *testing.T) {
name: "dataDownload with enabled cancel",
dd: func() *velerov2alpha1api.DataDownload {
dd := dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseAccepted).Result()
controllerutil.AddFinalizer(dd, dataUploadDownloadFinalizer)
controllerutil.AddFinalizer(dd, DataUploadDownloadFinalizer)
dd.DeletionTimestamp = &metav1.Time{Time: time.Now()}
return dd
}(),
@@ -312,12 +312,12 @@ func TestDataDownloadReconcile(t *testing.T) {
name: "dataDownload with remove finalizer and should not be retrieved",
dd: func() *velerov2alpha1api.DataDownload {
dd := dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseFailed).Cancel(true).Result()
controllerutil.AddFinalizer(dd, dataUploadDownloadFinalizer)
controllerutil.AddFinalizer(dd, DataUploadDownloadFinalizer)
dd.DeletionTimestamp = &metav1.Time{Time: time.Now()}
return dd
}(),
checkFunc: func(dd velerov2alpha1api.DataDownload) bool {
return !controllerutil.ContainsFinalizer(&dd, dataUploadDownloadFinalizer)
return !controllerutil.ContainsFinalizer(&dd, DataUploadDownloadFinalizer)
},
},
}
@@ -428,7 +428,7 @@ func TestDataDownloadReconcile(t *testing.T) {
assert.Contains(t, dd.Status.Message, test.expectedStatusMsg)
}
if test.dd.Namespace == velerov1api.DefaultNamespace {
if controllerutil.ContainsFinalizer(test.dd, dataUploadDownloadFinalizer) {
if controllerutil.ContainsFinalizer(test.dd, DataUploadDownloadFinalizer) {
assert.True(t, true, apierrors.IsNotFound(err))
} else {
require.Nil(t, err)

View File

@@ -58,7 +58,7 @@ import (
const (
dataUploadDownloadRequestor = "snapshot-data-upload-download"
acceptNodeLabelKey = "velero.io/accepted-by"
dataUploadDownloadFinalizer = "velero.io/data-upload-download-finalizer"
DataUploadDownloadFinalizer = "velero.io/data-upload-download-finalizer"
preparingMonitorFrequency = time.Minute
)
@@ -132,9 +132,9 @@ func (r *DataUploadReconciler) Reconcile(ctx context.Context, req ctrl.Request)
// Logic for clear resources when dataupload been deleted
if du.DeletionTimestamp.IsZero() { // add finalizer for all cr at beginning
if !isDataUploadInFinalState(du) && !controllerutil.ContainsFinalizer(du, dataUploadDownloadFinalizer) {
if !isDataUploadInFinalState(du) && !controllerutil.ContainsFinalizer(du, DataUploadDownloadFinalizer) {
succeeded, err := r.exclusiveUpdateDataUpload(ctx, du, func(du *velerov2alpha1api.DataUpload) {
controllerutil.AddFinalizer(du, dataUploadDownloadFinalizer)
controllerutil.AddFinalizer(du, DataUploadDownloadFinalizer)
})
if err != nil {
@@ -145,7 +145,7 @@ func (r *DataUploadReconciler) Reconcile(ctx context.Context, req ctrl.Request)
return ctrl.Result{Requeue: true}, nil
}
}
} else if controllerutil.ContainsFinalizer(du, dataUploadDownloadFinalizer) && !du.Spec.Cancel && !isDataUploadInFinalState(du) {
} else if controllerutil.ContainsFinalizer(du, DataUploadDownloadFinalizer) && !du.Spec.Cancel && !isDataUploadInFinalState(du) {
// when delete cr we need to clear up internal resources created by Velero, here we use the cancel mechanism
// to help clear up resources instead of clear them directly in case of some conflict with Expose action
if err := UpdateDataUploadWithRetry(ctx, r.client, req.NamespacedName, log, func(dataUpload *velerov2alpha1api.DataUpload) {
@@ -308,10 +308,12 @@ func (r *DataUploadReconciler) Reconcile(ctx context.Context, req ctrl.Request)
return ctrl.Result{}, nil
} else {
// put the finilizer remove action here for all cr will goes to the final status, we could check finalizer and do remove action in final status
// instead of intermediate state
if isDataUploadInFinalState(du) && !du.DeletionTimestamp.IsZero() && controllerutil.ContainsFinalizer(du, dataUploadDownloadFinalizer) {
// instead of intermediate state.
// remove finalizer no matter whether the cr is being deleted or not for it is no longer needed when internal resources are all cleaned up
// also in final status cr won't block the direct delete of the velero namespace
if isDataUploadInFinalState(du) && controllerutil.ContainsFinalizer(du, DataUploadDownloadFinalizer) {
original := du.DeepCopy()
controllerutil.RemoveFinalizer(du, dataUploadDownloadFinalizer)
controllerutil.RemoveFinalizer(du, DataUploadDownloadFinalizer)
if err := r.client.Patch(ctx, du, client.MergeFrom(original)); err != nil {
log.WithError(err).Error("error to remove finalizer")
}

View File

@@ -399,7 +399,7 @@ func TestReconcile(t *testing.T) {
pod: builder.ForPod(velerov1api.DefaultNamespace, dataUploadName).Volumes(&corev1.Volume{Name: "dataupload-1"}).Result(),
du: func() *velerov2alpha1api.DataUpload {
du := dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseAccepted).SnapshotType(fakeSnapshotType).Result()
controllerutil.AddFinalizer(du, dataUploadDownloadFinalizer)
controllerutil.AddFinalizer(du, DataUploadDownloadFinalizer)
du.DeletionTimestamp = &metav1.Time{Time: time.Now()}
return du
}(),
@@ -415,13 +415,13 @@ func TestReconcile(t *testing.T) {
pod: builder.ForPod(velerov1api.DefaultNamespace, dataUploadName).Volumes(&corev1.Volume{Name: "dataupload-1"}).Result(),
du: func() *velerov2alpha1api.DataUpload {
du := dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseFailed).SnapshotType(fakeSnapshotType).Cancel(true).Result()
controllerutil.AddFinalizer(du, dataUploadDownloadFinalizer)
controllerutil.AddFinalizer(du, DataUploadDownloadFinalizer)
du.DeletionTimestamp = &metav1.Time{Time: time.Now()}
return du
}(),
expectedProcessed: false,
checkFunc: func(du velerov2alpha1api.DataUpload) bool {
return !controllerutil.ContainsFinalizer(&du, dataUploadDownloadFinalizer)
return !controllerutil.ContainsFinalizer(&du, DataUploadDownloadFinalizer)
},
expectedRequeue: ctrl.Result{},
},

View File

@@ -671,14 +671,13 @@ func (r *restoreReconciler) deleteExternalResources(restore *api.Restore) error
backupInfo, err := r.fetchBackupInfo(restore.Spec.BackupName)
if err != nil {
if apierrors.IsNotFound(err) {
r.logger.Errorf("got not found error: %v, skip deleting the restore files in object storage", err)
return nil
}
return errors.Wrap(err, fmt.Sprintf("can't get backup info, backup: %s", restore.Spec.BackupName))
}
// if storage locations is read-only, skip deletion
if backupInfo.location.Spec.AccessMode == api.BackupStorageLocationAccessModeReadOnly {
return nil
}
// delete restore files in object storage
pluginManager := r.newPluginManager(r.logger)
defer pluginManager.CleanupClients()

View File

@@ -760,7 +760,7 @@ func TestValidateAndCompleteWithResourceModifierSpecified(t *testing.T) {
Namespace: velerov1api.DefaultNamespace,
},
Data: map[string]string{
"sub.yml": "version: v1\nresourceModifierRules:\n- conditions:\n groupKind: persistentvolumeclaims\n resourceNameRegex: \".*\"\n namespaces:\n - bar\n - foo\n patches:\n - operation: replace\n path: \"/spec/storageClassName\"\n value: \"premium\"\n - operation: remove\n path: \"/metadata/labels/test\"\n\n\n",
"sub.yml": "version: v1\nresourceModifierRules:\n- conditions:\n groupResource: persistentvolumeclaims\n resourceNameRegex: \".*\"\n namespaces:\n - bar\n - foo\n patches:\n - operation: replace\n path: \"/spec/storageClassName\"\n value: \"premium\"\n - operation: remove\n path: \"/metadata/labels/test\"\n\n\n",
},
}
require.NoError(t, r.kbClient.Create(context.Background(), cm1))
@@ -788,7 +788,7 @@ func TestValidateAndCompleteWithResourceModifierSpecified(t *testing.T) {
Namespace: velerov1api.DefaultNamespace,
},
Data: map[string]string{
"sub.yml": "version1: v1\nresourceModifierRules:\n- conditions:\n groupKind: persistentvolumeclaims\n resourceNameRegex: \".*\"\n namespaces:\n - bar\n - foo\n patches:\n - operation: replace\n path: \"/spec/storageClassName\"\n value: \"premium\"\n - operation: remove\n path: \"/metadata/labels/test\"\n\n\n",
"sub.yml": "version1: v1\nresourceModifierRules:\n- conditions:\n groupResource: persistentvolumeclaims\n resourceNameRegex: \".*\"\n namespaces:\n - bar\n - foo\n patches:\n - operation: replace\n path: \"/spec/storageClassName\"\n value: \"premium\"\n - operation: remove\n path: \"/metadata/labels/test\"\n\n\n",
},
}
require.NoError(t, r.kbClient.Create(context.Background(), invalidVersionCm))
@@ -816,7 +816,7 @@ func TestValidateAndCompleteWithResourceModifierSpecified(t *testing.T) {
Namespace: velerov1api.DefaultNamespace,
},
Data: map[string]string{
"sub.yml": "version: v1\nresourceModifierRules:\n- conditions:\n groupKind: persistentvolumeclaims\n resourceNameRegex: \".*\"\n namespaces:\n - bar\n - foo\n patches:\n - operation: invalid\n path: \"/spec/storageClassName\"\n value: \"premium\"\n - operation: remove\n path: \"/metadata/labels/test\"\n\n\n",
"sub.yml": "version: v1\nresourceModifierRules:\n- conditions:\n groupResource: persistentvolumeclaims\n resourceNameRegex: \".*\"\n namespaces:\n - bar\n - foo\n patches:\n - operation: invalid\n path: \"/spec/storageClassName\"\n value: \"premium\"\n - operation: remove\n path: \"/metadata/labels/test\"\n\n\n",
},
}
require.NoError(t, r.kbClient.Create(context.Background(), invalidOperatorCm))

View File

@@ -139,18 +139,6 @@ func (r *restoreOperationsReconciler) Reconcile(ctx context.Context, req ctrl.Re
return ctrl.Result{}, errors.Wrap(err, "error getting backup info")
}
if info.location.Spec.AccessMode == velerov1api.BackupStorageLocationAccessModeReadOnly {
log.Infof("Cannot check progress on Restore operations because backup storage location %s is currently in read-only mode; marking restore PartiallyFailed", info.location.Name)
restore.Status.Phase = velerov1api.RestorePhasePartiallyFailed
restore.Status.CompletionTimestamp = &metav1.Time{Time: r.clock.Now()}
r.metrics.RegisterRestorePartialFailure(restore.Spec.ScheduleName)
err := r.updateRestoreAndOperationsJSON(ctx, original, restore, nil, &itemoperationmap.OperationsForRestore{ErrsSinceUpdate: []string{"BSL is read-only"}}, false, false)
if err != nil {
log.WithError(err).Error("error updating Restore")
}
return ctrl.Result{}, nil
}
pluginManager := r.newPluginManager(r.logger)
defer pluginManager.CleanupClients()
backupStore, err := r.backupStoreGetter.Get(info.location, pluginManager, r.logger)

View File

@@ -18,6 +18,7 @@ package discovery
import (
"sort"
"strings"
"sync"
"github.com/pkg/errors"
@@ -170,7 +171,7 @@ func (h *helper) Refresh() error {
}
h.resources = discovery.FilteredBy(
discovery.ResourcePredicateFunc(filterByVerbs),
And(filterByVerbs, skipSubresource),
serverResources,
)
@@ -240,10 +241,34 @@ func refreshServerGroupsAndResources(discoveryClient serverResourcesInterface, l
return serverGroups, serverResources, err
}
// And returns a composite predicate that implements a logical AND of the predicates passed to it.
func And(predicates ...discovery.ResourcePredicateFunc) discovery.ResourcePredicate {
return and{predicates}
}
type and struct {
predicates []discovery.ResourcePredicateFunc
}
func (a and) Match(groupVersion string, r *metav1.APIResource) bool {
for _, p := range a.predicates {
if !p(groupVersion, r) {
return false
}
}
return true
}
func filterByVerbs(groupVersion string, r *metav1.APIResource) bool {
return discovery.SupportsAllVerbs{Verbs: []string{"list", "create", "get", "delete"}}.Match(groupVersion, r)
}
func skipSubresource(_ string, r *metav1.APIResource) bool {
// if we have a slash, then this is a subresource and we shouldn't include it.
return !strings.Contains(r.Name, "/")
}
// sortResources sources resources by moving extensions to the end of the slice. The order of all
// the other resources is preserved.
func sortResources(resources []*metav1.APIResourceList) {

View File

@@ -246,8 +246,9 @@ func (e *csiSnapshotExposer) createBackupVS(ctx context.Context, ownerObject cor
vs := &snapshotv1api.VolumeSnapshot{
ObjectMeta: metav1.ObjectMeta{
Name: backupVSName,
Namespace: ownerObject.Namespace,
Name: backupVSName,
Namespace: ownerObject.Namespace,
Annotations: snapshotVS.Annotations,
// Don't add ownerReference to SnapshotBackup.
// The backupPVC should be deleted before backupVS, otherwise, the deletion of backupVS will fail since
// backupPVC has its dataSource referring to it
@@ -268,7 +269,8 @@ func (e *csiSnapshotExposer) createBackupVSC(ctx context.Context, ownerObject co
vsc := &snapshotv1api.VolumeSnapshotContent{
ObjectMeta: metav1.ObjectMeta{
Name: backupVSCName,
Name: backupVSCName,
Annotations: snapshotVSC.Annotations,
},
Spec: snapshotv1api.VolumeSnapshotContentSpec{
VolumeSnapshotRef: corev1.ObjectReference{

View File

@@ -58,10 +58,22 @@ func TestExpose(t *testing.T) {
UID: "fake-uid",
},
}
snapshotClass := "fake-snapshot-class"
vsObject := &snapshotv1api.VolumeSnapshot{
ObjectMeta: metav1.ObjectMeta{
Name: "fake-vs",
Namespace: "fake-ns",
Annotations: map[string]string{
"fake-key-1": "fake-value-1",
"fake-key-2": "fake-value-2",
},
},
Spec: snapshotv1api.VolumeSnapshotSpec{
Source: snapshotv1api.VolumeSnapshotSource{
VolumeSnapshotContentName: &vscName,
},
VolumeSnapshotClassName: &snapshotClass,
},
Status: &snapshotv1api.VolumeSnapshotStatus{
BoundVolumeSnapshotContentName: &vscName,
@@ -71,15 +83,23 @@ func TestExpose(t *testing.T) {
}
var restoreSize int64
snapshotHandle := "fake-handle"
vscObj := &snapshotv1api.VolumeSnapshotContent{
ObjectMeta: metav1.ObjectMeta{
Name: "fake-vsc",
Name: vscName,
Annotations: map[string]string{
"fake-key-3": "fake-value-3",
"fake-key-4": "fake-value-4",
},
},
Spec: snapshotv1api.VolumeSnapshotContentSpec{
DeletionPolicy: snapshotv1api.VolumeSnapshotContentDelete,
DeletionPolicy: snapshotv1api.VolumeSnapshotContentDelete,
Driver: "fake-driver",
VolumeSnapshotClassName: &snapshotClass,
},
Status: &snapshotv1api.VolumeSnapshotContentStatus{
RestoreSize: &restoreSize,
RestoreSize: &restoreSize,
SnapshotHandle: &snapshotHandle,
},
}
@@ -284,6 +304,23 @@ func TestExpose(t *testing.T) {
},
err: "error to create backup pod: fake-create-error",
},
{
name: "success",
ownerBackup: backup,
exposeParam: CSISnapshotExposeParam{
SnapshotName: "fake-vs",
SourceNamespace: "fake-ns",
AccessMode: AccessModeFileSystem,
Timeout: time.Millisecond,
},
snapshotClientObj: []runtime.Object{
vsObject,
vscObj,
},
kubeClientObj: []runtime.Object{
daemonSet,
},
},
}
for _, test := range tests {
@@ -317,7 +354,33 @@ func TestExpose(t *testing.T) {
}
err := exposer.Expose(context.Background(), ownerObject, &test.exposeParam)
assert.EqualError(t, err, test.err)
if err == nil {
assert.NoError(t, err)
_, err = exposer.kubeClient.CoreV1().Pods(ownerObject.Namespace).Get(context.Background(), ownerObject.Name, metav1.GetOptions{})
assert.NoError(t, err)
_, err = exposer.kubeClient.CoreV1().PersistentVolumeClaims(ownerObject.Namespace).Get(context.Background(), ownerObject.Name, metav1.GetOptions{})
assert.NoError(t, err)
expectedVS, err := exposer.csiSnapshotClient.VolumeSnapshots(ownerObject.Namespace).Get(context.Background(), ownerObject.Name, metav1.GetOptions{})
assert.NoError(t, err)
expectedVSC, err := exposer.csiSnapshotClient.VolumeSnapshotContents().Get(context.Background(), ownerObject.Name, metav1.GetOptions{})
assert.NoError(t, err)
assert.Equal(t, expectedVS.Annotations, vsObject.Annotations)
assert.Equal(t, *expectedVS.Spec.VolumeSnapshotClassName, *vsObject.Spec.VolumeSnapshotClassName)
assert.Equal(t, *expectedVS.Spec.Source.VolumeSnapshotContentName, expectedVSC.Name)
assert.Equal(t, expectedVSC.Annotations, vscObj.Annotations)
assert.Equal(t, expectedVSC.Spec.DeletionPolicy, vscObj.Spec.DeletionPolicy)
assert.Equal(t, expectedVSC.Spec.Driver, vscObj.Spec.Driver)
assert.Equal(t, *expectedVSC.Spec.VolumeSnapshotClassName, *vscObj.Spec.VolumeSnapshotClassName)
} else {
assert.EqualError(t, err, test.err)
}
})
}
}

View File

@@ -221,7 +221,7 @@ func (e *genericRestoreExposer) RebindVolume(ctx context.Context, ownerObject co
}
restorePVName := restorePV.Name
restorePV, err = kube.ResetPVBinding(ctx, e.kubeClient.CoreV1(), restorePV, matchLabel)
restorePV, err = kube.ResetPVBinding(ctx, e.kubeClient.CoreV1(), restorePV, matchLabel, targetPVC)
if err != nil {
return errors.Wrapf(err, "error to reset binding info for restore PV %s", restorePVName)
}

View File

@@ -32,7 +32,11 @@ import (
velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
)
const defaultServiceAccountName = "velero"
const (
defaultServiceAccountName = "velero"
podSecurityLevel = "privileged"
podSecurityVersion = "latest"
)
var (
DefaultVeleroPodCPURequest = "500m"
@@ -148,8 +152,12 @@ func Namespace(namespace string) *corev1.Namespace {
},
}
ns.Labels["pod-security.kubernetes.io/enforce"] = "privileged"
ns.Labels["pod-security.kubernetes.io/enforce-version"] = "latest"
ns.Labels["pod-security.kubernetes.io/enforce"] = podSecurityLevel
ns.Labels["pod-security.kubernetes.io/enforce-version"] = podSecurityVersion
ns.Labels["pod-security.kubernetes.io/audit"] = podSecurityLevel
ns.Labels["pod-security.kubernetes.io/audit-version"] = podSecurityVersion
ns.Labels["pod-security.kubernetes.io/warn"] = podSecurityLevel
ns.Labels["pod-security.kubernetes.io/warn-version"] = podSecurityVersion
return ns
}

View File

@@ -47,6 +47,10 @@ func TestResources(t *testing.T) {
// PSA(Pod Security Admission) and PSS(Pod Security Standards).
assert.Equal(t, ns.Labels["pod-security.kubernetes.io/enforce"], "privileged")
assert.Equal(t, ns.Labels["pod-security.kubernetes.io/enforce-version"], "latest")
assert.Equal(t, ns.Labels["pod-security.kubernetes.io/audit"], "privileged")
assert.Equal(t, ns.Labels["pod-security.kubernetes.io/audit-version"], "latest")
assert.Equal(t, ns.Labels["pod-security.kubernetes.io/warn"], "privileged")
assert.Equal(t, ns.Labels["pod-security.kubernetes.io/warn-version"], "latest")
crb := ClusterRoleBinding(DefaultVeleroNamespace)
// The CRB is a cluster-scoped resource

View File

@@ -71,6 +71,10 @@ import (
"github.com/vmware-tanzu/velero/pkg/volume"
)
var resourceMustHave = []string{
"datauploads.velero.io",
}
type VolumeSnapshotterGetter interface {
GetVolumeSnapshotter(name string) (vsv1.VolumeSnapshotter, error)
}
@@ -276,6 +280,7 @@ func (kr *kubernetesRestorer) RestoreWithResolvers(
resourceIncludesExcludes: resourceIncludesExcludes,
resourceStatusIncludesExcludes: restoreStatusIncludesExcludes,
namespaceIncludesExcludes: namespaceIncludesExcludes,
resourceMustHave: sets.NewString(resourceMustHave...),
chosenGrpVersToRestore: make(map[string]ChosenGroupVersion),
selector: selector,
OrSelectors: OrSelectors,
@@ -320,6 +325,7 @@ type restoreContext struct {
resourceIncludesExcludes *collections.IncludesExcludes
resourceStatusIncludesExcludes *collections.IncludesExcludes
namespaceIncludesExcludes *collections.IncludesExcludes
resourceMustHave sets.String
chosenGrpVersToRestore map[string]ChosenGroupVersion
selector labels.Selector
OrSelectors []labels.Selector
@@ -989,7 +995,7 @@ func (ctx *restoreContext) restoreItem(obj *unstructured.Unstructured, groupReso
// via obj.GetNamespace()) instead of the namespace parameter, because we want
// to check the *original* namespace, not the remapped one if it's been remapped.
if namespace != "" {
if !ctx.namespaceIncludesExcludes.ShouldInclude(obj.GetNamespace()) {
if !ctx.namespaceIncludesExcludes.ShouldInclude(obj.GetNamespace()) && !ctx.resourceMustHave.Has(groupResource.String()) {
ctx.log.WithFields(logrus.Fields{
"namespace": obj.GetNamespace(),
"name": obj.GetName(),
@@ -1346,6 +1352,14 @@ func (ctx *restoreContext) restoreItem(obj *unstructured.Unstructured, groupReso
}
}
if ctx.resourceModifiers != nil {
if errList := ctx.resourceModifiers.ApplyResourceModifierRules(obj, groupResource.String(), ctx.log); errList != nil {
for _, err := range errList {
errs.Add(namespace, err)
}
}
}
// Necessary because we may have remapped the namespace if the namespace is
// blank, don't create the key.
originalNamespace := obj.GetNamespace()
@@ -1358,14 +1372,6 @@ func (ctx *restoreContext) restoreItem(obj *unstructured.Unstructured, groupReso
// and which backup they came from.
addRestoreLabels(obj, ctx.restore.Name, ctx.restore.Spec.BackupName)
if ctx.resourceModifiers != nil {
if errList := ctx.resourceModifiers.ApplyResourceModifierRules(obj, groupResource.String(), ctx.log); errList != nil {
for _, err := range errList {
errs.Add(namespace, err)
}
}
}
// The object apiVersion might get modified by a RestorePlugin so we need to
// get a new client to reflect updated resource path.
newGR := schema.GroupResource{Group: obj.GroupVersionKind().Group, Resource: groupResource.Resource}
@@ -2016,7 +2022,7 @@ func (ctx *restoreContext) getOrderedResourceCollection(
// Iterate through each namespace that contains instances of the
// resource and append to the list of to-be restored resources.
for namespace, items := range resourceList.ItemsByNamespace {
if namespace != "" && !ctx.namespaceIncludesExcludes.ShouldInclude(namespace) {
if namespace != "" && !ctx.namespaceIncludesExcludes.ShouldInclude(namespace) && !ctx.resourceMustHave.Has(groupResource.String()) {
ctx.log.Infof("Skipping namespace %s", namespace)
continue
}

View File

@@ -210,7 +210,25 @@ func (sr *shimRepository) DeleteManifest(ctx context.Context, id manifest.ID) er
}
func (sr *shimRepository) ReplaceManifests(ctx context.Context, labels map[string]string, payload interface{}) (manifest.ID, error) {
return manifest.ID(""), errors.New("ReplaceManifests is not supported")
const minReplaceManifestTimeDelta = 100 * time.Millisecond
md, err := sr.FindManifests(ctx, labels)
if err != nil {
return "", errors.Wrap(err, "unable to load manifests")
}
for _, em := range md {
age := sr.Time().Sub(em.ModTime)
if age < minReplaceManifestTimeDelta {
time.Sleep(minReplaceManifestTimeDelta)
}
if err := sr.DeleteManifest(ctx, em.ID); err != nil {
return "", errors.Wrapf(err, "unable to delete previous manifest %v", em.ID)
}
}
return sr.PutManifest(ctx, labels, payload)
}
// Flush all the unifited repository data

View File

@@ -49,7 +49,6 @@ func TestShimRepo(t *testing.T) {
shim.PrefetchObjects(ctx, []object.ID{}, "hint")
shim.UpdateDescription("desc")
shim.NewWriter(ctx, repo.WriteSessionOptions{})
shim.ReplaceManifests(ctx, map[string]string{}, nil)
shim.OnSuccessfulFlush(func(ctx context.Context, w repo.RepositoryWriter) error { return nil })
backupRepo.On("Close", mock.Anything).Return(nil)
@@ -202,3 +201,92 @@ func TestShimObjWriter(t *testing.T) {
objWriter.On("Close").Return(nil)
writer.Close()
}
func TestReplaceManifests(t *testing.T) {
meta1 := udmrepo.ManifestEntryMetadata{
ID: "mani-1",
}
meta2 := udmrepo.ManifestEntryMetadata{
ID: "mani-2",
}
tests := []struct {
name string
backupRepo *mocks.BackupRepo
isGetManifestError bool
expectedError string
expectedID manifest.ID
}{
{
name: "Failed to find manifest",
isGetManifestError: true,
backupRepo: func() *mocks.BackupRepo {
backupRepo := &mocks.BackupRepo{}
backupRepo.On("FindManifests", mock.Anything, mock.Anything).Return([]*udmrepo.ManifestEntryMetadata{},
errors.New("fake-find-error"))
return backupRepo
}(),
expectedError: "unable to load manifests: failed to get manifests with labels map[]: fake-find-error",
},
{
name: "Failed to delete manifest",
isGetManifestError: true,
backupRepo: func() *mocks.BackupRepo {
backupRepo := &mocks.BackupRepo{}
backupRepo.On("FindManifests", mock.Anything, mock.Anything).Return([]*udmrepo.ManifestEntryMetadata{
&meta1,
&meta2,
}, nil)
backupRepo.On("Time").Return(time.Now())
backupRepo.On("DeleteManifest", mock.Anything, mock.Anything).Return(errors.New("fake-delete-error"))
return backupRepo
}(),
expectedError: "unable to delete previous manifest mani-1: fake-delete-error",
},
{
name: "Failed to put manifest",
backupRepo: func() *mocks.BackupRepo {
backupRepo := &mocks.BackupRepo{}
backupRepo.On("FindManifests", mock.Anything, mock.Anything).Return([]*udmrepo.ManifestEntryMetadata{
&meta1,
&meta2,
}, nil)
backupRepo.On("Time").Return(time.Now())
backupRepo.On("DeleteManifest", mock.Anything, mock.Anything).Return(nil)
backupRepo.On("PutManifest", mock.Anything, mock.Anything).Return(udmrepo.ID(""), errors.New("fake-put-error"))
return backupRepo
}(),
expectedError: "fake-put-error",
},
{
name: "Success",
backupRepo: func() *mocks.BackupRepo {
backupRepo := &mocks.BackupRepo{}
backupRepo.On("FindManifests", mock.Anything, mock.Anything).Return([]*udmrepo.ManifestEntryMetadata{
&meta1,
&meta2,
}, nil)
backupRepo.On("Time").Return(time.Now())
backupRepo.On("DeleteManifest", mock.Anything, mock.Anything).Return(nil)
backupRepo.On("PutManifest", mock.Anything, mock.Anything).Return(udmrepo.ID("fake-id"), nil)
return backupRepo
}(),
expectedID: manifest.ID("fake-id"),
},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
ctx := context.Background()
id, err := NewShimRepo(tc.backupRepo).ReplaceManifests(ctx, map[string]string{}, nil)
if tc.expectedError != "" {
assert.EqualError(t, err, tc.expectedError)
} else {
assert.NoError(t, err)
}
assert.Equal(t, tc.expectedID, id)
})
}
}

View File

@@ -44,7 +44,8 @@ import (
)
// All function mainly used to make testing more convenient
var applyRetentionPolicyFunc = policy.ApplyRetentionPolicy
var treeForSourceFunc = policy.TreeForSource
var setPolicyFunc = policy.SetPolicy
var saveSnapshotFunc = snapshot.SaveSnapshot
var loadSnapshotFunc = snapshot.LoadSnapshot
var listSnapshotsFunc = snapshot.ListSnapshots
@@ -72,18 +73,46 @@ func newOptionalBool(b bool) *policy.OptionalBool {
return &ob
}
// setupDefaultPolicy set default policy for kopia
func setupDefaultPolicy() *policy.Tree {
defaultPolicy := *policy.DefaultPolicy
func getDefaultPolicy() *policy.Policy {
return &policy.Policy{
RetentionPolicy: policy.RetentionPolicy{
KeepLatest: newOptionalInt(math.MaxInt32),
KeepAnnual: newOptionalInt(math.MaxInt32),
KeepDaily: newOptionalInt(math.MaxInt32),
KeepHourly: newOptionalInt(math.MaxInt32),
KeepMonthly: newOptionalInt(math.MaxInt32),
KeepWeekly: newOptionalInt(math.MaxInt32),
},
CompressionPolicy: policy.CompressionPolicy{
CompressorName: "none",
},
UploadPolicy: policy.UploadPolicy{
MaxParallelFileReads: newOptionalInt(runtime.NumCPU()),
ParallelUploadAboveSize: nil,
},
SchedulingPolicy: policy.SchedulingPolicy{
Manual: true,
},
ErrorHandlingPolicy: policy.ErrorHandlingPolicy{
IgnoreUnknownTypes: newOptionalBool(true),
},
}
}
defaultPolicy.RetentionPolicy.KeepLatest = newOptionalInt(math.MaxInt32)
defaultPolicy.CompressionPolicy.CompressorName = "none"
defaultPolicy.UploadPolicy.MaxParallelFileReads = newOptionalInt(runtime.NumCPU())
defaultPolicy.UploadPolicy.ParallelUploadAboveSize = nil
defaultPolicy.SchedulingPolicy.Manual = true
defaultPolicy.ErrorHandlingPolicy.IgnoreUnknownTypes = newOptionalBool(true)
func setupDefaultPolicy(ctx context.Context, rep repo.RepositoryWriter, sourceInfo snapshot.SourceInfo) (*policy.Tree, error) {
// some internal operations from Kopia code retrieves policies from repo directly, so we need to persist the policy to repo
err := setPolicyFunc(ctx, rep, sourceInfo, getDefaultPolicy())
if err != nil {
return nil, errors.Wrap(err, "error to set policy")
}
return policy.BuildTree(nil, &defaultPolicy)
// retrieve policy from repo
policyTree, err := treeForSourceFunc(ctx, rep, sourceInfo)
if err != nil {
return nil, errors.Wrap(err, "error to retrieve policy")
}
return policyTree, nil
}
// Backup backup specific sourcePath and update progress
@@ -117,7 +146,7 @@ func Backup(ctx context.Context, fsUploader SnapshotUploader, repoWriter repo.Re
Host: udmrepo.GetRepoDomain(),
Path: filepath.Clean(realSource),
}
if sourceInfo.Path == "" {
if realSource == "" {
sourceInfo.Path = dir
}
@@ -213,7 +242,10 @@ func SnapshotSource(
log.Infof("Using parent snapshot %s, start time %v, end time %v, description %s", previous[i].ID, previous[i].StartTime.ToTime(), previous[i].EndTime.ToTime(), previous[i].Description)
}
policyTree := setupDefaultPolicy()
policyTree, err := setupDefaultPolicy(ctx, rep, sourceInfo)
if err != nil {
return "", 0, errors.Wrapf(err, "unable to set policy for si %v", sourceInfo)
}
manifest, err := u.Upload(ctx, rootDir, policyTree, sourceInfo, previous...)
if err != nil {
@@ -223,14 +255,12 @@ func SnapshotSource(
manifest.Tags = snapshotTags
manifest.Description = description
manifest.Pins = []string{"velero-pin"}
if _, err = saveSnapshotFunc(ctx, rep, manifest); err != nil {
return "", 0, errors.Wrapf(err, "Failed to save kopia manifest %v", manifest.ID)
}
_, err = applyRetentionPolicyFunc(ctx, rep, sourceInfo, true)
if err != nil {
return "", 0, errors.Wrapf(err, "Failed to apply kopia retention policy for si %v", sourceInfo)
}
if err = rep.Flush(ctx); err != nil {
return "", 0, errors.Wrapf(err, "Failed to flush kopia repository")
}
@@ -245,7 +275,7 @@ func reportSnapshotStatus(manifest *snapshot.Manifest, policyTree *policy.Tree)
var errs []string
if ds := manifest.RootEntry.DirSummary; ds != nil {
for _, ent := range ds.FailedEntries {
policy := policyTree.DefinedPolicy()
policy := policyTree.EffectivePolicy()
if !(policy != nil && bool(*policy.ErrorHandlingPolicy.IgnoreUnknownTypes) && strings.Contains(ent.Error, fs.ErrUnknown.Error())) {
errs = append(errs, fmt.Sprintf("Error when processing %v: %v", ent.EntryPath, ent.Error))
}

View File

@@ -26,6 +26,7 @@ import (
"github.com/kopia/kopia/repo"
"github.com/kopia/kopia/repo/manifest"
"github.com/kopia/kopia/snapshot"
"github.com/kopia/kopia/snapshot/policy"
"github.com/kopia/kopia/snapshot/restore"
"github.com/kopia/kopia/snapshot/snapshotfs"
"github.com/pkg/errors"
@@ -58,7 +59,8 @@ func injectSnapshotFuncs() *snapshotMockes {
repoWriterMock: &repomocks.RepositoryWriter{},
}
applyRetentionPolicyFunc = s.policyMock.ApplyRetentionPolicy
setPolicyFunc = s.policyMock.SetPolicy
treeForSourceFunc = s.policyMock.TreeForSource
loadSnapshotFunc = s.snapshotMock.LoadSnapshot
saveSnapshotFunc = s.snapshotMock.SaveSnapshot
return s
@@ -135,13 +137,13 @@ func TestSnapshotSource(t *testing.T) {
notError: false,
},
{
name: "failed to apply policy",
name: "failed to set policy",
args: []mockArgs{
{methodName: "LoadSnapshot", returns: []interface{}{manifest, nil}},
{methodName: "SaveSnapshot", returns: []interface{}{manifest.ID, nil}},
{methodName: "TreeForSource", returns: []interface{}{nil, nil}},
{methodName: "ApplyRetentionPolicy", returns: []interface{}{nil, errors.New("failed to save snapshot")}},
{methodName: "SetPolicy", returns: []interface{}{nil}},
{methodName: "ApplyRetentionPolicy", returns: []interface{}{nil, nil}},
{methodName: "SetPolicy", returns: []interface{}{errors.New("failed to set policy")}},
{methodName: "Upload", returns: []interface{}{manifest, nil}},
{methodName: "Flush", returns: []interface{}{nil}},
},
@@ -232,7 +234,7 @@ func TestReportSnapshotStatus(t *testing.T) {
},
}
result, size, err := reportSnapshotStatus(manifest, setupDefaultPolicy())
result, size, err := reportSnapshotStatus(manifest, policy.BuildTree(nil, getDefaultPolicy()))
switch {
case tc.shouldError && err == nil:

View File

@@ -159,6 +159,10 @@ func (kp *kopiaProvider) RunBackup(
tags[uploader.SnapshotRequesterTag] = kp.requestorType
tags[uploader.SnapshotUploaderTag] = uploader.KopiaType
if realSource != "" {
realSource = fmt.Sprintf("%s/%s/%s", kp.requestorType, uploader.KopiaType, realSource)
}
snapshotInfo, isSnapshotEmpty, err := BackupFunc(ctx, kpUploader, repoWriter, path, realSource, forceFull, parentSnapshot, volMode, tags, log)
if err != nil {
if kpUploader.IsCanceled() {

View File

@@ -155,14 +155,19 @@ func RebindPVC(ctx context.Context, pvcGetter corev1client.CoreV1Interface,
}
// ResetPVBinding resets the binding info of a PV and adds the required labels so as to make it ready for binding
func ResetPVBinding(ctx context.Context, pvGetter corev1client.CoreV1Interface, pv *corev1api.PersistentVolume, labels map[string]string) (*corev1api.PersistentVolume, error) {
func ResetPVBinding(ctx context.Context, pvGetter corev1client.CoreV1Interface, pv *corev1api.PersistentVolume,
labels map[string]string, pvc *corev1api.PersistentVolumeClaim) (*corev1api.PersistentVolume, error) {
origBytes, err := json.Marshal(pv)
if err != nil {
return nil, errors.Wrap(err, "error marshaling original PV")
}
updated := pv.DeepCopy()
updated.Spec.ClaimRef = nil
updated.Spec.ClaimRef = &corev1api.ObjectReference{
Kind: pvc.Kind,
Namespace: pvc.Namespace,
Name: pvc.Name,
}
delete(updated.Annotations, KubeAnnBoundByController)
if labels != nil {
@@ -283,6 +288,10 @@ func WaitPVBound(ctx context.Context, pvGetter corev1client.CoreV1Interface, pvN
return false, nil
}
if tmpPV.Status.Phase != corev1api.VolumeBound {
return false, nil
}
if tmpPV.Spec.ClaimRef.Name != pvcName {
return false, errors.Errorf("pv has been bound by unexpected pvc %s/%s", tmpPV.Spec.ClaimRef.Namespace, tmpPV.Spec.ClaimRef.Name)
}

View File

@@ -568,10 +568,25 @@ func TestResetPVBinding(t *testing.T) {
},
}
pvcObject := &corev1api.PersistentVolumeClaim{
TypeMeta: metav1.TypeMeta{
Kind: "fake-kind-1",
},
ObjectMeta: metav1.ObjectMeta{
Namespace: "fake-ns-1",
Name: "fake-pvc-1",
Annotations: map[string]string{
KubeAnnBindCompleted: "true",
KubeAnnBoundByController: "true",
},
},
}
tests := []struct {
name string
clientObj []runtime.Object
pv *corev1api.PersistentVolume
pvc *corev1api.PersistentVolumeClaim
labels map[string]string
reactors []reactor
result *corev1api.PersistentVolume
@@ -580,6 +595,7 @@ func TestResetPVBinding(t *testing.T) {
{
name: "path fail",
pv: pvObject,
pvc: pvcObject,
clientObj: []runtime.Object{pvObject},
reactors: []reactor{
{
@@ -595,6 +611,7 @@ func TestResetPVBinding(t *testing.T) {
{
name: "succeed",
pv: pvObject,
pvc: pvcObject,
labels: map[string]string{
"fake-label-1": "fake-value-1",
"fake-label-2": "fake-value-2",
@@ -608,6 +625,13 @@ func TestResetPVBinding(t *testing.T) {
"fake-label-2": "fake-value-2",
},
},
Spec: corev1api.PersistentVolumeSpec{
ClaimRef: &corev1api.ObjectReference{
Kind: "fake-kind-1",
Namespace: "fake-ns-1",
Name: "fake-pvc-1",
},
},
},
},
}
@@ -622,7 +646,7 @@ func TestResetPVBinding(t *testing.T) {
var kubeClient kubernetes.Interface = fakeKubeClient
result, err := ResetPVBinding(context.Background(), kubeClient.CoreV1(), test.pv, test.labels)
result, err := ResetPVBinding(context.Background(), kubeClient.CoreV1(), test.pv, test.labels, test.pvc)
if err != nil {
assert.EqualError(t, err, test.err)
} else {
@@ -740,6 +764,18 @@ func TestWaitPVBound(t *testing.T) {
},
err: "error to wait for bound of PV: timed out waiting for the condition",
},
{
name: "pvc status not bound",
pvName: "fake-pv",
kubeClientObj: []runtime.Object{
&corev1api.PersistentVolume{
ObjectMeta: metav1.ObjectMeta{
Name: "fake-pv",
},
},
},
err: "error to wait for bound of PV: timed out waiting for the condition",
},
{
name: "pvc claimRef pvc name mismatch",
pvName: "fake-pv",
@@ -757,6 +793,9 @@ func TestWaitPVBound(t *testing.T) {
Name: "fake-pvc-1",
},
},
Status: corev1api.PersistentVolumeStatus{
Phase: "Bound",
},
},
},
err: "error to wait for bound of PV: pv has been bound by unexpected pvc fake-ns/fake-pvc-1",
@@ -778,6 +817,9 @@ func TestWaitPVBound(t *testing.T) {
Name: "fake-pvc",
},
},
Status: corev1api.PersistentVolumeStatus{
Phase: "Bound",
},
},
},
err: "error to wait for bound of PV: pv has been bound by unexpected pvc fake-ns-1/fake-pvc",
@@ -799,6 +841,9 @@ func TestWaitPVBound(t *testing.T) {
Namespace: "fake-ns",
},
},
Status: corev1api.PersistentVolumeStatus{
Phase: "Bound",
},
},
},
expectedPV: &corev1api.PersistentVolume{
@@ -812,6 +857,9 @@ func TestWaitPVBound(t *testing.T) {
Namespace: "fake-ns",
},
},
Status: corev1api.PersistentVolumeStatus{
Phase: "Bound",
},
},
},
}

View File

@@ -120,6 +120,10 @@ spec:
ttl: 24h0m0s
# whether pod volume file system backup should be used for all volumes by default.
defaultVolumesToFsBackup: true
# Whether snapshot data should be moved. If set, data movement is launched after the snapshot is created.
snapshotMoveData: true
# The data mover to be used by the backup. If the value is "" or "velero", the built-in data mover will be used.
datamover: velero
# Actions to perform at different times during a backup. The only hook supported is
# executing a command in a container in a pod using the pod exec API. Optional.
hooks:

View File

@@ -118,6 +118,10 @@ spec:
defaultVolumesToFsBackup: true
# The labels you want on backup objects, created from this schedule (instead of copying the labels you have on schedule object itself).
# When this field is set, the labels from the Schedule resource are not copied to the Backup resource.
# Whether snapshot data should be moved. If set, data movement is launched after the snapshot is created.
snapshotMoveData: true
# The data mover to be used by the backup. If the value is "" or "velero", the built-in data mover will be used.
datamover: velero
metadata:
labels:
labelname: somelabelvalue

View File

@@ -0,0 +1,399 @@
---
title: "CSI Snapshot Data Movement"
layout: docs
---
CSI Snapshot Data Movement is built according to the [Volume Snapshot Data Movement design][1] and is specifically designed to move CSI snapshot data to a backup storage location.
CSI Snapshot Data Movement takes CSI snapshots through the CSI plugin in nearly the same way as [CSI snapshot backup][2]. However, it doesn't stop after a snapshot is taken. Instead, it tries to access the snapshot data through various data movers and back up the data to a backup storage connected to the data movers.
Consequently, the volume data is backed up to a pre-defined backup storage in a consistent manner.
After the backup completes, the CSI snapshot will be removed by Velero and the snapshot data space will be released on the storage side.
CSI Snapshot Data Movement is useful in below scenarios:
- For on-premises users, the storage usually doesn't support durable snapshots, so it is impossible/less efficient/cost ineffective to keep volume snapshots by the storage, as required by the [CSI snapshot backup][2]. This feature helps to move the snapshot data to a storage with lower cost and larger scale for long time preservation.
- For public cloud users, this feature helps users to fulfil the multiple cloud strategy. It allows users to back up volume snapshots from one cloud provider and preserve or restore the data to another cloud provider. Then users will be free to flow their business data across cloud providers based on Velero backup and restore.
Besides, Velero [File System Backup][3] which could also back up the volume data to a pre-defined backup storage. CSI Snapshot Data Movement works together with [File System Backup][3] to satisfy different requirements for the above scenarios. And whenever available, CSI Snapshot Data Movement should be used in preference since the [File System Backup][3] reads data from the live PV, in which way the data is not captured at the same point in time, so is less consistent.
Moreover, CSI Snapshot Data Movement brings more possible ways of data access, i.e., accessing the data from the block level, either fully or incrementally.
On the other hand, there are quite some cases that CSI snapshot is not available (i.e., you need a volume snapshot plugin for your storage platform, or you're using EFS, NFS, emptyDir, local, or any other volume type that doesn't have a native snapshot), then [File System Backup][3] will be the only option.
CSI Snapshot Data Movement supports both built-in data mover and customized data movers. For the details of how Velero works with customized data movers, check the [Volume Snapshot Data Movement design][1]. Velero provides a built-in data mover which uses Velero built-in uploaders (at present the available uploader is Kopia uploader) to read the snapshot data and write to the Unified Repository (by default implemented by Kopia repository).
The way for Velero built-in data mover to access the snapshot data is based on the hostpath access by Velero node-agent, so the node-agent pods need to run as root user and even under privileged mode in some environments, as same as [File System Backup][3].
## Setup CSI Snapshot Data Movement
## Prerequisites
1. The source cluster is Kubernetes version 1.20 or greater.
2. The source cluster is running a CSI driver capable of support volume snapshots at the [v1 API level][4].
3. CSI Snapshot Data Movement requires the Kubernetes [MountPropagation feature][5].
### Install Velero Node Agent
Velero Node Agent is a Kubernetes daemonset that hosts Velero data movement modules, i.e., data mover controller, uploader & repository.
If you are using Velero built-in data mover, Node Agent must be installed. To install Node Agent, use the `--use-node-agent` flag.
```
velero install --use-node-agent
```
### Configure Node Agent DaemonSet spec
After installation, some PaaS/CaaS platforms based on Kubernetes also require modifications the node-agent DaemonSet spec.
The steps in this section are only needed if you are installing on RancherOS, OpenShift, VMware Tanzu Kubernetes Grid
Integrated Edition (formerly VMware Enterprise PKS), or Microsoft Azure.
**RancherOS**
Update the host path for volumes in the node-agent DaemonSet in the Velero namespace from `/var/lib/kubelet/pods` to
`/opt/rke/var/lib/kubelet/pods`.
```yaml
hostPath:
path: /var/lib/kubelet/pods
```
to
```yaml
hostPath:
path: /opt/rke/var/lib/kubelet/pods
```
**OpenShift**
To mount the correct hostpath to pods volumes, run the node-agent pod in `privileged` mode.
1. Add the `velero` ServiceAccount to the `privileged` SCC:
```
oc adm policy add-scc-to-user privileged -z velero -n velero
```
2. Modify the DaemonSet yaml to request a privileged mode:
```diff
@@ -67,3 +67,5 @@ spec:
value: /credentials/cloud
- name: VELERO_SCRATCH_DIR
value: /scratch
+ securityContext:
+ privileged: true
```
or
```shell
oc patch ds/node-agent \
--namespace velero \
--type json \
-p '[{"op":"add","path":"/spec/template/spec/containers/0/securityContext","value": { "privileged": true}}]'
```
If node-agent is not running in a privileged mode, it will not be able to access snapshot volumes within the mounted
hostpath directory because of the default enforced SELinux mode configured in the host system level. You can
[create a custom SCC][6] to relax the
security in your cluster so that node-agent pods are allowed to use the hostPath volume plugin without granting
them access to the `privileged` SCC.
By default a userland openshift namespace will not schedule pods on all nodes in the cluster.
To schedule on all nodes the namespace needs an annotation:
```
oc annotate namespace <velero namespace> openshift.io/node-selector=""
```
This should be done before velero installation.
Or the ds needs to be deleted and recreated:
```
oc get ds node-agent -o yaml -n <velero namespace> > ds.yaml
oc annotate namespace <velero namespace> openshift.io/node-selector=""
oc create -n <velero namespace> -f ds.yaml
```
**VMware Tanzu Kubernetes Grid Integrated Edition (formerly VMware Enterprise PKS)**
You need to enable the `Allow Privileged` option in your plan configuration so that Velero is able to mount the hostpath.
The hostPath should be changed from `/var/lib/kubelet/pods` to `/var/vcap/data/kubelet/pods`
```yaml
hostPath:
path: /var/vcap/data/kubelet/pods
```
### Configure A Backup Storage Location
At present, Velero backup repository supports object storage as the backup storage. Velero gets the parameters from the
[BackupStorageLocation][8] to compose the URL to the backup storage.
Velero's known object storage providers are included here [supported providers][9], for which, Velero pre-defines the endpoints. If you want to use a different backup storage, make sure it is S3 compatible and you provide the correct bucket name and endpoint in BackupStorageLocation. Velero handles the creation of the backup repo prefix in the backup storage, so make sure it is specified in BackupStorageLocation correctly.
Velero creates one backup repository per namespace. For example, if backing up 2 namespaces, namespace1 and namespace2, using kopia repository on AWS S3, the full backup repo path for namespace1 would be `https://s3-us-west-2.amazonaws.com/bucket/kopia/ns1` and for namespace2 would be `https://s3-us-west-2.amazonaws.com/bucket/kopia/ns2`.
There may be additional installation steps depending on the cloud provider plugin you are using. You should refer to the [plugin specific documentation][9] for the must up to date information.
**Note:** Currently, Velero creates a secret named `velero-repo-credentials` in the velero install namespace, containing a default backup repository password.
You can update the secret with your own password encoded as base64 prior to the first backup (i.e., [File System Backup][3], snapshot data movements) targeting to the backup repository. The value of the key to update is
```
data:
repository-password: <custom-password>
```
Backup repository is created during the first execution of backup targeting to it after installing Velero with node agent. If you update the secret password after the first backup which created the backup repository, then Velero will not be able to connect with the older backups.
## Install Velero with CSI support on source cluster
On source cluster, Velero needs to manipulate CSI snapshots through the CSI volume snapshot APIs, so you must enable the `EnableCSI` feature flag and install the Velero [CSI plugin][2] on the Velero server.
Both of these can be added with the `velero install` command.
```bash
velero install \
--features=EnableCSI \
--plugins=<object storage plugin>,velero/velero-plugin-for-csi:v0.6.0 \
...
```
### Configure storage class on target cluster
For Velero built-in data movement, CSI facilities are not required necessarily in the target cluster. On the other hand, Velero built-in data movement creates a PVC with the same specification as it is in the source cluster and expects the volume to be provisioned similarly. For example, the same storage class should be working in the target cluster.
By default, Velero won't restore storage class resources from the backup since they are cluster scope resources. However, if you specify the `--include-cluster-resources` restore flag, they will be restored. For a cross provider scenario, the storage class from the source cluster is probably not usable in the target cluster.
In either of the above cases, the best practice is to create a working storage class in the target cluster with the same name as it in the source cluster. In this way, even though `--include-cluster-resources` is specified, Velero restore will skip restoring the storage class since it finds an existing one.
Otherwise, if the storage class name in the target cluster is different, you can change the PVC's storage class name during restore by the [changing PV/PVC storage class][10] method. You can also configure to skip restoring the storage class resources from the backup since they are not usable.
### Customized Data Movers
If you are using a customized data mover, follow the data mover's instructions for any further prerequisites.
For Velero side configurations mentioned above, the installation and configuration of node-agent may not be required.
## To back up
Velero uses a new custom resource `DataUpload` to drive the data movement. The selected data mover will watch and reconcile the CRs.
Velero allows users to decide whether the CSI snapshot data should be moved per backup.
Velero also allows users to select the data mover to move the CSI snapshot data per backup.
The both selections are simply done by a parameter when running the backup.
To take a backup with Velero's built-in data mover:
```bash
velero backup create NAME --snapshot-move-data OPTIONS...
```
Or if you want to use a customized data mover:
```bash
velero backup create NAME --snapshot-move-data --data-mover DATA-MOVER-NAME OPTIONS...
```
When the backup starts, you will see the `VolumeSnapshot` and `VolumeSnapshotContent` objects created, but after the backup finishes, the objects will disppear.
After snapshots are created, you will see one or more `DataUpload` CRs created.
You may also see some intermediate objects (i.e., pods, PVCs, PVs) created in Velero namespace or the cluster scope, they are to help data movers to move data. And they will be removed after the backup completes.
The phase of a `DataUpload` CR changes several times during the backup process and finally goes to one of the terminal status, `Completed`, `Failed` or `Cancelled`. You can see the phase changes as well as the data upload progress by watching the `DataUpload` CRs:
```bash
kubectl -n velero get datauploads -l velero.io/backup-name=YOUR_BACKUP_NAME -w
```
When the backup completes, you can view information about the backups:
```bash
velero backup describe YOUR_BACKUP_NAME
```
```bash
kubectl -n velero get datauploads -l velero.io/backup-name=YOUR_BACKUP_NAME -o yaml
```
## To restore
You don't need to set any additional information when creating a data mover restore. The configurations are automatically retrieved from the backup, i.e., whether data movement should be involved and which data mover conducts the data movement.
To restore from your Velero backup:
```bash
velero restore create --from-backup BACKUP_NAME OPTIONS...
```
When the restore starts, you will see one or more `DataDownload` CRs created.
You may also see some intermediate objects (i.e., pods, PVCs, PVs) created in Velero namespace or the cluster scope, they are to help data movers to move data. And they will be removed after the restore completes.
The phase of a `DataDownload` CR changes several times during the restore process and finally goes to one of the terminal status, `Completed`, `Failed` or `Cancelled`. You can see the phase changes as well as the data download progress by watching the DataDownload CRs:
```bash
kubectl -n velero get datadownloads -l velero.io/restore-name=YOUR_RESTORE_NAME -w
```
When the restore completes, view information about your restores:
```bash
velero restore describe YOUR_RESTORE_NAME
```
```bash
kubectl -n velero get datadownloads -l velero.io/restore-name=YOUR_RESTORE_NAME -o yaml
```
## Limitations
- CSI and CSI snapshot support both file system volume mode and block volume mode. At present, Velero built-in data mover doesn't support
block mode volume or volume snapshot.
- [Velero built-in data mover] At present, Velero uses a static, common encryption key for all backup repositories it creates. **This means
that anyone who has access to your backup storage can decrypt your backup data**. Make sure that you limit access
to the backup storage appropriately.
- [Velero built-in data mover] Even though the backup data could be incrementally preserved, for a single file data, Velero built-in data mover leverages on deduplication to find the difference to be saved. This means that large files (such as ones storing a database) will take a long time to scan for data deduplication, even if the actual difference is small.
- [Velero built-in data mover] You may need to [customize the resource limits][11] to make sure backups complete successfully for massive small files or large backup size cases, for more details refer to [Velero file system level backup performance guide][12].
## Troubleshooting
Run the following checks:
Are your Velero server and daemonset pods running?
```bash
kubectl get pods -n velero
```
Does your backup repository exist, and is it ready?
```bash
velero repo get
velero repo get REPO_NAME -o yaml
```
Are there any errors in your Velero backup/restore?
```bash
velero backup describe BACKUP_NAME
velero backup logs BACKUP_NAME
velero restore describe RESTORE_NAME
velero restore logs RESTORE_NAME
```
What is the status of your `DataUpload` and `DataDownload`?
```bash
kubectl -n velero get datauploads -l velero.io/backup-name=BACKUP_NAME -o yaml
kubectl -n velero get datadownloads -l velero.io/restore-name=RESTORE_NAME -o yaml
```
Is there any useful information in the Velero server or daemonset pod logs?
```bash
kubectl -n velero logs deploy/velero
kubectl -n velero logs DAEMON_POD_NAME
```
**NOTE**: You can increase the verbosity of the pod logs by adding `--log-level=debug` as an argument to the container command in the deployment/daemonset pod template spec.
If you are using a customized data mover, follow the data mover's instruction for additional troubleshooting methods.
## How backup and restore work
CSI snapshot data movement is a combination of CSI snapshot and data movement, which is jointly executed by Velero server, CSI plugin and the data mover.
This section lists some general concept of how CSI snapshot data movement backup and restore work. For the detailed mechanisms and workflows, you can check the [Volume Snapshot Data Movement design][1].
### Custom resource and controllers
Velero has three custom resource definitions and associated controllers:
- `DataUpload` - represents a data upload of a volume snapshot. The CSI plugin creates one `DataUpload` per CSI snapshot. Data movers need to handle these CRs to finish the data upload process.
Velero built-in data mover runs a controller for this resource on each node (in node-agent daemonset). Controllers from different nodes may handle one CR in different phases, but finally the data transfer is done by one single controller which will call uploaders from the backend.
- `DataDownload` - represents a data download of a volume snapshot. The CSI plugin creates one `DataDownload` per volume to be restored. Data movers need to handle these CRs to finish the data upload process.
Velero built-in data mover runs a controller for this resource on each node (in node-agent daemonset). Controllers from different nodes may handle one CR in different phases, but finally the data transfer is done by one single controller which will call uploaders from the backend.
- `BackupRepository` - represents/manages the lifecycle of Velero's backup repositories. Velero creates a backup repository per namespace when the first CSI snapshot backup/restore for a namespace is requested. You can see information about your Velero's backup repositories by running `velero repo get`.
This CR is used by Velero built-in data movers, customized data movers may or may not use it.
For other resources or controllers involved by customized data movers, check the data mover's instructions.
### Backup
Velero backs up resources for CSI snapshot data movement backup in the same way as other backup types. When it encounters a PVC, particular logics will be conducted:
- When it finds a PVC object, Velero calls CSI plugin through a Backup Item Action.
- CSI plugin first takes a CSI snapshot to the PVC by creating the `VolumeSnapshot` and `VolumeSnapshotContent`.
- CSI plugin checks if a data movement is required, if so it creates a `DataUpload` CR and then returns to Velero backup.
- Velero now is able to back up other resources, including other PVC objects.
- Velero backup controller periodically queries the data movement status from CSI plugin, the period is configurable through the Velero server parameter `--item-operation-sync-frequency`, by default it is 10s. On the call, CSI plugin turns to check the phase of the `DataUpload` CRs.
- When all the `DataUpload` CRs come to a terminal state (i.e., `Completed`, `Failed` or `Cancelled`), Velero backup perists all the necessary information and finish the backup.
- CSI plugin expects a data mover to handle the `DataUpload` CR. If no data mover is configured for the backup, Velero built-in data mover will handle it.
- If the `DataUpload` CR does not reach to the terminal state with in the given time, the `DataUpload` CR will be cancelled. You can set the timeout value per backup through the `--item-operation-timeout` parameter, the default value is `4 hours`.
- Velero built-in data mover creates a volume from the CSI snapshot and transfer the data to the backup storage according to the backup storage location defined by users.
- After the volume is created from the CSI snapshot, Velero built-in data mover waits for Kubernetes to provision the volume, this may take some time varying from storage providers, but if the provision cannot be finished in a given time, Velero built-in data mover will cancel this `DataUpload` CR. The timeout is configurable through a node-agent's parameter `data-mover-prepare-timeout`, the default value is 30 minutes.
- When the data transfer completes or any error happens, Velero built-in data mover sets the `DataUpload` CR to the terminal state, either `Completed` or `Failed`.
- Velero built-in data mover also monitors the cancellation request to the `DataUpload` CR, once that happens, it cancels its ongoing activities, cleans up the intermediate resources and set the `DataUpload` CR to `Cancelled`.
### Restore
Velero restores resources for CSI snapshot data movement restore in the same way as other restore types. When it encounters a PVC, particular logics will be conducted:
- When it finds a PVC object, Velero calls CSI plugin through a Restore Item Action.
- CSI plugin checks the backup information, if a data movement was involved, it creates a `DataDownload` CR and then returns to Velero restore.
- Velero is now able to restore other resources, including other PVC objects.
- Velero restore controller periodically queries the data movement status from CSI plugin, the period is configurable through the Velero server parameter `--item-operation-sync-frequency`, by default it is 10s. On the call, CSI plugin turns to check the phase of the `DataDownload` CRs.
- When all `DataDownload` CRs come to a terminal state (i.e., `Completed`, `Failed` or `Cancelled`), Velero restore will finish.
- CSI plugin expects the same data mover for the backup to handle the `DataDownload` CR. If no data mover was configured for the backup, Velero built-in data mover will handle it.
- If the `DataDownload` CR does not reach to the terminal state with in the given time, the `DataDownload` CR will be cancelled. You can set the timeout value per backup through the same `--item-operation-timeout` parameter.
- Velero built-in data mover creates a volume with the same specification of the source volume.
- Velero built-in data mover waits for Kubernetes to provision the volume, this may take some time varying from storage providers, but if the provision cannot be finished in a given time, Velero built-in data mover will cancel this `DataDownload` CR. The timeout is configurable through the same node-agent's parameter `data-mover-prepare-timeout`.
- After the volume is provisioned, Velero built-in data mover starts to transfer the data from the backup storage according to the backup storage location defined by users.
- When the data transfer completes or any error happens, Velero built-in data mover sets the `DataDownload` CR to the terminal state, either `Completed` or `Failed`.
- Velero built-in data mover also monitors the cancellation request to the `DataDownload` CR, once that happens, it cancels its ongoing activities, cleans up the intermediate resources and set the `DataDownload` CR to `Cancelled`.
### Parallelism
Velero calls the CSI plugin concurrently for the volume, so `DataUpload`/`DataDownload` CRs are created concurrently by the CSI plugin. For more details about the call between Velero and CSI plugin, check the [Volume Snapshot Data Movement design][1].
In which manner the `DataUpload`/`DataDownload` CRs are processed is totally decided by the data mover you select for the backup/restore.
For Velero built-in data mover, it uses Kubernetes' scheduler to mount a snapshot volume/restore volume associated to a `DataUpload`/`DataDownload` CR into a specific node, and then the `DataUpload`/`DataDownload` controller (in node-agent daemonset) in that node will handle the `DataUpload`/`DataDownload`.
At present, a `DataUpload`/`DataDownload` controller in one node handles one request at a time.
That is to say, the snapshot volumes/restore volumes may spread in different nodes, then their associated `DataUpload`/`DataDownload` CRs will be processed in parallel; while for the snapshot volumes/restore volumes in the same node, their associated `DataUpload`/`DataDownload` CRs are processed sequentially.
You can check in which node the `DataUpload`/`DataDownload` CRs are processed and their parallelism by watching the `DataUpload`/`DataDownload` CRs:
```bash
kubectl -n velero get datauploads -l velero.io/backup-name=YOUR_BACKUP_NAME -w
```
```bash
kubectl -n velero get datadownloads -l velero.io/restore-name=YOUR_RESTORE_NAME -w
```
### Cancellation
At present, Velero backup and restore doesn't support end to end cancellation that is launched by users.
However, Velero cancels the `DataUpload`/`DataDownload` in below scenarios automatically:
- When Velero server is restarted
- When node-agent is restarted
- When an ongoing backup/restore is deleted
- When a backup/restore does not finish before the item operation timeout (default value is `4 hours`)
Customized data movers that support cancellation could cancel their ongoing tasks and clean up any intermediate resources. If you are using Velero built-in data mover, the cancellation is supported.
[1]: https://github.com/vmware-tanzu/velero/pull/5968
[2]: csi.md
[3]: file-system-backup.md
[4]: https://kubernetes.io/blog/2020/12/10/kubernetes-1.20-volume-snapshot-moves-to-ga/
[5]: https://kubernetes.io/docs/concepts/storage/volumes/#mount-propagation
[6]: https://docs.openshift.com/container-platform/3.11/admin_guide/manage_scc.html
[7]: https://docs.microsoft.com/en-us/azure/aks/azure-files-dynamic-pv
[8]: api-types/backupstoragelocation.md
[9]: supported-providers.md
[10]: restore-reference.md#changing-pv/pvc-Storage-Classes
[11]: customize-installation.md#customize-resource-requests-and-limits
[12]: performance-guidance.md

View File

@@ -37,11 +37,53 @@ This section documents some of the choices made during implementation of the Vel
1. VolumeSnapshots created by the Velero CSI plugins are retained only for the lifetime of the backup even if the `DeletionPolicy` on the VolumeSnapshotClass is set to `Retain`. To accomplish this, during deletion of the backup the prior to deleting the VolumeSnapshot, VolumeSnapshotContent object is patched to set its `DeletionPolicy` to `Delete`. Deleting the VolumeSnapshot object will result in cascade delete of the VolumeSnapshotContent and the snapshot in the storage provider.
1. VolumeSnapshotContent objects created during a `velero backup` that are dangling, unbound to a VolumeSnapshot object, will be discovered, using labels, and deleted on backup deletion.
1. The Velero CSI plugins, to backup CSI backed PVCs, will choose the VolumeSnapshotClass in the cluster that has the same driver name and also has the `velero.io/csi-volumesnapshot-class` label set on it, like
```yaml
velero.io/csi-volumesnapshot-class: "true"
```
1. The VolumeSnapshot objects will be removed from the cluster after the backup is uploaded to the object storage, so that the namespace that is backed up can be deleted without removing the snapshot in the storage provider if the `DeletionPolicy` is `Delete.
1. The Velero CSI plugins, to backup CSI backed PVCs, will choose the VolumeSnapshotClass in the cluster based on the following logic:
1. **Default Behavior:**
You can simply create a VolumeSnapshotClass for a particular driver and put a label on it to indicate that it is the default VolumeSnapshotClass for that driver. For example, if you want to create a VolumeSnapshotClass for the CSI driver `disk.csi.cloud.com` for taking snapshots of disks created with `disk.csi.cloud.com` based storage classes, you can create a VolumeSnapshotClass like this:
```yaml
apiVersion: snapshot.storage.k8s.io/v1
kind: VolumeSnapshotClass
metadata:
name: test-snapclass
labels:
velero.io/csi-volumesnapshot-class: "true"
driver: disk.csi.cloud.com
```
Note: For each driver type, there should only be 1 VolumeSnapshotClass with the label `velero.io/csi-volumesnapshot-class: "true"`.
2. **Choose VolumeSnapshotClass for a particular Backup Or Schedule:**
If you want to use a particular VolumeSnapshotClass for a particular backup or schedule, you can add a annotation to the backup or schedule to indicate which VolumeSnapshotClass to use. For example, if you want to use the VolumeSnapshotClass `test-snapclass` for a particular backup for snapshotting PVCs of `disk.csi.cloud.com`, you can create a backup like this:
```yaml
apiVersion: velero.io/v1
kind: Backup
metadata:
name: test-backup
annotations:
velero.io/csi-volumesnapshot-class_disk.csi.cloud.com: "test-snapclass"
spec:
includedNamespaces:
- default
```
Note: Please ensure all your annotations are in lowercase. And follow the following format: `velero.io/csi-volumesnapshot-class_<driver name> = <VolumeSnapshotClass Name>`
3. **Choosing VolumeSnapshotClass for a particular PVC:**
If you want to use a particular VolumeSnapshotClass for a particular PVC, you can add a annotation to the PVC to indicate which VolumeSnapshotClass to use. This overrides any annotation added to backup or schedule. For example, if you want to use the VolumeSnapshotClass `test-snapclass` for a particular PVC, you can create a PVC like this:
```yaml
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: test-pvc
annotations:
velero.io/csi-volumesnapshot-class: "test-snapclass"
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 1Gi
storageClassName: disk.csi.cloud.com
```
1. The VolumeSnapshot objects will be removed from the cluster after the backup is uploaded to the object storage, so that the namespace that is backed up can be deleted without removing the snapshot in the storage provider if the `DeletionPolicy` is `Delete`.
## How it Works - Overview

View File

@@ -13,6 +13,8 @@ the supported cloud providers block storage offerings (Amazon EBS Volumes, Az
It also provides a plugin model that enables anyone to implement additional object and block storage backends, outside the
main Velero repository.
If your storage supports CSI (Container Storage Interface) snapshots, Velero also allows you to take snapshots through CSI and then optionally move the snapshot data to a different storage location.
Velero's File System Backup is an addition to the aforementioned snapshot approaches. Its pros and cons are listed below:
Pros:
- It is capable of backing up and restoring almost any type of Kubernetes volume. Therefore, if you need a volume snapshot
@@ -22,9 +24,8 @@ have a native snapshot concept, FSB might be for you.
the one backing Kubernetes volumes, for example, a durable storage.
Cons:
- It backs up data from the live file system, so the backup data is less consistent than the snapshot approaches.
- It access the file system from the mounted hostpath directory, so the pods need to run as root user and even under
privileged mode in some environments.
- It backs up data from the live file system, in which way the data is not captured at the same point in time, so is less consistent than the snapshot approaches.
- It access the file system from the mounted hostpath directory, so Velero Node Agent pods need to run as root user and even under privileged mode in some environments.
**NOTE:** hostPath volumes are not supported, but the [local volume type][5] is supported.
@@ -64,6 +65,15 @@ for namespace2 would be `https://s3-us-west-2.amazonaws.com/bucket/kopia/ns2`.
There may be additional installation steps depending on the cloud provider plugin you are using. You should refer to the
[plugin specific documentation](supported-providers.md) for the must up to date information.
**Note:** Currently, Velero creates a secret named `velero-repo-credentials` in the velero install namespace, containing a default backup repository password.
You can update the secret with your own password encoded as base64 prior to the first backup (i.e., FS Backup, data mover) targeting to the backup repository. The value of the key to update is
```
data:
repository-password: <custom-password>
```
Backup repository is created during the first execution of backup targeting to it after installing Velero with node agent. If you update the secret password after the first
backup which created the backup repository, then Velero will not be able to connect with the older backups.
### Configure Node Agent DaemonSet spec
After installation, some PaaS/CaaS platforms based on Kubernetes also require modifications the node-agent DaemonSet spec.
@@ -98,7 +108,7 @@ To mount the correct hostpath to pods volumes, run the node-agent pod in `privil
1. Add the `velero` ServiceAccount to the `privileged` SCC:
```
$ oc adm policy add-scc-to-user privileged -z velero -n velero
oc adm policy add-scc-to-user privileged -z velero -n velero
```
2. Modify the DaemonSet yaml to request a privileged mode:
@@ -157,20 +167,6 @@ hostPath:
path: /var/vcap/data/kubelet/pods
```
**Microsoft Azure**
If you are using [Azure Files][8], you need to add `nouser_xattr` to your storage class's `mountOptions`.
See [this restic issue][9] for more details.
You can use the following command to patch the storage class:
```bash
kubectl patch storageclass/<YOUR_AZURE_FILE_STORAGE_CLASS_NAME> \
--type json \
--patch '[{"op":"add","path":"/mountOptions/-","value":"nouser_xattr"}]'
```
## To back up
Velero supports two approaches of discovering pod volumes that need to be backed up using FSB:
@@ -354,6 +350,7 @@ to make sure backups complete successfully for massive small files or large back
For this reason, FSB can only backup volumes that are mounted by a pod and not directly from the PVC. For orphan PVC/PV pairs
(without running pods), some Velero users overcame this limitation running a staging pod (i.e. a busybox or alpine container
with an infinite sleep) to mount these PVC/PV pairs prior taking a Velero backup.
- Velero File System Backup expects volumes to be mounted under `<hostPath>/<pod UID>` (`hostPath` is configurable as mentioned in [Configure Node Agent DaemonSet spec](#configure-node-agent-daemonset-spec)). Some Kubernetes systems (i.e., [vCluster][11]) don't mount volumes under the `<pod UID>` sub-dir, Velero File System Backup is not working with them.
## Customize Restore Helper Container
@@ -595,3 +592,4 @@ To solve this, a controller was written by Thomann Bits&Beats: [velero-pvc-watch
[8]: https://docs.microsoft.com/en-us/azure/aks/azure-files-dynamic-pv
[9]: https://github.com/restic/restic/issues/1800
[10]: customize-installation.md#default-pod-volume-backup-to-file-system-backup
[11]: https://www.vcluster.com/

View File

@@ -29,11 +29,14 @@ Below is the two-step of using resource modifiers to modify the resources during
version: v1
resourceModifierRules:
- conditions:
groupKind: persistentvolumeclaims
groupResource: persistentvolumeclaims
resourceNameRegex: "^mysql.*$"
namespaces:
- bar
- foo
labelSelector:
matchLabels:
foo: bar
patches:
- operation: replace
path: "/spec/storageClassName"
@@ -42,9 +45,10 @@ resourceModifierRules:
path: "/metadata/labels/test"
```
- The above configmap will apply the JSON Patch to all the PVCs in the namespaces bar and foo with name starting with mysql. The JSON Patch will replace the storageClassName with "premium" and remove the label "test" from the PVCs.
- The above configmap will apply the JSON Patch to all the PVCs in the namespaces bar and foo with name starting with mysql and match label `foo: bar`. The JSON Patch will replace the storageClassName with "premium" and remove the label "test" from the PVCs.
- Note that the Namespace here is the original namespace of the backed up resource, not the new namespace where the resource is going to be restored.
- You can specify multiple JSON Patches for a particular resource. The patches will be applied in the order specified in the configmap. A subsequent patch is applied in order and if multiple patches are specified for the same path, the last patch will override the previous patches.
- You can can specify multiple resourceModifierRules in the configmap. The rules will be applied in the order specified in the configmap.
- You can specify multiple resourceModifierRules in the configmap. The rules will be applied in the order specified in the configmap.
### Operations supported by the JSON Patch RFC:
- add
@@ -61,7 +65,7 @@ resourceModifierRules:
version: v1
resourceModifierRules:
- conditions:
groupKind: persistentvolumeclaims
groupResource: persistentvolumeclaims
resourceNameRegex: ".*"
namespaces:
- bar
@@ -80,7 +84,7 @@ resourceModifierRules:
version: v1
resourceModifierRules:
- conditions:
groupKind: deployments.apps
groupResource: deployments.apps
resourceNameRegex: "^test-.*$"
namespaces:
- bar

View File

@@ -120,6 +120,10 @@ spec:
ttl: 24h0m0s
# whether pod volume file system backup should be used for all volumes by default.
defaultVolumesToFsBackup: true
# Whether snapshot data should be moved. If set, data movement is launched after the snapshot is created.
snapshotMoveData: true
# The data mover to be used by the backup. If the value is "" or "velero", the built-in data mover will be used.
datamover: velero
# Actions to perform at different times during a backup. The only hook supported is
# executing a command in a container in a pod using the pod exec API. Optional.
hooks:

View File

@@ -118,6 +118,10 @@ spec:
defaultVolumesToFsBackup: true
# The labels you want on backup objects, created from this schedule (instead of copying the labels you have on schedule object itself).
# When this field is set, the labels from the Schedule resource are not copied to the Backup resource.
# Whether snapshot data should be moved. If set, data movement is launched after the snapshot is created.
snapshotMoveData: true
# The data mover to be used by the backup. If the value is "" or "velero", the built-in data mover will be used.
datamover: velero
metadata:
labels:
labelname: somelabelvalue

View File

@@ -0,0 +1,399 @@
---
title: "CSI Snapshot Data Movement"
layout: docs
---
CSI Snapshot Data Movement is built according to the [Volume Snapshot Data Movement design][1] and is specifically designed to move CSI snapshot data to a backup storage location.
CSI Snapshot Data Movement takes CSI snapshots through the CSI plugin in nearly the same way as [CSI snapshot backup][2]. However, it doesn't stop after a snapshot is taken. Instead, it tries to access the snapshot data through various data movers and back up the data to a backup storage connected to the data movers.
Consequently, the volume data is backed up to a pre-defined backup storage in a consistent manner.
After the backup completes, the CSI snapshot will be removed by Velero and the snapshot data space will be released on the storage side.
CSI Snapshot Data Movement is useful in below scenarios:
- For on-premises users, the storage usually doesn't support durable snapshots, so it is impossible/less efficient/cost ineffective to keep volume snapshots by the storage, as required by the [CSI snapshot backup][2]. This feature helps to move the snapshot data to a storage with lower cost and larger scale for long time preservation.
- For public cloud users, this feature helps users to fulfil the multiple cloud strategy. It allows users to back up volume snapshots from one cloud provider and preserve or restore the data to another cloud provider. Then users will be free to flow their business data across cloud providers based on Velero backup and restore.
Besides, Velero [File System Backup][3] which could also back up the volume data to a pre-defined backup storage. CSI Snapshot Data Movement works together with [File System Backup][3] to satisfy different requirements for the above scenarios. And whenever available, CSI Snapshot Data Movement should be used in preference since the [File System Backup][3] reads data from the live PV, in which way the data is not captured at the same point in time, so is less consistent.
Moreover, CSI Snapshot Data Movement brings more possible ways of data access, i.e., accessing the data from the block level, either fully or incrementally.
On the other hand, there are quite some cases that CSI snapshot is not available (i.e., you need a volume snapshot plugin for your storage platform, or you're using EFS, NFS, emptyDir, local, or any other volume type that doesn't have a native snapshot), then [File System Backup][3] will be the only option.
CSI Snapshot Data Movement supports both built-in data mover and customized data movers. For the details of how Velero works with customized data movers, check the [Volume Snapshot Data Movement design][1]. Velero provides a built-in data mover which uses Velero built-in uploaders (at present the available uploader is Kopia uploader) to read the snapshot data and write to the Unified Repository (by default implemented by Kopia repository).
The way for Velero built-in data mover to access the snapshot data is based on the hostpath access by Velero node-agent, so the node-agent pods need to run as root user and even under privileged mode in some environments, as same as [File System Backup][3].
## Setup CSI Snapshot Data Movement
## Prerequisites
1. The source cluster is Kubernetes version 1.20 or greater.
2. The source cluster is running a CSI driver capable of support volume snapshots at the [v1 API level][4].
3. CSI Snapshot Data Movement requires the Kubernetes [MountPropagation feature][5].
### Install Velero Node Agent
Velero Node Agent is a Kubernetes daemonset that hosts Velero data movement modules, i.e., data mover controller, uploader & repository.
If you are using Velero built-in data mover, Node Agent must be installed. To install Node Agent, use the `--use-node-agent` flag.
```
velero install --use-node-agent
```
### Configure Node Agent DaemonSet spec
After installation, some PaaS/CaaS platforms based on Kubernetes also require modifications the node-agent DaemonSet spec.
The steps in this section are only needed if you are installing on RancherOS, OpenShift, VMware Tanzu Kubernetes Grid
Integrated Edition (formerly VMware Enterprise PKS), or Microsoft Azure.
**RancherOS**
Update the host path for volumes in the node-agent DaemonSet in the Velero namespace from `/var/lib/kubelet/pods` to
`/opt/rke/var/lib/kubelet/pods`.
```yaml
hostPath:
path: /var/lib/kubelet/pods
```
to
```yaml
hostPath:
path: /opt/rke/var/lib/kubelet/pods
```
**OpenShift**
To mount the correct hostpath to pods volumes, run the node-agent pod in `privileged` mode.
1. Add the `velero` ServiceAccount to the `privileged` SCC:
```
oc adm policy add-scc-to-user privileged -z velero -n velero
```
2. Modify the DaemonSet yaml to request a privileged mode:
```diff
@@ -67,3 +67,5 @@ spec:
value: /credentials/cloud
- name: VELERO_SCRATCH_DIR
value: /scratch
+ securityContext:
+ privileged: true
```
or
```shell
oc patch ds/node-agent \
--namespace velero \
--type json \
-p '[{"op":"add","path":"/spec/template/spec/containers/0/securityContext","value": { "privileged": true}}]'
```
If node-agent is not running in a privileged mode, it will not be able to access snapshot volumes within the mounted
hostpath directory because of the default enforced SELinux mode configured in the host system level. You can
[create a custom SCC][6] to relax the
security in your cluster so that node-agent pods are allowed to use the hostPath volume plugin without granting
them access to the `privileged` SCC.
By default a userland openshift namespace will not schedule pods on all nodes in the cluster.
To schedule on all nodes the namespace needs an annotation:
```
oc annotate namespace <velero namespace> openshift.io/node-selector=""
```
This should be done before velero installation.
Or the ds needs to be deleted and recreated:
```
oc get ds node-agent -o yaml -n <velero namespace> > ds.yaml
oc annotate namespace <velero namespace> openshift.io/node-selector=""
oc create -n <velero namespace> -f ds.yaml
```
**VMware Tanzu Kubernetes Grid Integrated Edition (formerly VMware Enterprise PKS)**
You need to enable the `Allow Privileged` option in your plan configuration so that Velero is able to mount the hostpath.
The hostPath should be changed from `/var/lib/kubelet/pods` to `/var/vcap/data/kubelet/pods`
```yaml
hostPath:
path: /var/vcap/data/kubelet/pods
```
### Configure A Backup Storage Location
At present, Velero backup repository supports object storage as the backup storage. Velero gets the parameters from the
[BackupStorageLocation][8] to compose the URL to the backup storage.
Velero's known object storage providers are included here [supported providers][9], for which, Velero pre-defines the endpoints. If you want to use a different backup storage, make sure it is S3 compatible and you provide the correct bucket name and endpoint in BackupStorageLocation. Velero handles the creation of the backup repo prefix in the backup storage, so make sure it is specified in BackupStorageLocation correctly.
Velero creates one backup repository per namespace. For example, if backing up 2 namespaces, namespace1 and namespace2, using kopia repository on AWS S3, the full backup repo path for namespace1 would be `https://s3-us-west-2.amazonaws.com/bucket/kopia/ns1` and for namespace2 would be `https://s3-us-west-2.amazonaws.com/bucket/kopia/ns2`.
There may be additional installation steps depending on the cloud provider plugin you are using. You should refer to the [plugin specific documentation][9] for the must up to date information.
**Note:** Currently, Velero creates a secret named `velero-repo-credentials` in the velero install namespace, containing a default backup repository password.
You can update the secret with your own password encoded as base64 prior to the first backup (i.e., [File System Backup][3], snapshot data movements) targeting to the backup repository. The value of the key to update is
```
data:
repository-password: <custom-password>
```
Backup repository is created during the first execution of backup targeting to it after installing Velero with node agent. If you update the secret password after the first backup which created the backup repository, then Velero will not be able to connect with the older backups.
## Install Velero with CSI support on source cluster
On source cluster, Velero needs to manipulate CSI snapshots through the CSI volume snapshot APIs, so you must enable the `EnableCSI` feature flag and install the Velero [CSI plugin][2] on the Velero server.
Both of these can be added with the `velero install` command.
```bash
velero install \
--features=EnableCSI \
--plugins=<object storage plugin>,velero/velero-plugin-for-csi:v0.6.0 \
...
```
### Configure storage class on target cluster
For Velero built-in data movement, CSI facilities are not required necessarily in the target cluster. On the other hand, Velero built-in data movement creates a PVC with the same specification as it is in the source cluster and expects the volume to be provisioned similarly. For example, the same storage class should be working in the target cluster.
By default, Velero won't restore storage class resources from the backup since they are cluster scope resources. However, if you specify the `--include-cluster-resources` restore flag, they will be restored. For a cross provider scenario, the storage class from the source cluster is probably not usable in the target cluster.
In either of the above cases, the best practice is to create a working storage class in the target cluster with the same name as it in the source cluster. In this way, even though `--include-cluster-resources` is specified, Velero restore will skip restoring the storage class since it finds an existing one.
Otherwise, if the storage class name in the target cluster is different, you can change the PVC's storage class name during restore by the [changing PV/PVC storage class][10] method. You can also configure to skip restoring the storage class resources from the backup since they are not usable.
### Customized Data Movers
If you are using a customized data mover, follow the data mover's instructions for any further prerequisites.
For Velero side configurations mentioned above, the installation and configuration of node-agent may not be required.
## To back up
Velero uses a new custom resource `DataUpload` to drive the data movement. The selected data mover will watch and reconcile the CRs.
Velero allows users to decide whether the CSI snapshot data should be moved per backup.
Velero also allows users to select the data mover to move the CSI snapshot data per backup.
The both selections are simply done by a parameter when running the backup.
To take a backup with Velero's built-in data mover:
```bash
velero backup create NAME --snapshot-move-data OPTIONS...
```
Or if you want to use a customized data mover:
```bash
velero backup create NAME --snapshot-move-data --data-mover DATA-MOVER-NAME OPTIONS...
```
When the backup starts, you will see the `VolumeSnapshot` and `VolumeSnapshotContent` objects created, but after the backup finishes, the objects will disppear.
After snapshots are created, you will see one or more `DataUpload` CRs created.
You may also see some intermediate objects (i.e., pods, PVCs, PVs) created in Velero namespace or the cluster scope, they are to help data movers to move data. And they will be removed after the backup completes.
The phase of a `DataUpload` CR changes several times during the backup process and finally goes to one of the terminal status, `Completed`, `Failed` or `Cancelled`. You can see the phase changes as well as the data upload progress by watching the `DataUpload` CRs:
```bash
kubectl -n velero get datauploads -l velero.io/backup-name=YOUR_BACKUP_NAME -w
```
When the backup completes, you can view information about the backups:
```bash
velero backup describe YOUR_BACKUP_NAME
```
```bash
kubectl -n velero get datauploads -l velero.io/backup-name=YOUR_BACKUP_NAME -o yaml
```
## To restore
You don't need to set any additional information when creating a data mover restore. The configurations are automatically retrieved from the backup, i.e., whether data movement should be involved and which data mover conducts the data movement.
To restore from your Velero backup:
```bash
velero restore create --from-backup BACKUP_NAME OPTIONS...
```
When the restore starts, you will see one or more `DataDownload` CRs created.
You may also see some intermediate objects (i.e., pods, PVCs, PVs) created in Velero namespace or the cluster scope, they are to help data movers to move data. And they will be removed after the restore completes.
The phase of a `DataDownload` CR changes several times during the restore process and finally goes to one of the terminal status, `Completed`, `Failed` or `Cancelled`. You can see the phase changes as well as the data download progress by watching the DataDownload CRs:
```bash
kubectl -n velero get datadownloads -l velero.io/restore-name=YOUR_RESTORE_NAME -w
```
When the restore completes, view information about your restores:
```bash
velero restore describe YOUR_RESTORE_NAME
```
```bash
kubectl -n velero get datadownloads -l velero.io/restore-name=YOUR_RESTORE_NAME -o yaml
```
## Limitations
- CSI and CSI snapshot support both file system volume mode and block volume mode. At present, Velero built-in data mover doesn't support
block mode volume or volume snapshot.
- [Velero built-in data mover] At present, Velero uses a static, common encryption key for all backup repositories it creates. **This means
that anyone who has access to your backup storage can decrypt your backup data**. Make sure that you limit access
to the backup storage appropriately.
- [Velero built-in data mover] Even though the backup data could be incrementally preserved, for a single file data, Velero built-in data mover leverages on deduplication to find the difference to be saved. This means that large files (such as ones storing a database) will take a long time to scan for data deduplication, even if the actual difference is small.
- [Velero built-in data mover] You may need to [customize the resource limits][11] to make sure backups complete successfully for massive small files or large backup size cases, for more details refer to [Velero file system level backup performance guide][12].
## Troubleshooting
Run the following checks:
Are your Velero server and daemonset pods running?
```bash
kubectl get pods -n velero
```
Does your backup repository exist, and is it ready?
```bash
velero repo get
velero repo get REPO_NAME -o yaml
```
Are there any errors in your Velero backup/restore?
```bash
velero backup describe BACKUP_NAME
velero backup logs BACKUP_NAME
velero restore describe RESTORE_NAME
velero restore logs RESTORE_NAME
```
What is the status of your `DataUpload` and `DataDownload`?
```bash
kubectl -n velero get datauploads -l velero.io/backup-name=BACKUP_NAME -o yaml
kubectl -n velero get datadownloads -l velero.io/restore-name=RESTORE_NAME -o yaml
```
Is there any useful information in the Velero server or daemonset pod logs?
```bash
kubectl -n velero logs deploy/velero
kubectl -n velero logs DAEMON_POD_NAME
```
**NOTE**: You can increase the verbosity of the pod logs by adding `--log-level=debug` as an argument to the container command in the deployment/daemonset pod template spec.
If you are using a customized data mover, follow the data mover's instruction for additional troubleshooting methods.
## How backup and restore work
CSI snapshot data movement is a combination of CSI snapshot and data movement, which is jointly executed by Velero server, CSI plugin and the data mover.
This section lists some general concept of how CSI snapshot data movement backup and restore work. For the detailed mechanisms and workflows, you can check the [Volume Snapshot Data Movement design][1].
### Custom resource and controllers
Velero has three custom resource definitions and associated controllers:
- `DataUpload` - represents a data upload of a volume snapshot. The CSI plugin creates one `DataUpload` per CSI snapshot. Data movers need to handle these CRs to finish the data upload process.
Velero built-in data mover runs a controller for this resource on each node (in node-agent daemonset). Controllers from different nodes may handle one CR in different phases, but finally the data transfer is done by one single controller which will call uploaders from the backend.
- `DataDownload` - represents a data download of a volume snapshot. The CSI plugin creates one `DataDownload` per volume to be restored. Data movers need to handle these CRs to finish the data upload process.
Velero built-in data mover runs a controller for this resource on each node (in node-agent daemonset). Controllers from different nodes may handle one CR in different phases, but finally the data transfer is done by one single controller which will call uploaders from the backend.
- `BackupRepository` - represents/manages the lifecycle of Velero's backup repositories. Velero creates a backup repository per namespace when the first CSI snapshot backup/restore for a namespace is requested. You can see information about your Velero's backup repositories by running `velero repo get`.
This CR is used by Velero built-in data movers, customized data movers may or may not use it.
For other resources or controllers involved by customized data movers, check the data mover's instructions.
### Backup
Velero backs up resources for CSI snapshot data movement backup in the same way as other backup types. When it encounters a PVC, particular logics will be conducted:
- When it finds a PVC object, Velero calls CSI plugin through a Backup Item Action.
- CSI plugin first takes a CSI snapshot to the PVC by creating the `VolumeSnapshot` and `VolumeSnapshotContent`.
- CSI plugin checks if a data movement is required, if so it creates a `DataUpload` CR and then returns to Velero backup.
- Velero now is able to back up other resources, including other PVC objects.
- Velero backup controller periodically queries the data movement status from CSI plugin, the period is configurable through the Velero server parameter `--item-operation-sync-frequency`, by default it is 10s. On the call, CSI plugin turns to check the phase of the `DataUpload` CRs.
- When all the `DataUpload` CRs come to a terminal state (i.e., `Completed`, `Failed` or `Cancelled`), Velero backup perists all the necessary information and finish the backup.
- CSI plugin expects a data mover to handle the `DataUpload` CR. If no data mover is configured for the backup, Velero built-in data mover will handle it.
- If the `DataUpload` CR does not reach to the terminal state with in the given time, the `DataUpload` CR will be cancelled. You can set the timeout value per backup through the `--item-operation-timeout` parameter, the default value is `4 hours`.
- Velero built-in data mover creates a volume from the CSI snapshot and transfer the data to the backup storage according to the backup storage location defined by users.
- After the volume is created from the CSI snapshot, Velero built-in data mover waits for Kubernetes to provision the volume, this may take some time varying from storage providers, but if the provision cannot be finished in a given time, Velero built-in data mover will cancel this `DataUpload` CR. The timeout is configurable through a node-agent's parameter `data-mover-prepare-timeout`, the default value is 30 minutes.
- When the data transfer completes or any error happens, Velero built-in data mover sets the `DataUpload` CR to the terminal state, either `Completed` or `Failed`.
- Velero built-in data mover also monitors the cancellation request to the `DataUpload` CR, once that happens, it cancels its ongoing activities, cleans up the intermediate resources and set the `DataUpload` CR to `Cancelled`.
### Restore
Velero restores resources for CSI snapshot data movement restore in the same way as other restore types. When it encounters a PVC, particular logics will be conducted:
- When it finds a PVC object, Velero calls CSI plugin through a Restore Item Action.
- CSI plugin checks the backup information, if a data movement was involved, it creates a `DataDownload` CR and then returns to Velero restore.
- Velero is now able to restore other resources, including other PVC objects.
- Velero restore controller periodically queries the data movement status from CSI plugin, the period is configurable through the Velero server parameter `--item-operation-sync-frequency`, by default it is 10s. On the call, CSI plugin turns to check the phase of the `DataDownload` CRs.
- When all `DataDownload` CRs come to a terminal state (i.e., `Completed`, `Failed` or `Cancelled`), Velero restore will finish.
- CSI plugin expects the same data mover for the backup to handle the `DataDownload` CR. If no data mover was configured for the backup, Velero built-in data mover will handle it.
- If the `DataDownload` CR does not reach to the terminal state with in the given time, the `DataDownload` CR will be cancelled. You can set the timeout value per backup through the same `--item-operation-timeout` parameter.
- Velero built-in data mover creates a volume with the same specification of the source volume.
- Velero built-in data mover waits for Kubernetes to provision the volume, this may take some time varying from storage providers, but if the provision cannot be finished in a given time, Velero built-in data mover will cancel this `DataDownload` CR. The timeout is configurable through the same node-agent's parameter `data-mover-prepare-timeout`.
- After the volume is provisioned, Velero built-in data mover starts to transfer the data from the backup storage according to the backup storage location defined by users.
- When the data transfer completes or any error happens, Velero built-in data mover sets the `DataDownload` CR to the terminal state, either `Completed` or `Failed`.
- Velero built-in data mover also monitors the cancellation request to the `DataDownload` CR, once that happens, it cancels its ongoing activities, cleans up the intermediate resources and set the `DataDownload` CR to `Cancelled`.
### Parallelism
Velero calls the CSI plugin concurrently for the volume, so `DataUpload`/`DataDownload` CRs are created concurrently by the CSI plugin. For more details about the call between Velero and CSI plugin, check the [Volume Snapshot Data Movement design][1].
In which manner the `DataUpload`/`DataDownload` CRs are processed is totally decided by the data mover you select for the backup/restore.
For Velero built-in data mover, it uses Kubernetes' scheduler to mount a snapshot volume/restore volume associated to a `DataUpload`/`DataDownload` CR into a specific node, and then the `DataUpload`/`DataDownload` controller (in node-agent daemonset) in that node will handle the `DataUpload`/`DataDownload`.
At present, a `DataUpload`/`DataDownload` controller in one node handles one request at a time.
That is to say, the snapshot volumes/restore volumes may spread in different nodes, then their associated `DataUpload`/`DataDownload` CRs will be processed in parallel; while for the snapshot volumes/restore volumes in the same node, their associated `DataUpload`/`DataDownload` CRs are processed sequentially.
You can check in which node the `DataUpload`/`DataDownload` CRs are processed and their parallelism by watching the `DataUpload`/`DataDownload` CRs:
```bash
kubectl -n velero get datauploads -l velero.io/backup-name=YOUR_BACKUP_NAME -w
```
```bash
kubectl -n velero get datadownloads -l velero.io/restore-name=YOUR_RESTORE_NAME -w
```
### Cancellation
At present, Velero backup and restore doesn't support end to end cancellation that is launched by users.
However, Velero cancels the `DataUpload`/`DataDownload` in below scenarios automatically:
- When Velero server is restarted
- When node-agent is restarted
- When an ongoing backup/restore is deleted
- When a backup/restore does not finish before the item operation timeout (default value is `4 hours`)
Customized data movers that support cancellation could cancel their ongoing tasks and clean up any intermediate resources. If you are using Velero built-in data mover, the cancellation is supported.
[1]: https://github.com/vmware-tanzu/velero/pull/5968
[2]: csi.md
[3]: file-system-backup.md
[4]: https://kubernetes.io/blog/2020/12/10/kubernetes-1.20-volume-snapshot-moves-to-ga/
[5]: https://kubernetes.io/docs/concepts/storage/volumes/#mount-propagation
[6]: https://docs.openshift.com/container-platform/3.11/admin_guide/manage_scc.html
[7]: https://docs.microsoft.com/en-us/azure/aks/azure-files-dynamic-pv
[8]: api-types/backupstoragelocation.md
[9]: supported-providers.md
[10]: restore-reference.md#changing-pv/pvc-Storage-Classes
[11]: customize-installation.md#customize-resource-requests-and-limits
[12]: performance-guidance.md

View File

@@ -37,11 +37,53 @@ This section documents some of the choices made during implementation of the Vel
1. VolumeSnapshots created by the Velero CSI plugins are retained only for the lifetime of the backup even if the `DeletionPolicy` on the VolumeSnapshotClass is set to `Retain`. To accomplish this, during deletion of the backup the prior to deleting the VolumeSnapshot, VolumeSnapshotContent object is patched to set its `DeletionPolicy` to `Delete`. Deleting the VolumeSnapshot object will result in cascade delete of the VolumeSnapshotContent and the snapshot in the storage provider.
1. VolumeSnapshotContent objects created during a `velero backup` that are dangling, unbound to a VolumeSnapshot object, will be discovered, using labels, and deleted on backup deletion.
1. The Velero CSI plugins, to backup CSI backed PVCs, will choose the VolumeSnapshotClass in the cluster that has the same driver name and also has the `velero.io/csi-volumesnapshot-class` label set on it, like
```yaml
velero.io/csi-volumesnapshot-class: "true"
```
1. The VolumeSnapshot objects will be removed from the cluster after the backup is uploaded to the object storage, so that the namespace that is backed up can be deleted without removing the snapshot in the storage provider if the `DeletionPolicy` is `Delete.
1. The Velero CSI plugins, to backup CSI backed PVCs, will choose the VolumeSnapshotClass in the cluster based on the following logic:
1. **Default Behavior:**
You can simply create a VolumeSnapshotClass for a particular driver and put a label on it to indicate that it is the default VolumeSnapshotClass for that driver. For example, if you want to create a VolumeSnapshotClass for the CSI driver `disk.csi.cloud.com` for taking snapshots of disks created with `disk.csi.cloud.com` based storage classes, you can create a VolumeSnapshotClass like this:
```yaml
apiVersion: snapshot.storage.k8s.io/v1
kind: VolumeSnapshotClass
metadata:
name: test-snapclass
labels:
velero.io/csi-volumesnapshot-class: "true"
driver: disk.csi.cloud.com
```
Note: For each driver type, there should only be 1 VolumeSnapshotClass with the label `velero.io/csi-volumesnapshot-class: "true"`.
2. **Choose VolumeSnapshotClass for a particular Backup Or Schedule:**
If you want to use a particular VolumeSnapshotClass for a particular backup or schedule, you can add a annotation to the backup or schedule to indicate which VolumeSnapshotClass to use. For example, if you want to use the VolumeSnapshotClass `test-snapclass` for a particular backup for snapshotting PVCs of `disk.csi.cloud.com`, you can create a backup like this:
```yaml
apiVersion: velero.io/v1
kind: Backup
metadata:
name: test-backup
annotations:
velero.io/csi-volumesnapshot-class_disk.csi.cloud.com: "test-snapclass"
spec:
includedNamespaces:
- default
```
Note: Please ensure all your annotations are in lowercase. And follow the following format: `velero.io/csi-volumesnapshot-class_<driver name> = <VolumeSnapshotClass Name>`
3. **Choosing VolumeSnapshotClass for a particular PVC:**
If you want to use a particular VolumeSnapshotClass for a particular PVC, you can add a annotation to the PVC to indicate which VolumeSnapshotClass to use. This overrides any annotation added to backup or schedule. For example, if you want to use the VolumeSnapshotClass `test-snapclass` for a particular PVC, you can create a PVC like this:
```yaml
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: test-pvc
annotations:
velero.io/csi-volumesnapshot-class: "test-snapclass"
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 1Gi
storageClassName: disk.csi.cloud.com
```
1. The VolumeSnapshot objects will be removed from the cluster after the backup is uploaded to the object storage, so that the namespace that is backed up can be deleted without removing the snapshot in the storage provider if the `DeletionPolicy` is `Delete`.
## How it Works - Overview

View File

@@ -13,6 +13,8 @@ the supported cloud providers block storage offerings (Amazon EBS Volumes, Az
It also provides a plugin model that enables anyone to implement additional object and block storage backends, outside the
main Velero repository.
If your storage supports CSI (Container Storage Interface) snapshots, Velero also allows you to take snapshots through CSI and then optionally move the snapshot data to a different storage location.
Velero's File System Backup is an addition to the aforementioned snapshot approaches. Its pros and cons are listed below:
Pros:
- It is capable of backing up and restoring almost any type of Kubernetes volume. Therefore, if you need a volume snapshot
@@ -22,9 +24,8 @@ have a native snapshot concept, FSB might be for you.
the one backing Kubernetes volumes, for example, a durable storage.
Cons:
- It backs up data from the live file system, so the backup data is less consistent than the snapshot approaches.
- It access the file system from the mounted hostpath directory, so the pods need to run as root user and even under
privileged mode in some environments.
- It backs up data from the live file system, in which way the data is not captured at the same point in time, so is less consistent than the snapshot approaches.
- It access the file system from the mounted hostpath directory, so Velero Node Agent pods need to run as root user and even under privileged mode in some environments.
**NOTE:** hostPath volumes are not supported, but the [local volume type][5] is supported.
@@ -64,6 +65,15 @@ for namespace2 would be `https://s3-us-west-2.amazonaws.com/bucket/kopia/ns2`.
There may be additional installation steps depending on the cloud provider plugin you are using. You should refer to the
[plugin specific documentation](supported-providers.md) for the must up to date information.
**Note:** Currently, Velero creates a secret named `velero-repo-credentials` in the velero install namespace, containing a default backup repository password.
You can update the secret with your own password encoded as base64 prior to the first backup (i.e., FS Backup, data mover) targeting to the backup repository. The value of the key to update is
```
data:
repository-password: <custom-password>
```
Backup repository is created during the first execution of backup targeting to it after installing Velero with node agent. If you update the secret password after the first
backup which created the backup repository, then Velero will not be able to connect with the older backups.
### Configure Node Agent DaemonSet spec
After installation, some PaaS/CaaS platforms based on Kubernetes also require modifications the node-agent DaemonSet spec.
@@ -98,7 +108,7 @@ To mount the correct hostpath to pods volumes, run the node-agent pod in `privil
1. Add the `velero` ServiceAccount to the `privileged` SCC:
```
$ oc adm policy add-scc-to-user privileged -z velero -n velero
oc adm policy add-scc-to-user privileged -z velero -n velero
```
2. Modify the DaemonSet yaml to request a privileged mode:
@@ -157,20 +167,6 @@ hostPath:
path: /var/vcap/data/kubelet/pods
```
**Microsoft Azure**
If you are using [Azure Files][8], you need to add `nouser_xattr` to your storage class's `mountOptions`.
See [this restic issue][9] for more details.
You can use the following command to patch the storage class:
```bash
kubectl patch storageclass/<YOUR_AZURE_FILE_STORAGE_CLASS_NAME> \
--type json \
--patch '[{"op":"add","path":"/mountOptions/-","value":"nouser_xattr"}]'
```
## To back up
Velero supports two approaches of discovering pod volumes that need to be backed up using FSB:
@@ -354,6 +350,7 @@ to make sure backups complete successfully for massive small files or large back
For this reason, FSB can only backup volumes that are mounted by a pod and not directly from the PVC. For orphan PVC/PV pairs
(without running pods), some Velero users overcame this limitation running a staging pod (i.e. a busybox or alpine container
with an infinite sleep) to mount these PVC/PV pairs prior taking a Velero backup.
- Velero File System Backup expects volumes to be mounted under `<hostPath>/<pod UID>` (`hostPath` is configurable as mentioned in [Configure Node Agent DaemonSet spec](#configure-node-agent-daemonset-spec)). Some Kubernetes systems (i.e., [vCluster][11]) don't mount volumes under the `<pod UID>` sub-dir, Velero File System Backup is not working with them.
## Customize Restore Helper Container
@@ -595,3 +592,4 @@ To solve this, a controller was written by Thomann Bits&Beats: [velero-pvc-watch
[8]: https://docs.microsoft.com/en-us/azure/aks/azure-files-dynamic-pv
[9]: https://github.com/restic/restic/issues/1800
[10]: customize-installation.md#default-pod-volume-backup-to-file-system-backup
[11]: https://www.vcluster.com/

View File

@@ -29,11 +29,14 @@ Below is the two-step of using resource modifiers to modify the resources during
version: v1
resourceModifierRules:
- conditions:
groupKind: persistentvolumeclaims
resourceNameRegex: "^mysql.*$"
namespaces:
- bar
- foo
groupResource: persistentvolumeclaims
resourceNameRegex: "^mysql.*$"
namespaces:
- bar
- foo
labelSelector:
matchLabels:
foo: bar
patches:
- operation: replace
path: "/spec/storageClassName"
@@ -42,9 +45,10 @@ resourceModifierRules:
path: "/metadata/labels/test"
```
- The above configmap will apply the JSON Patch to all the PVCs in the namespaces bar and foo with name starting with mysql. The JSON Patch will replace the storageClassName with "premium" and remove the label "test" from the PVCs.
- The above configmap will apply the JSON Patch to all the PVCs in the namespaces bar and foo with name starting with mysql and match label `foo: bar`. The JSON Patch will replace the storageClassName with "premium" and remove the label "test" from the PVCs.
- Note that the Namespace here is the original namespace of the backed up resource, not the new namespace where the resource is going to be restored.
- You can specify multiple JSON Patches for a particular resource. The patches will be applied in the order specified in the configmap. A subsequent patch is applied in order and if multiple patches are specified for the same path, the last patch will override the previous patches.
- You can can specify multiple resourceModifierRules in the configmap. The rules will be applied in the order specified in the configmap.
- You can specify multiple resourceModifierRules in the configmap. The rules will be applied in the order specified in the configmap.
### Operations supported by the JSON Patch RFC:
- add
@@ -61,7 +65,7 @@ resourceModifierRules:
version: v1
resourceModifierRules:
- conditions:
groupKind: persistentvolumeclaims
groupResource: persistentvolumeclaims
resourceNameRegex: ".*"
namespaces:
- bar
@@ -80,7 +84,7 @@ resourceModifierRules:
version: v1
resourceModifierRules:
- conditions:
groupKind: deployments.apps
groupResource: deployments.apps
resourceNameRegex: "^test-.*$"
namespaces:
- bar

View File

@@ -49,6 +49,8 @@ toc:
url: /namespace
- page: CSI Support
url: /csi
- page: CSI Snapshot Data Movement
url: /csi-snapshot-data-movement
- page: Verifying Self-signed Certificates
url: /self-signed-certificates
- page: Changing RBAC permissions

View File

@@ -49,6 +49,8 @@ toc:
url: /namespace
- page: CSI Support
url: /csi
- page: CSI Snapshot Data Movement
url: /csi-snapshot-data-movement
- page: Verifying Self-signed Certificates
url: /self-signed-certificates
- page: Changing RBAC permissions

View File

@@ -51,7 +51,6 @@ SKIP_STR := $(foreach var, $(subst ., ,$(GINKGO_SKIP)),-skip "$(var)")
FOCUS_STR := $(foreach var, $(subst ., ,$(GINKGO_FOCUS)),-focus "$(var)")
VELERO_CLI ?=$$(pwd)/../../_output/bin/$(GOOS)/$(GOARCH)/velero
VELERO_IMAGE ?= velero/velero:main
VELERO_VERSION ?= $(VERSION)
PLUGINS ?=
RESTORE_HELPER_IMAGE ?=
#Released version only
@@ -74,6 +73,8 @@ BSL_CONFIG ?=
VSL_CONFIG ?=
CLOUD_PROVIDER ?=
STANDBY_CLUSTER_CLOUD_PROVIDER ?=
STANDBY_CLUSTER_PLUGINS ?=
STANDBY_CLUSTER_OBJECT_STORE_PROVIDER ?=
OBJECT_STORE_PROVIDER ?=
INSTALL_VELERO ?= true
REGISTRY_CREDENTIAL_FILE ?=
@@ -90,6 +91,7 @@ ADDITIONAL_BSL_CONFIG ?=
FEATURES ?=
DEBUG_E2E_TEST ?= false
DEBUG_VELERO_POD_RESTART ?= false
VELERO_SERVER_DEBUG_MODE ?= false
# Parameters to run migration tests along with all other E2E tests, and both of them should
@@ -119,7 +121,7 @@ run: ginkgo
@$(GINKGO) -v $(FOCUS_STR) $(SKIP_STR) . -- -velerocli=$(VELERO_CLI) \
-velero-image=$(VELERO_IMAGE) \
-plugins=$(PLUGINS) \
-velero-version=$(VELERO_VERSION) \
-velero-version=$(VERSION) \
-restore-helper-image=$(RESTORE_HELPER_IMAGE) \
-upgrade-from-velero-cli=$(UPGRADE_FROM_VELERO_CLI) \
-upgrade-from-velero-version=$(UPGRADE_FROM_VELERO_VERSION) \
@@ -150,7 +152,10 @@ run: ginkgo
-uploader-type=$(UPLOADER_TYPE) \
-snapshot-move-data=$(SNAPSHOT_MOVE_DATA) \
-data-mover-plugin=$(DATA_MOVER_plugin) \
-standby-cluster-cloud-provider=$(STANDBY_CLUSTER_CLOUD_PROVIDER)
-standby-cluster-cloud-provider=$(STANDBY_CLUSTER_CLOUD_PROVIDER) \
-standby-cluster-plugins=$(STANDBY_CLUSTER_PLUGINS) \
-standby-cluster-object-store-provider=$(STANDBY_CLUSTER_OBJECT_STORE_PROVIDER) \
-debug-velero-pod-restart=$(DEBUG_VELERO_POD_RESTART)
build: ginkgo
mkdir -p $(OUTPUT_DIR)

View File

@@ -66,7 +66,7 @@ func BackupRestoreTest(useVolumeSnapshots bool) {
AfterEach(func() {
if !veleroCfg.Debug {
By("Clean backups after test", func() {
DeleteBackups(context.Background(), *veleroCfg.ClientToInstallVelero)
DeleteAllBackups(context.Background(), *veleroCfg.ClientToInstallVelero)
})
if veleroCfg.InstallVelero {
ctx, ctxCancel := context.WithTimeout(context.Background(), time.Minute*5)

View File

@@ -69,7 +69,7 @@ func backup_deletion_test(useVolumeSnapshots bool) {
AfterEach(func() {
if !veleroCfg.Debug {
By("Clean backups after test", func() {
DeleteBackups(context.Background(), *veleroCfg.ClientToInstallVelero)
DeleteAllBackups(context.Background(), *veleroCfg.ClientToInstallVelero)
})
}
})

View File

@@ -66,7 +66,7 @@ func BackupsSyncTest() {
AfterEach(func() {
if !VeleroCfg.Debug {
By("Clean backups after test", func() {
DeleteBackups(context.Background(), *VeleroCfg.ClientToInstallVelero)
DeleteAllBackups(context.Background(), *VeleroCfg.ClientToInstallVelero)
})
if VeleroCfg.InstallVelero {
ctx, ctxCancel := context.WithTimeout(context.Background(), time.Minute*5)

View File

@@ -78,7 +78,7 @@ func TTLTest() {
veleroCfg.GCFrequency = ""
if !veleroCfg.Debug {
By("Clean backups after test", func() {
DeleteBackups(context.Background(), *veleroCfg.ClientToInstallVelero)
DeleteAllBackups(context.Background(), *veleroCfg.ClientToInstallVelero)
})
ctx, ctxCancel := context.WithTimeout(context.Background(), time.Minute*5)
defer ctxCancel()

View File

@@ -69,7 +69,7 @@ func APIExtensionsVersionsTest() {
AfterEach(func() {
if !veleroCfg.Debug {
By("Clean backups after test", func() {
DeleteBackups(context.Background(), *veleroCfg.DefaultClient)
DeleteAllBackups(context.Background(), *veleroCfg.DefaultClient)
})
if veleroCfg.InstallVelero {
By("Uninstall Velero and delete CRD ", func() {

View File

@@ -94,7 +94,7 @@ func APIGropuVersionsTest() {
}
By("Clean backups after test", func() {
DeleteBackups(context.Background(), *veleroCfg.ClientToInstallVelero)
DeleteAllBackups(context.Background(), *veleroCfg.ClientToInstallVelero)
})
if veleroCfg.InstallVelero {
By("Uninstall Velero in api group version case", func() {

View File

@@ -73,6 +73,14 @@ func (p *PVCSelectedNodeChanging) CreateResources() error {
fmt.Sprintf("Failed to create namespace %s", p.namespace))
})
By(fmt.Sprintf("Create a storage class %s.", StorageClassName), func() {
Expect(InstallStorageClass(context.Background(), fmt.Sprintf("../testdata/storage-class/%s.yaml", p.VeleroCfg.CloudProvider))).To(Succeed())
})
By(fmt.Sprintf("Create a storage class %s.", StorageClassName), func() {
Expect(InstallTestStorageClasses(fmt.Sprintf("../testdata/storage-class/%s.yaml", VeleroCfg.CloudProvider))).To(Succeed(), "Failed to install storage class")
})
By(fmt.Sprintf("Create pod %s in namespace %s", p.podName, p.namespace), func() {
nodeNameList, err := GetWorkerNodes(p.Ctx)
Expect(err).To(Succeed())
@@ -80,7 +88,7 @@ func (p *PVCSelectedNodeChanging) CreateResources() error {
p.oldNodeName = nodeName
fmt.Printf("Create PVC on node %s\n", p.oldNodeName)
pvcAnn := map[string]string{p.ann: nodeName}
_, err := CreatePod(p.Client, p.namespace, p.podName, "default", p.pvcName, []string{p.volume}, pvcAnn, nil)
_, err := CreatePod(p.Client, p.namespace, p.podName, StorageClassName, p.pvcName, []string{p.volume}, pvcAnn, nil)
Expect(err).To(Succeed())
err = WaitForPods(p.Ctx, p.Client, p.namespace, []string{p.podName})
Expect(err).To(Succeed())

View File

@@ -50,8 +50,8 @@ func (s *StorageClasssChanging) Init() error {
Text: "Change the storage class of persistent volumes and persistent" +
" volume claims during restores",
}
s.srcStorageClass = "default"
s.desStorageClass = StorageClassName
s.srcStorageClass = StorageClassName
s.desStorageClass = StorageClassName2
s.labels = map[string]string{"velero.io/change-storage-class": "RestoreItemAction",
"velero.io/plugin-config": ""}
s.data = map[string]string{s.srcStorageClass: s.desStorageClass}
@@ -75,10 +75,11 @@ func (s *StorageClasssChanging) CreateResources() error {
"app": "test",
}
s.Ctx, s.CtxCancel = context.WithTimeout(context.Background(), 10*time.Minute)
By(fmt.Sprintf("Create a storage class %s", s.desStorageClass), func() {
Expect(InstallStorageClass(s.Ctx, fmt.Sprintf("../testdata/storage-class/%s.yaml",
s.VeleroCfg.CloudProvider))).To(Succeed())
By(("Installing storage class..."), func() {
Expect(InstallTestStorageClasses(fmt.Sprintf("../testdata/storage-class/%s.yaml", s.VeleroCfg.CloudProvider))).To(Succeed(), "Failed to install storage class")
})
By(fmt.Sprintf("Create namespace %s", s.namespace), func() {
Expect(CreateNamespace(s.Ctx, s.Client, s.namespace)).To(Succeed(),
fmt.Sprintf("Failed to create namespace %s", s.namespace))

View File

@@ -77,7 +77,7 @@ func BslDeletionTest(useVolumeSnapshots bool) {
AfterEach(func() {
if !veleroCfg.Debug {
By("Clean backups after test", func() {
DeleteBackups(context.Background(), *veleroCfg.DefaultClient)
DeleteAllBackups(context.Background(), *veleroCfg.DefaultClient)
})
By(fmt.Sprintf("Delete sample workload namespace %s", bslDeletionTestNs), func() {
Expect(DeleteNamespace(context.Background(), *veleroCfg.ClientToInstallVelero, bslDeletionTestNs,

View File

@@ -85,10 +85,13 @@ func init() {
flag.StringVar(&VeleroCfg.DefaultCluster, "default-cluster", "", "Default cluster context for migration test.")
flag.StringVar(&VeleroCfg.StandbyCluster, "standby-cluster", "", "Standby cluster context for migration test.")
flag.StringVar(&VeleroCfg.UploaderType, "uploader-type", "", "Identify persistent volume backup uploader.")
flag.BoolVar(&VeleroCfg.VeleroServerDebugMode, "velero-server-debug-mode", false, "Identify persistent volume backup uploader.")
flag.BoolVar(&VeleroCfg.VeleroServerDebugMode, "velero-server-debug-mode", false, "Switch for enabling Velero server log debug level.")
flag.BoolVar(&VeleroCfg.SnapshotMoveData, "snapshot-move-data", false, "Install default plugin for data mover.")
flag.StringVar(&VeleroCfg.DataMoverPlugin, "data-mover-plugin", "", "Install customized plugin for data mover.")
flag.StringVar(&VeleroCfg.StandbyClusterCloudProvider, "standby-cluster-cloud-provider", "", "Install customized plugin for data mover.")
flag.StringVar(&VeleroCfg.StandbyClusterCloudProvider, "standby-cluster-cloud-provider", "", "Cloud provider for standby cluster.")
flag.StringVar(&VeleroCfg.StandbyClusterPlugins, "standby-cluster-plugins", "", "Plugins provider for standby cluster.")
flag.StringVar(&VeleroCfg.StandbyClusterOjbectStoreProvider, "standby-cluster-object-store-provider", "", "Object store provider for standby cluster.")
flag.BoolVar(&VeleroCfg.DebugVeleroPodRestart, "debug-velero-pod-restart", false, "Switch for debugging velero pod restart.")
}
var _ = Describe("[APIGroup][APIVersion] Velero tests with various CRD API group versions", APIGropuVersionsTest)

View File

@@ -82,22 +82,26 @@ func MigrationTest(useVolumeSnapshots bool, veleroCLI2Version VeleroCLI2Version)
})
AfterEach(func() {
if !veleroCfg.Debug {
// TODO: delete backup created by case self, not all
// By("Clean backups after test", func() {
// DeleteBackups(context.Background(), *veleroCfg.DefaultClient)
// })
if veleroCfg.InstallVelero {
By(fmt.Sprintf("Uninstall Velero and delete sample workload namespace %s", migrationNamespace), func() {
ctx, ctxCancel := context.WithTimeout(context.Background(), time.Minute*5)
defer ctxCancel()
Expect(KubectlConfigUseContext(context.Background(), veleroCfg.DefaultCluster)).To(Succeed())
Expect(VeleroUninstall(ctx, veleroCfg.VeleroCLI,
veleroCfg.VeleroNamespace)).To(Succeed())
DeleteNamespace(context.Background(), *veleroCfg.DefaultClient, migrationNamespace, true)
By(fmt.Sprintf("Uninstall Velero on cluster %s", veleroCfg.DefaultCluster), func() {
ctx, ctxCancel := context.WithTimeout(context.Background(), time.Minute*5)
defer ctxCancel()
Expect(KubectlConfigUseContext(context.Background(), veleroCfg.DefaultCluster)).To(Succeed())
Expect(VeleroUninstall(ctx, veleroCfg.VeleroCLI,
veleroCfg.VeleroNamespace)).To(Succeed())
DeleteNamespace(context.Background(), *veleroCfg.DefaultClient, migrationNamespace, true)
})
Expect(KubectlConfigUseContext(context.Background(), veleroCfg.StandbyCluster)).To(Succeed())
Expect(VeleroUninstall(ctx, veleroCfg.VeleroCLI,
veleroCfg.VeleroNamespace)).To(Succeed())
By(fmt.Sprintf("Uninstall Velero on cluster %s", veleroCfg.StandbyCluster), func() {
ctx, ctxCancel := context.WithTimeout(context.Background(), time.Minute*5)
defer ctxCancel()
Expect(KubectlConfigUseContext(context.Background(), veleroCfg.StandbyCluster)).To(Succeed())
Expect(VeleroUninstall(ctx, veleroCfg.VeleroCLI,
veleroCfg.VeleroNamespace)).To(Succeed())
DeleteNamespace(context.Background(), *veleroCfg.StandbyClient, migrationNamespace, true)
})
if veleroCfg.InstallVelero {
By(fmt.Sprintf("Delete sample workload namespace %s", migrationNamespace), func() {
DeleteNamespace(context.Background(), *veleroCfg.StandbyClient, migrationNamespace, true)
})
}
@@ -110,7 +114,7 @@ func MigrationTest(useVolumeSnapshots bool, veleroCLI2Version VeleroCLI2Version)
})
When("kibishii is the sample workload", func() {
It("should be successfully backed up and restored to the default BackupStorageLocation", func() {
var backupNames []string
if veleroCfg.SnapshotMoveData {
if !useVolumeSnapshots {
Skip("FSB migration test is not needed in data mover scenario")
@@ -185,7 +189,7 @@ func MigrationTest(useVolumeSnapshots bool, veleroCLI2Version VeleroCLI2Version)
KibishiiData := *DefaultKibishiiData
By("Deploy sample workload of Kibishii", func() {
if OriginVeleroCfg.SnapshotMoveData {
KibishiiData.ExpectedNodes = 6
KibishiiData.ExpectedNodes = 3
}
Expect(KibishiiPrepareBeforeBackup(oneHourTimeout, *veleroCfg.DefaultClient, veleroCfg.CloudProvider,
@@ -206,6 +210,7 @@ func MigrationTest(useVolumeSnapshots bool, veleroCLI2Version VeleroCLI2Version)
RunDebug(context.Background(), veleroCfg.VeleroCLI, veleroCfg.VeleroNamespace, BackupStorageClassCfg.BackupName, "")
return "Fail to backup workload"
})
backupNames = append(backupNames, BackupStorageClassCfg.BackupName)
var BackupCfg BackupConfig
BackupCfg.BackupName = backupName
@@ -223,6 +228,7 @@ func MigrationTest(useVolumeSnapshots bool, veleroCLI2Version VeleroCLI2Version)
RunDebug(context.Background(), OriginVeleroCfg.VeleroCLI, OriginVeleroCfg.VeleroNamespace, BackupCfg.BackupName, "")
return "Fail to backup workload"
})
backupNames = append(backupNames, BackupCfg.BackupName)
})
if useVolumeSnapshots {
@@ -283,7 +289,12 @@ func MigrationTest(useVolumeSnapshots bool, veleroCLI2Version VeleroCLI2Version)
veleroCfg.UseRestic = false
if veleroCfg.SnapshotMoveData {
veleroCfg.UseNodeAgent = true
// For SnapshotMoveData pipelines, we should use standby clustr setting for Velero installation
// In nightly CI, StandbyClusterPlugins is set properly if pipeline is for SnapshotMoveData.
veleroCfg.Plugins = veleroCfg.StandbyClusterPlugins
veleroCfg.ObjectStoreProvider = veleroCfg.StandbyClusterOjbectStoreProvider
}
Expect(VeleroInstall(context.Background(), &veleroCfg, true)).To(Succeed())
})
@@ -326,6 +337,11 @@ func MigrationTest(useVolumeSnapshots bool, veleroCLI2Version VeleroCLI2Version)
Expect(KibishiiVerifyAfterRestore(*veleroCfg.StandbyClient, migrationNamespace,
oneHourTimeout, &KibishiiData)).To(Succeed(), "Fail to verify workload after restore")
})
// TODO: delete backup created by case self, not all
By("Clean backups after test", func() {
DeleteBackups(context.Background(), *veleroCfg.DefaultClient, backupNames)
})
})
})
}

View File

@@ -36,7 +36,7 @@ var yamlData = `
version: v1
resourceModifierRules:
- conditions:
groupKind: deployments.apps
groupResource: deployments.apps
resourceNameRegex: "resource-modifiers-.*"
patches:
- operation: add

View File

@@ -19,7 +19,6 @@ package filtering
import (
"context"
"fmt"
"os"
"strings"
"time"
@@ -33,6 +32,7 @@ import (
. "github.com/vmware-tanzu/velero/test"
. "github.com/vmware-tanzu/velero/test/e2e/test"
. "github.com/vmware-tanzu/velero/test/util/k8s"
. "github.com/vmware-tanzu/velero/test/util/velero"
)
const FileName = "test-data.txt"
@@ -110,7 +110,7 @@ func (r *ResourcePoliciesCase) CreateResources() error {
r.Ctx, r.CtxCancel = context.WithTimeout(context.Background(), 10*time.Minute)
By(("Installing storage class..."), func() {
Expect(r.installTestStorageClasses(fmt.Sprintf("../testdata/storage-class/%s.yaml", VeleroCfg.CloudProvider))).To(Succeed(), "Failed to install storage class")
Expect(InstallTestStorageClasses(fmt.Sprintf("../testdata/storage-class/%s.yaml", VeleroCfg.CloudProvider))).To(Succeed(), "Failed to install storage class")
})
By(fmt.Sprintf("Create configmap %s in namespaces %s for workload\n", r.cmName, r.VeleroCfg.VeleroNamespace), func() {
@@ -188,7 +188,7 @@ func (r *ResourcePoliciesCase) Verify() error {
func (r *ResourcePoliciesCase) Clean() error {
// If created some resources which is not in current test namespace, we NEED to override the base Clean function
if !r.VeleroCfg.Debug {
if err := r.deleteTestStorageClassList([]string{"e2e-storage-class", "e2e-storage-class-2"}); err != nil {
if err := r.deleteTestStorageClassList([]string{StorageClassName, StorageClassName2}); err != nil {
return err
}
@@ -207,13 +207,13 @@ func (r *ResourcePoliciesCase) createPVC(index int, namespace string, volList []
pvcName := fmt.Sprintf("pvc-%d", i)
By(fmt.Sprintf("Creating PVC %s in namespaces ...%s\n", pvcName, namespace))
if index%3 == 0 {
pvcBuilder := NewPVC(namespace, pvcName).WithStorageClass("e2e-storage-class") // Testing sc should not backup
pvcBuilder := NewPVC(namespace, pvcName).WithStorageClass(StorageClassName) // Testing sc should not backup
err = CreatePvc(r.Client, pvcBuilder)
} else if index%3 == 1 {
pvcBuilder := NewPVC(namespace, pvcName).WithStorageClass("e2e-storage-class-2") // Testing sc should backup
pvcBuilder := NewPVC(namespace, pvcName).WithStorageClass(StorageClassName2) // Testing sc should backup
err = CreatePvc(r.Client, pvcBuilder)
} else if index%3 == 2 {
pvcBuilder := NewPVC(namespace, pvcName).WithStorageClass("e2e-storage-class-2").WithResourceStorage(resource.MustParse("2Gi")) // Testing capacity should not backup
pvcBuilder := NewPVC(namespace, pvcName).WithStorageClass(StorageClassName2).WithResourceStorage(resource.MustParse("2Gi")) // Testing capacity should not backup
err = CreatePvc(r.Client, pvcBuilder)
}
if err != nil {
@@ -263,28 +263,3 @@ func (r *ResourcePoliciesCase) deleteTestStorageClassList(scList []string) error
}
return nil
}
func (r *ResourcePoliciesCase) installTestStorageClasses(path string) error {
err := InstallStorageClass(r.Ctx, path)
if err != nil {
return err
}
content, err := os.ReadFile(path)
if err != nil {
return errors.Wrapf(err, "failed to get %s when install storage class", path)
}
// replace sc to new value
newContent := strings.ReplaceAll(string(content), "name: e2e-storage-class", "name: e2e-storage-class-2")
tmpFile, err := os.CreateTemp("", "sc-file")
if err != nil {
return errors.Wrapf(err, "failed to create temp file when install storage class")
}
defer os.Remove(tmpFile.Name())
if _, err := tmpFile.WriteString(newContent); err != nil {
return errors.Wrapf(err, "failed to write content into temp file %s when install storage class", tmpFile.Name())
}
return InstallStorageClass(r.Ctx, tmpFile.Name())
}

View File

@@ -166,7 +166,7 @@ func (o *OrderedResources) Clean() error {
return nil
}
func (o *OrderedResources) DeleteBackups() error {
func (o *OrderedResources) DeleteAllBackups() error {
backupList := new(velerov1api.BackupList)
if err := o.Client.Kubebuilder.List(o.Ctx, backupList, &kbclient.ListOptions{Namespace: o.VeleroCfg.VeleroNamespace}); err != nil {
return fmt.Errorf("failed to list backup object in %s namespace with err %v", o.VeleroCfg.VeleroNamespace, err)

View File

@@ -192,7 +192,7 @@ func (t *TestCase) Clean() error {
CleanupNamespaces(t.Ctx, t.Client, t.CaseBaseName)
})
By("Clean backups after test", func() {
DeleteBackups(t.Ctx, t.Client)
DeleteAllBackups(t.Ctx, t.Client)
})
}
return nil

View File

@@ -87,7 +87,7 @@ func BackupUpgradeRestoreTest(useVolumeSnapshots bool, veleroCLI2Version VeleroC
AfterEach(func() {
if !veleroCfg.Debug {
By("Clean backups after test", func() {
DeleteBackups(context.Background(), *veleroCfg.ClientToInstallVelero)
DeleteAllBackups(context.Background(), *veleroCfg.ClientToInstallVelero)
})
By(fmt.Sprintf("Delete sample workload namespace %s", upgradeNamespace), func() {
DeleteNamespace(context.Background(), *veleroCfg.ClientToInstallVelero, upgradeNamespace, true)
@@ -136,6 +136,7 @@ func BackupUpgradeRestoreTest(useVolumeSnapshots bool, veleroCLI2Version VeleroC
version, err := GetVeleroVersion(oneHourTimeout, tmpCfgForOldVeleroInstall.VeleroCLI, true)
Expect(err).To(Succeed(), "Fail to get Velero version")
tmpCfgForOldVeleroInstall.VeleroVersion = version
tmpCfgForOldVeleroInstall.UseVolumeSnapshots = useVolumeSnapshots
if supportUploaderType {
tmpCfgForOldVeleroInstall.UseRestic = false

View File

@@ -25,6 +25,7 @@ import (
)
const StorageClassName = "e2e-storage-class"
const StorageClassName2 = "e2e-storage-class-2"
var UUIDgen uuid.UUID
@@ -39,50 +40,53 @@ var ReportData *Report
type VeleroConfig struct {
VeleroCfgInPerf
VeleroCLI string
VeleroImage string
VeleroVersion string
CloudCredentialsFile string
BSLConfig string
BSLBucket string
BSLPrefix string
VSLConfig string
CloudProvider string
ObjectStoreProvider string
VeleroNamespace string
AdditionalBSLProvider string
AdditionalBSLBucket string
AdditionalBSLPrefix string
AdditionalBSLConfig string
AdditionalBSLCredentials string
RegistryCredentialFile string
RestoreHelperImage string
UpgradeFromVeleroVersion string
UpgradeFromVeleroCLI string
MigrateFromVeleroVersion string
MigrateFromVeleroCLI string
Plugins string
AddBSLPlugins string
InstallVelero bool
KibishiiDirectory string
Features string
Debug bool
GCFrequency string
DefaultCluster string
StandbyCluster string
ClientToInstallVelero *TestClient
DefaultClient *TestClient
StandbyClient *TestClient
UploaderType string
UseNodeAgent bool
UseRestic bool
ProvideSnapshotsVolumeParam bool
DefaultVolumesToFsBackup bool
UseVolumeSnapshots bool
VeleroServerDebugMode bool
SnapshotMoveData bool
DataMoverPlugin string
StandbyClusterCloudProvider string
VeleroCLI string
VeleroImage string
VeleroVersion string
CloudCredentialsFile string
BSLConfig string
BSLBucket string
BSLPrefix string
VSLConfig string
CloudProvider string
ObjectStoreProvider string
VeleroNamespace string
AdditionalBSLProvider string
AdditionalBSLBucket string
AdditionalBSLPrefix string
AdditionalBSLConfig string
AdditionalBSLCredentials string
RegistryCredentialFile string
RestoreHelperImage string
UpgradeFromVeleroVersion string
UpgradeFromVeleroCLI string
MigrateFromVeleroVersion string
MigrateFromVeleroCLI string
Plugins string
AddBSLPlugins string
InstallVelero bool
KibishiiDirectory string
Features string
Debug bool
GCFrequency string
DefaultCluster string
StandbyCluster string
ClientToInstallVelero *TestClient
DefaultClient *TestClient
StandbyClient *TestClient
UploaderType string
UseNodeAgent bool
UseRestic bool
ProvideSnapshotsVolumeParam bool
DefaultVolumesToFsBackup bool
UseVolumeSnapshots bool
VeleroServerDebugMode bool
SnapshotMoveData bool
DataMoverPlugin string
StandbyClusterCloudProvider string
StandbyClusterPlugins string
StandbyClusterOjbectStoreProvider string
DebugVeleroPodRestart bool
}
type VeleroCfgInPerf struct {

View File

@@ -7,6 +7,7 @@ import (
"errors"
"fmt"
"io"
"os"
"os/exec"
)
@@ -87,3 +88,25 @@ func CMDExecWithOutput(checkCMD *exec.Cmd) (*[]byte, error) {
}
return &jsonBuf, err
}
func WriteToFile(content, fileName string) error {
file, err := os.OpenFile(fileName, os.O_WRONLY|os.O_CREATE, 0666)
if err != nil {
fmt.Println("fail to open file", err)
return err
}
defer file.Close()
write := bufio.NewWriter(file)
_, err = write.WriteString(content)
if err != nil {
fmt.Println("fail to WriteString file", err)
return err
}
err = write.Flush()
if err != nil {
fmt.Println("fail to Flush file", err)
return err
}
return nil
}

View File

@@ -20,6 +20,7 @@ import (
"fmt"
"os"
"os/exec"
"strings"
"time"
"github.com/pkg/errors"
@@ -30,7 +31,7 @@ import (
"github.com/vmware-tanzu/velero/pkg/builder"
veleroexec "github.com/vmware-tanzu/velero/pkg/util/exec"
"github.com/vmware-tanzu/velero/test/util/common"
common "github.com/vmware-tanzu/velero/test/util/common"
)
// ensureClusterExists returns whether or not a Kubernetes cluster exists for tests to be run on.
@@ -289,15 +290,15 @@ func ReadFileFromPodVolume(ctx context.Context, namespace, podName, containerNam
return stdout, err
}
func KubectlGetInfo(cmdName string, arg []string) {
func RunCommand(cmdName string, arg []string) string {
cmd := exec.CommandContext(context.Background(), cmdName, arg...)
fmt.Printf("Kubectl exec cmd =%v\n", cmd)
fmt.Printf("Run cmd =%v\n", cmd)
stdout, stderr, err := veleroexec.RunCommand(cmd)
fmt.Println(stdout)
if err != nil {
fmt.Println(stderr)
fmt.Println(err)
}
return stdout
}
func KubectlGetDsJson(veleroNamespace string) (string, error) {
@@ -363,3 +364,26 @@ func CreateVolumes(pvcName string, volumeNameList []string) (vols []*corev1.Volu
}
return
}
func CollectClusterEvents(key string, pods []string) {
prefix := "pod"
date := RunCommand("date", []string{"-u"})
logs := []string{}
logs = append(logs, date)
logs = append(logs, RunCommand("kubectl", []string{"get", "events", "-o", "custom-columns=FirstSeen:.firstTimestamp,Count:.count,From:.source.component,Type:.type,Reason:.reason,Message:.message", "--all-namespaces"}))
logs = append(logs, RunCommand("kubectl", []string{"get", "events", "-o", "yaml", "--all-namespaces"}))
for _, pod := range pods {
logs = append(logs, RunCommand("kubectl", []string{"logs", "-n", "velero", pod, "--previous"}))
prefix = fmt.Sprintf("%s-%s", prefix, pod)
}
logs = append(logs, RunCommand("date", []string{"-u"}))
log := strings.Join(logs, "\n")
fileName := fmt.Sprintf("%s-%s", prefix, key)
fmt.Printf("Cluster event log file %s: %s", fileName, log)
err := common.WriteToFile(log, fileName)
if err != nil {
fmt.Printf("Fail to log cluster event: %v", err)
}
}

View File

@@ -105,6 +105,7 @@ func RunKibishiiTests(veleroCfg VeleroConfig, backupName, restoreName, backupLoc
RunDebug(context.Background(), veleroCLI, veleroNamespace, backupName, "")
return errors.Wrapf(err, "Failed to backup kibishii namespace %s", kibishiiNamespace)
}
fmt.Printf("VeleroBackupNamespace done %s\n", time.Now().Format("2006-01-02 15:04:05"))
if useVolumeSnapshots {
if providerName == "vsphere" {
@@ -291,15 +292,16 @@ func verifyData(ctx context.Context, namespace string, kibishiiData *KibishiiDat
return false, nil
}
if err != nil {
fmt.Printf("Kibishi verify stdout Timeout occurred: %s stderr: %s err: %s", stdout, stderr, err)
fmt.Printf("Kibishi verify stdout Timeout occurred: %s stderr: %s err: %s\n", stdout, stderr, err)
return false, nil
}
return true, nil
})
if err != nil {
return errors.Wrapf(err, fmt.Sprintf("Failed to wait generate data in namespace %s", namespace))
return errors.Wrapf(err, fmt.Sprintf("Failed to verify kibishii data in namespace %s\n", namespace))
}
fmt.Printf("Success to verify kibishii data in namespace %s\n", namespace)
return nil
}

View File

@@ -18,15 +18,20 @@ package providers
import (
"fmt"
"net/url"
"os"
"strconv"
"strings"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/credentials"
"github.com/aws/aws-sdk-go/aws/endpoints"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/ec2"
"github.com/aws/aws-sdk-go/service/s3"
"github.com/aws/aws-sdk-go/service/s3/s3manager"
"github.com/pkg/errors"
"golang.org/x/net/context"
"github.com/vmware-tanzu/velero/pkg/cmd/util/flag"
"github.com/vmware-tanzu/velero/test"
@@ -34,6 +39,173 @@ import (
type AWSStorage string
const (
s3URLKey = "s3Url"
publicURLKey = "publicUrl"
kmsKeyIDKey = "kmsKeyId"
customerKeyEncryptionFileKey = "customerKeyEncryptionFile"
s3ForcePathStyleKey = "s3ForcePathStyle"
bucketKey = "bucket"
signatureVersionKey = "signatureVersion"
credentialsFileKey = "credentialsFile"
credentialProfileKey = "profile"
serverSideEncryptionKey = "serverSideEncryption"
insecureSkipTLSVerifyKey = "insecureSkipTLSVerify"
caCertKey = "caCert"
enableSharedConfigKey = "enableSharedConfig"
)
func newSessionOptions(config aws.Config, profile, caCert, credentialsFile, enableSharedConfig string) (session.Options, error) {
sessionOptions := session.Options{Config: config, Profile: profile}
if caCert != "" {
sessionOptions.CustomCABundle = strings.NewReader(caCert)
}
if credentialsFile == "" && os.Getenv("AWS_SHARED_CREDENTIALS_FILE") != "" {
credentialsFile = os.Getenv("AWS_SHARED_CREDENTIALS_FILE")
}
if credentialsFile != "" {
if _, err := os.Stat(credentialsFile); err != nil {
if os.IsNotExist(err) {
return session.Options{}, errors.Wrapf(err, "provided credentialsFile does not exist")
}
return session.Options{}, errors.Wrapf(err, "could not get credentialsFile info")
}
sessionOptions.SharedConfigFiles = append(sessionOptions.SharedConfigFiles, credentialsFile)
sessionOptions.SharedConfigState = session.SharedConfigEnable
}
return sessionOptions, nil
}
// takes AWS session options to create a new session
func getSession(options session.Options) (*session.Session, error) {
sess, err := session.NewSessionWithOptions(options)
if err != nil {
return nil, errors.WithStack(err)
}
if _, err := sess.Config.Credentials.Get(); err != nil {
return nil, errors.WithStack(err)
}
return sess, nil
}
// GetBucketRegion returns the AWS region that a bucket is in, or an error
// if the region cannot be determined.
func GetBucketRegion(bucket string) (string, error) {
var region string
session, err := session.NewSession()
if err != nil {
return "", errors.WithStack(err)
}
for _, partition := range endpoints.DefaultPartitions() {
for regionHint := range partition.Regions() {
region, _ = s3manager.GetBucketRegion(context.Background(), session, bucket, regionHint)
// we only need to try a single region hint per partition, so break after the first
break
}
if region != "" {
return region, nil
}
}
return "", errors.New("unable to determine bucket's region")
}
func (s AWSStorage) CreateSession(credentialProfile, credentialsFile, enableSharedConfig, caCert, bucket, bslPrefix, bslConfig string) (*session.Session, error) {
var err error
config := flag.NewMap()
config.Set(bslConfig)
region := config.Data()["region"]
objectsInput := s3.ListObjectsV2Input{}
objectsInput.Bucket = aws.String(bucket)
objectsInput.Delimiter = aws.String("/")
s3URL := ""
s3ForcePathStyleVal := ""
s3ForcePathStyle := false
if s3ForcePathStyleVal != "" {
if s3ForcePathStyle, err = strconv.ParseBool(s3ForcePathStyleVal); err != nil {
return nil, errors.Wrapf(err, "could not parse %s (expected bool)", s3ForcePathStyleKey)
}
}
// AWS (not an alternate S3-compatible API) and region not
// explicitly specified: determine the bucket's region
if s3URL == "" && region == "" {
var err error
region, err = GetBucketRegion(bucket)
if err != nil {
return nil, err
}
}
serverConfig, err := newAWSConfig(s3URL, region, s3ForcePathStyle)
if err != nil {
return nil, err
}
sessionOptions, err := newSessionOptions(*serverConfig, credentialProfile, caCert, credentialsFile, enableSharedConfig)
if err != nil {
return nil, err
}
serverSession, err := getSession(sessionOptions)
if err != nil {
fmt.Println(err)
return nil, err
}
return serverSession, nil
}
// IsValidS3URLScheme returns true if the scheme is http:// or https://
// and the url parses correctly, otherwise, return false
func IsValidS3URLScheme(s3URL string) bool {
u, err := url.Parse(s3URL)
if err != nil {
return false
}
if u.Scheme != "http" && u.Scheme != "https" {
return false
}
return true
}
func newAWSConfig(url, region string, forcePathStyle bool) (*aws.Config, error) {
awsConfig := aws.NewConfig().
WithRegion(region).
WithS3ForcePathStyle(forcePathStyle)
if url != "" {
if !IsValidS3URLScheme(url) {
return nil, errors.Errorf("Invalid s3 url %s, URL must be valid according to https://golang.org/pkg/net/url/#Parse and start with http:// or https://", url)
}
awsConfig = awsConfig.WithEndpointResolver(
endpoints.ResolverFunc(func(service, region string, optFns ...func(*endpoints.Options)) (endpoints.ResolvedEndpoint, error) {
if service == s3.EndpointsID {
return endpoints.ResolvedEndpoint{
URL: url,
}, nil
}
return endpoints.DefaultResolver().EndpointFor(service, region, optFns...)
}),
)
}
return awsConfig, nil
}
func (s AWSStorage) ListItems(client *s3.S3, objectsV2Input *s3.ListObjectsV2Input) (*s3.ListObjectsV2Output, error) {
res, err := client.ListObjectsV2(objectsV2Input)
if err != nil {
@@ -54,15 +226,20 @@ func (s AWSStorage) DeleteItem(client *s3.S3, deleteObjectV2Input *s3.DeleteObje
func (s AWSStorage) IsObjectsInBucket(cloudCredentialsFile, bslBucket, bslPrefix, bslConfig, backupObject string) (bool, error) {
config := flag.NewMap()
config.Set(bslConfig)
region := config.Data()["region"]
objectsInput := s3.ListObjectsV2Input{}
objectsInput.Bucket = aws.String(bslBucket)
objectsInput.Delimiter = aws.String("/")
s3url := ""
if bslPrefix != "" {
objectsInput.Prefix = aws.String(bslPrefix)
}
var err error
var s3Config *aws.Config
var sess *session.Session
region := config.Data()["region"]
s3url := ""
if region == "minio" {
s3url = config.Data()["s3Url"]
s3Config = &aws.Config{
@@ -72,21 +249,19 @@ func (s AWSStorage) IsObjectsInBucket(cloudCredentialsFile, bslBucket, bslPrefix
DisableSSL: aws.Bool(true),
S3ForcePathStyle: aws.Bool(true),
}
sess, err = session.NewSession(s3Config)
} else {
s3Config = &aws.Config{
Region: aws.String(region),
Credentials: credentials.NewSharedCredentials(cloudCredentialsFile, ""),
}
sess, err = s.CreateSession("", cloudCredentialsFile, "false", "", "", "", bslConfig)
}
sess, err := session.NewSession(s3Config)
if err != nil {
return false, errors.Wrapf(err, "Failed to create AWS session")
return false, errors.Wrapf(err, fmt.Sprintf("Failed to create AWS session of region %s", region))
}
svc := s3.New(sess)
bucketObjects, err := s.ListItems(svc, &objectsInput)
if err != nil {
fmt.Println("Couldn't retrieve bucket items!")
return false, errors.Wrapf(err, "Couldn't retrieve bucket items")
}
@@ -111,12 +286,17 @@ func (s AWSStorage) IsObjectsInBucket(cloudCredentialsFile, bslBucket, bslPrefix
func (s AWSStorage) DeleteObjectsInBucket(cloudCredentialsFile, bslBucket, bslPrefix, bslConfig, backupObject string) error {
config := flag.NewMap()
config.Set(bslConfig)
var err error
var sess *session.Session
region := config.Data()["region"]
s3url := ""
s3Config := &aws.Config{
Region: aws.String(region),
Credentials: credentials.NewSharedCredentials(cloudCredentialsFile, ""),
}
if region == "minio" {
s3url = config.Data()["s3Url"]
s3Config = &aws.Config{
@@ -126,12 +306,17 @@ func (s AWSStorage) DeleteObjectsInBucket(cloudCredentialsFile, bslBucket, bslPr
DisableSSL: aws.Bool(true),
S3ForcePathStyle: aws.Bool(true),
}
sess, err = session.NewSession(s3Config)
} else {
sess, err = s.CreateSession("", cloudCredentialsFile, "false", "", "", "", bslConfig)
}
sess, err := session.NewSession(s3Config)
if err != nil {
return errors.Wrapf(err, "Error waiting for uploads to complete")
return errors.Wrapf(err, fmt.Sprintf("Failed to create AWS session of region %s", region))
}
svc := s3.New(sess)
fullPrefix := strings.Trim(bslPrefix, "/") + "/" + strings.Trim(backupObject, "/") + "/"
iter := s3manager.NewDeleteListIterator(svc, &s3.ListObjectsInput{
Bucket: aws.String(bslBucket),
@@ -139,7 +324,7 @@ func (s AWSStorage) DeleteObjectsInBucket(cloudCredentialsFile, bslBucket, bslPr
})
if err := s3manager.NewBatchDeleteWithClient(svc).Delete(aws.BackgroundContext(), iter); err != nil {
return errors.Wrapf(err, "Error waiting for uploads to complete")
return errors.Wrapf(err, "Fail to delete object")
}
fmt.Printf("Deleted object(s) from bucket: %s %s \n", bslBucket, fullPrefix)
return nil
@@ -150,16 +335,15 @@ func (s AWSStorage) IsSnapshotExisted(cloudCredentialsFile, bslConfig, backupObj
config := flag.NewMap()
config.Set(bslConfig)
region := config.Data()["region"]
s3Config := &aws.Config{
Region: aws.String(region),
Credentials: credentials.NewSharedCredentials(cloudCredentialsFile, ""),
}
if region == "minio" {
return errors.New("No snapshot for Minio provider")
}
sess, err := session.NewSession(s3Config)
sess, err := s.CreateSession("", cloudCredentialsFile, "false", "", "", "", bslConfig)
if err != nil {
return errors.Wrapf(err, "Error waiting for uploads to complete")
fmt.Printf("Fail to create session with profile %s and config %s", cloudCredentialsFile, bslConfig)
return errors.Wrapf(err, "Fail to create session with profile %s and config %s", cloudCredentialsFile, bslConfig)
}
svc := ec2.New(sess)
params := &ec2.DescribeSnapshotsInput{

View File

@@ -240,7 +240,7 @@ func installVeleroServer(ctx context.Context, cli, cloudProvider string, options
args = append(args, "--features", options.Features)
if strings.EqualFold(options.Features, "EnableCSI") && options.UseVolumeSnapshots {
if strings.EqualFold(cloudProvider, "azure") {
if err := KubectlApplyByFile(ctx, "util/csi/AzureVolumeSnapshotClass.yaml"); err != nil {
if err := KubectlApplyByFile(ctx, "../util/csi/AzureVolumeSnapshotClass.yaml"); err != nil {
return err
}
}

View File

@@ -217,8 +217,14 @@ func checkBackupPhase(ctx context.Context, veleroCLI string, veleroNamespace str
return err
}
if backup.Status.Phase != expectedPhase {
if VeleroCfg.DebugVeleroPodRestart {
pods, err := GetVeleroPodName(ctx)
fmt.Println(err)
CollectClusterEvents(backupName, pods)
}
return errors.Errorf("Unexpected backup phase got %s, expecting %s", backup.Status.Phase, expectedPhase)
}
return nil
}
@@ -239,6 +245,11 @@ func checkRestorePhase(ctx context.Context, veleroCLI string, veleroNamespace st
return err
}
if restore.Status.Phase != expectedPhase {
if VeleroCfg.DebugVeleroPodRestart {
pods, err := GetVeleroPodName(ctx)
fmt.Println(err)
CollectClusterEvents(restoreName, pods)
}
return errors.Errorf("Unexpected restore phase got %s, expecting %s", restore.Status.Phase, expectedPhase)
}
return nil
@@ -442,7 +453,6 @@ func VeleroRestoreExec(ctx context.Context, veleroCLI, veleroNamespace, restoreN
if err := VeleroCmdExec(ctx, veleroCLI, args); err != nil {
return err
}
return checkRestorePhase(ctx, veleroCLI, veleroNamespace, restoreName, phaseExpect)
}
@@ -1241,7 +1251,7 @@ func GetVersionList(veleroCli, veleroVersion string) []VeleroCLI2Version {
}
return veleroCLI2VersionList
}
func DeleteBackups(ctx context.Context, client TestClient) error {
func DeleteAllBackups(ctx context.Context, client TestClient) error {
backupList := new(velerov1api.BackupList)
if err := client.Kubebuilder.List(ctx, backupList, &kbclient.ListOptions{Namespace: VeleroCfg.VeleroNamespace}); err != nil {
return fmt.Errorf("failed to list backup object in %s namespace with err %v", VeleroCfg.VeleroNamespace, err)
@@ -1255,6 +1265,24 @@ func DeleteBackups(ctx context.Context, client TestClient) error {
return nil
}
func DeleteBackups(ctx context.Context, client TestClient, backupNames []string) error {
backupList := new(velerov1api.BackupList)
if err := client.Kubebuilder.List(ctx, backupList, &kbclient.ListOptions{Namespace: VeleroCfg.VeleroNamespace}); err != nil {
return fmt.Errorf("failed to list backup object in %s namespace with err %v", VeleroCfg.VeleroNamespace, err)
}
for _, backup := range backupList.Items {
for _, bn := range backupNames {
if backup.Name == bn {
fmt.Printf("Backup %s is going to be deleted...\n", backup.Name)
if err := VeleroBackupDelete(ctx, VeleroCfg.VeleroCLI, VeleroCfg.VeleroNamespace, backup.Name); err != nil {
return err
}
}
}
}
return nil
}
func DeleteRestores(ctx context.Context, client TestClient) error {
restoreList := new(velerov1api.RestoreList)
if err := client.Kubebuilder.List(ctx, restoreList, &kbclient.ListOptions{Namespace: VeleroCfg.VeleroNamespace}); err != nil {
@@ -1482,3 +1510,56 @@ func IsSupportUploaderType(version string) (bool, error) {
return false, nil
}
}
func GetVeleroPodName(ctx context.Context) ([]string, error) {
// Example:
// NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE
// kibishii-data-kibishii-deployment-0 Bound pvc-94b9fdf2-c30f-4a7b-87bf-06eadca0d5b6 1Gi RWO kibishii-storage-class 115s
cmds := []*common.OsCommandLine{}
cmd := &common.OsCommandLine{
Cmd: "kubectl",
Args: []string{"get", "pod", "-n", "velero"},
}
cmds = append(cmds, cmd)
cmd = &common.OsCommandLine{
Cmd: "grep",
Args: []string{"velero"},
}
cmds = append(cmds, cmd)
cmd = &common.OsCommandLine{
Cmd: "awk",
Args: []string{"{print $1}"},
}
cmds = append(cmds, cmd)
return common.GetListByCmdPipes(ctx, cmds)
}
func InstallTestStorageClasses(path string) error {
ctx, ctxCancel := context.WithTimeout(context.Background(), time.Minute*5)
defer ctxCancel()
err := InstallStorageClass(ctx, path)
if err != nil {
return err
}
content, err := os.ReadFile(path)
if err != nil {
return errors.Wrapf(err, "failed to get %s when install storage class", path)
}
// replace sc to new value
newContent := strings.ReplaceAll(string(content), fmt.Sprintf("name: %s", StorageClassName), fmt.Sprintf("name: %s", StorageClassName2))
tmpFile, err := os.CreateTemp("", "sc-file")
if err != nil {
return errors.Wrapf(err, "failed to create temp file when install storage class")
}
defer os.Remove(tmpFile.Name())
if _, err := tmpFile.WriteString(newContent); err != nil {
return errors.Wrapf(err, "failed to write content into temp file %s when install storage class", tmpFile.Name())
}
return InstallStorageClass(ctx, tmpFile.Name())
}