Merge pull request #9419 from shubham-pampattiwar/fix-vgs-volume-policy-9344
Some checks failed
Run the E2E test on kind / get-go-version (push) Failing after 53s
Run the E2E test on kind / build (push) Has been skipped
Run the E2E test on kind / setup-test-matrix (push) Successful in 2s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Main CI / get-go-version (push) Successful in 9s
Main CI / Build (push) Failing after 26s
Close stale issues and PRs / stale (push) Successful in 11s
Trivy Nightly Scan / Trivy nightly scan (velero, main) (push) Failing after 1m20s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-aws, main) (push) Failing after 56s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-gcp, main) (push) Failing after 1m0s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-microsoft-azure, main) (push) Failing after 50s

Apply volume policies to VolumeGroupSnapshot PVC filtering
This commit is contained in:
lyndon-li
2025-11-20 14:28:24 +08:00
committed by GitHub
4 changed files with 453 additions and 2 deletions

View File

@@ -0,0 +1 @@
Apply volume policies to VolumeGroupSnapshot PVC filtering

View File

@@ -621,8 +621,30 @@ func (p *pvcBackupItemAction) getVolumeSnapshotReference(
return nil, errors.Wrapf(err, "failed to list PVCs in VolumeGroupSnapshot group %q in namespace %q", group, pvc.Namespace)
}
// Filter PVCs by volume policy
filteredPVCs, err := p.filterPVCsByVolumePolicy(groupedPVCs, backup)
if err != nil {
return nil, errors.Wrapf(err, "failed to filter PVCs by volume policy for VolumeGroupSnapshot group %q", group)
}
// Warn if any PVCs were filtered out
if len(filteredPVCs) < len(groupedPVCs) {
for _, originalPVC := range groupedPVCs {
found := false
for _, filteredPVC := range filteredPVCs {
if originalPVC.Name == filteredPVC.Name {
found = true
break
}
}
if !found {
p.log.Warnf("PVC %s/%s has VolumeGroupSnapshot label %s=%s but is excluded by volume policy", originalPVC.Namespace, originalPVC.Name, vgsLabelKey, group)
}
}
}
// Determine the CSI driver for the grouped PVCs
driver, err := p.determineCSIDriver(groupedPVCs)
driver, err := p.determineCSIDriver(filteredPVCs)
if err != nil {
return nil, errors.Wrapf(err, "failed to determine CSI driver for PVCs in VolumeGroupSnapshot group %q", group)
}
@@ -643,7 +665,7 @@ func (p *pvcBackupItemAction) getVolumeSnapshotReference(
}
// Wait for all the VS objects associated with the VGS to have status and VGS Name (VS readiness is checked in legacy flow) and get the PVC-to-VS map
vsMap, err := p.waitForVGSAssociatedVS(ctx, groupedPVCs, newVGS, backup.Spec.CSISnapshotTimeout.Duration)
vsMap, err := p.waitForVGSAssociatedVS(ctx, filteredPVCs, newVGS, backup.Spec.CSISnapshotTimeout.Duration)
if err != nil {
return nil, errors.Wrapf(err, "timeout waiting for VolumeSnapshots to have status created via VolumeGroupSnapshot %s", newVGS.Name)
}
@@ -734,6 +756,40 @@ func (p *pvcBackupItemAction) listGroupedPVCs(ctx context.Context, namespace, la
return pvcList.Items, nil
}
func (p *pvcBackupItemAction) filterPVCsByVolumePolicy(
pvcs []corev1api.PersistentVolumeClaim,
backup *velerov1api.Backup,
) ([]corev1api.PersistentVolumeClaim, error) {
var filteredPVCs []corev1api.PersistentVolumeClaim
for _, pvc := range pvcs {
// Convert PVC to unstructured for ShouldPerformSnapshotWithBackup
pvcMap, err := runtime.DefaultUnstructuredConverter.ToUnstructured(&pvc)
if err != nil {
return nil, errors.Wrapf(err, "failed to convert PVC %s/%s to unstructured", pvc.Namespace, pvc.Name)
}
unstructuredPVC := &unstructured.Unstructured{Object: pvcMap}
// Check if this PVC should be snapshotted according to volume policies
shouldSnapshot, err := volumehelper.ShouldPerformSnapshotWithBackup(
unstructuredPVC,
kuberesource.PersistentVolumeClaims,
*backup,
p.crClient,
p.log,
)
if err != nil {
return nil, errors.Wrapf(err, "failed to check volume policy for PVC %s/%s", pvc.Namespace, pvc.Name)
}
if shouldSnapshot {
filteredPVCs = append(filteredPVCs, pvc)
}
}
return filteredPVCs, nil
}
func (p *pvcBackupItemAction) determineCSIDriver(
pvcs []corev1api.PersistentVolumeClaim,
) (string, error) {

View File

@@ -586,6 +586,280 @@ func TestListGroupedPVCs(t *testing.T) {
}
}
func TestFilterPVCsByVolumePolicy(t *testing.T) {
tests := []struct {
name string
pvcs []corev1api.PersistentVolumeClaim
pvs []corev1api.PersistentVolume
volumePolicyStr string
expectCount int
expectError bool
}{
{
name: "All PVCs should be included when no volume policy",
pvcs: []corev1api.PersistentVolumeClaim{
{
ObjectMeta: metav1.ObjectMeta{Name: "pvc-1", Namespace: "ns-1"},
Spec: corev1api.PersistentVolumeClaimSpec{
VolumeName: "pv-1",
StorageClassName: pointer.String("sc-1"),
},
Status: corev1api.PersistentVolumeClaimStatus{Phase: corev1api.ClaimBound},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "pvc-2", Namespace: "ns-1"},
Spec: corev1api.PersistentVolumeClaimSpec{
VolumeName: "pv-2",
StorageClassName: pointer.String("sc-1"),
},
Status: corev1api.PersistentVolumeClaimStatus{Phase: corev1api.ClaimBound},
},
},
pvs: []corev1api.PersistentVolume{
{
ObjectMeta: metav1.ObjectMeta{Name: "pv-1"},
Spec: corev1api.PersistentVolumeSpec{
PersistentVolumeSource: corev1api.PersistentVolumeSource{
CSI: &corev1api.CSIPersistentVolumeSource{Driver: "csi-driver-1"},
},
},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "pv-2"},
Spec: corev1api.PersistentVolumeSpec{
PersistentVolumeSource: corev1api.PersistentVolumeSource{
CSI: &corev1api.CSIPersistentVolumeSource{Driver: "csi-driver-1"},
},
},
},
},
expectCount: 2,
},
{
name: "Filter out NFS PVC by volume policy",
pvcs: []corev1api.PersistentVolumeClaim{
{
ObjectMeta: metav1.ObjectMeta{Name: "pvc-csi", Namespace: "ns-1"},
Spec: corev1api.PersistentVolumeClaimSpec{
VolumeName: "pv-csi",
StorageClassName: pointer.String("sc-1"),
},
Status: corev1api.PersistentVolumeClaimStatus{Phase: corev1api.ClaimBound},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "pvc-nfs", Namespace: "ns-1"},
Spec: corev1api.PersistentVolumeClaimSpec{
VolumeName: "pv-nfs",
StorageClassName: pointer.String("sc-nfs"),
},
Status: corev1api.PersistentVolumeClaimStatus{Phase: corev1api.ClaimBound},
},
},
pvs: []corev1api.PersistentVolume{
{
ObjectMeta: metav1.ObjectMeta{Name: "pv-csi"},
Spec: corev1api.PersistentVolumeSpec{
PersistentVolumeSource: corev1api.PersistentVolumeSource{
CSI: &corev1api.CSIPersistentVolumeSource{Driver: "csi-driver"},
},
},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "pv-nfs"},
Spec: corev1api.PersistentVolumeSpec{
PersistentVolumeSource: corev1api.PersistentVolumeSource{
NFS: &corev1api.NFSVolumeSource{
Server: "nfs-server",
Path: "/export",
},
},
},
},
},
volumePolicyStr: `
version: v1
volumePolicies:
- conditions:
nfs: {}
action:
type: skip
`,
expectCount: 1,
},
{
name: "All PVCs filtered out by volume policy",
pvcs: []corev1api.PersistentVolumeClaim{
{
ObjectMeta: metav1.ObjectMeta{Name: "pvc-nfs-1", Namespace: "ns-1"},
Spec: corev1api.PersistentVolumeClaimSpec{
VolumeName: "pv-nfs-1",
StorageClassName: pointer.String("sc-nfs"),
},
Status: corev1api.PersistentVolumeClaimStatus{Phase: corev1api.ClaimBound},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "pvc-nfs-2", Namespace: "ns-1"},
Spec: corev1api.PersistentVolumeClaimSpec{
VolumeName: "pv-nfs-2",
StorageClassName: pointer.String("sc-nfs"),
},
Status: corev1api.PersistentVolumeClaimStatus{Phase: corev1api.ClaimBound},
},
},
pvs: []corev1api.PersistentVolume{
{
ObjectMeta: metav1.ObjectMeta{Name: "pv-nfs-1"},
Spec: corev1api.PersistentVolumeSpec{
PersistentVolumeSource: corev1api.PersistentVolumeSource{
NFS: &corev1api.NFSVolumeSource{
Server: "nfs-server",
Path: "/export/1",
},
},
},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "pv-nfs-2"},
Spec: corev1api.PersistentVolumeSpec{
PersistentVolumeSource: corev1api.PersistentVolumeSource{
NFS: &corev1api.NFSVolumeSource{
Server: "nfs-server",
Path: "/export/2",
},
},
},
},
},
volumePolicyStr: `
version: v1
volumePolicies:
- conditions:
nfs: {}
action:
type: skip
`,
expectCount: 0,
},
{
name: "Filter out non-CSI PVCs from mixed driver group",
pvcs: []corev1api.PersistentVolumeClaim{
{
ObjectMeta: metav1.ObjectMeta{
Name: "pvc-linstor",
Namespace: "ns-1",
Labels: map[string]string{"app.kubernetes.io/instance": "myapp"},
},
Spec: corev1api.PersistentVolumeClaimSpec{
VolumeName: "pv-linstor",
StorageClassName: pointer.String("sc-linstor"),
},
Status: corev1api.PersistentVolumeClaimStatus{Phase: corev1api.ClaimBound},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: "pvc-nfs",
Namespace: "ns-1",
Labels: map[string]string{"app.kubernetes.io/instance": "myapp"},
},
Spec: corev1api.PersistentVolumeClaimSpec{
VolumeName: "pv-nfs",
StorageClassName: pointer.String("sc-nfs"),
},
Status: corev1api.PersistentVolumeClaimStatus{Phase: corev1api.ClaimBound},
},
},
pvs: []corev1api.PersistentVolume{
{
ObjectMeta: metav1.ObjectMeta{Name: "pv-linstor"},
Spec: corev1api.PersistentVolumeSpec{
PersistentVolumeSource: corev1api.PersistentVolumeSource{
CSI: &corev1api.CSIPersistentVolumeSource{Driver: "linstor.csi.linbit.com"},
},
},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "pv-nfs"},
Spec: corev1api.PersistentVolumeSpec{
PersistentVolumeSource: corev1api.PersistentVolumeSource{
NFS: &corev1api.NFSVolumeSource{
Server: "nfs-server",
Path: "/export",
},
},
},
},
},
volumePolicyStr: `
version: v1
volumePolicies:
- conditions:
nfs: {}
action:
type: skip
`,
expectCount: 1,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
objs := []runtime.Object{}
for i := range tt.pvs {
objs = append(objs, &tt.pvs[i])
}
client := velerotest.NewFakeControllerRuntimeClient(t, objs...)
backup := &velerov1api.Backup{
ObjectMeta: metav1.ObjectMeta{
Name: "test-backup",
Namespace: "velero",
},
Spec: velerov1api.BackupSpec{},
}
// Add volume policy ConfigMap if specified
if tt.volumePolicyStr != "" {
cm := &corev1api.ConfigMap{
ObjectMeta: metav1.ObjectMeta{
Name: "volume-policy",
Namespace: "velero",
},
Data: map[string]string{
"volume-policy": tt.volumePolicyStr,
},
}
require.NoError(t, client.Create(t.Context(), cm))
backup.Spec.ResourcePolicy = &corev1api.TypedLocalObjectReference{
Kind: "ConfigMap",
Name: "volume-policy",
}
}
action := &pvcBackupItemAction{
log: velerotest.NewLogger(),
crClient: client,
}
result, err := action.filterPVCsByVolumePolicy(tt.pvcs, backup)
if tt.expectError {
require.Error(t, err)
} else {
require.NoError(t, err)
require.Len(t, result, tt.expectCount)
// For mixed driver scenarios, verify filtered result can determine single CSI driver
if tt.name == "Filter out non-CSI PVCs from mixed driver group" && len(result) > 0 {
driver, err := action.determineCSIDriver(result)
require.NoError(t, err, "After filtering, determineCSIDriver should not fail with multiple drivers error")
require.Equal(t, "linstor.csi.linbit.com", driver, "Should have the Linstor driver after filtering out NFS")
}
}
})
}
}
func TestDetermineCSIDriver(t *testing.T) {
tests := []struct {
name string

View File

@@ -298,6 +298,96 @@ You can customize the label key that Velero uses to identify VGS groups. This is
3. **Default Value (Lowest Priority):** If you don't provide any custom configuration, Velero defaults to using `velero.io/volume-group`.
## Volume Policies and VolumeGroupSnapshots
Volume policies control which volumes should be backed up and how (snapshot vs filesystem backup). When using VolumeGroupSnapshots, volume policies are applied **before** grouping PVCs.
### How Volume Policies Affect VGS
When Velero processes PVCs for a VolumeGroupSnapshot:
1. **Label Matching:** All PVCs with the matching VGS label are identified
2. **Policy Filtering:** Volume policies are evaluated for each PVC
3. **Group Creation:** Only PVCs that should be snapshotted (not excluded by policy) are included in the VGS
4. **Warning Logging:** If any PVCs are excluded from the group by volume policy, a warning is logged
This behavior ensures that volume policies take precedence over VGS labels. The VGS label indicates "group these volumes **if they're being backed up**", while the volume policy determines "which volumes to back up".
### Example Scenario
Consider an application with mixed storage types where some volumes should be excluded:
```yaml
# Database PVC using CSI driver (should be backed up)
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: db-data
namespace: my-app
labels:
app.kubernetes.io/instance: myapp # VGS label
spec:
storageClassName: csi-storage
# ...
---
# Config PVC using NFS (should be excluded)
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: config-data
namespace: my-app
labels:
app.kubernetes.io/instance: myapp # Same VGS label
spec:
storageClassName: nfs-storage
# ...
```
**Volume Policy Configuration:**
```yaml
apiVersion: v1
kind: ConfigMap
metadata:
name: velero-volume-policies
namespace: velero
data:
volume-policy: |
version: v1
volumePolicies:
- conditions:
nfs: {}
action:
type: skip
```
**Backup Configuration:**
```yaml
apiVersion: velero.io/v1
kind: Backup
metadata:
name: myapp-backup
spec:
includedNamespaces:
- my-app
volumeGroupSnapshotLabelKey: app.kubernetes.io/instance
resourcePolicy:
kind: ConfigMap
name: velero-volume-policies
```
**Result:**
- The NFS PVC (`config-data`) is filtered out by the volume policy
- Only the CSI PVC (`db-data`) is included in the VolumeGroupSnapshot
- A warning is logged: `PVC my-app/config-data has VolumeGroupSnapshot label app.kubernetes.io/instance=myapp but is excluded by volume policy`
- The backup succeeds with a single-volume VGS instead of failing with "multiple CSI drivers" error
### Best Practices
1. **Use Specific Labels:** When possible, use VGS labels that only target volumes you want to group, rather than relying on volume policies for filtering
2. **Monitor Warnings:** Review backup logs for volume policy exclusion warnings to ensure intended PVCs are being backed up
3. **Test Configurations:** Verify that your volume policy and VGS label combinations produce the expected grouping in a test environment
## Troubleshooting
### Common Issues and Solutions
@@ -334,6 +424,36 @@ kubectl logs -n kube-system -l app=ebs-csi-controller --tail=100
- Check VolumeGroupSnapshotClass configuration
- Ensure storage backend supports group snapshots
#### Multiple CSI Drivers Error
**Symptoms:** Backup fails with error about multiple CSI drivers found
```
Error backing up item: failed to determine CSI driver for PVCs in VolumeGroupSnapshot group:
found multiple CSI drivers: linstor.csi.linbit.com and nfs.csi.k8s.io
```
**Cause:** PVCs with the same VGS label use different CSI drivers or include non-CSI volumes
**Solutions:**
1. Use more specific labels that only match PVCs using the same CSI driver
2. Use volume policies to exclude PVCs that shouldn't be snapshotted:
```yaml
apiVersion: v1
kind: ConfigMap
metadata:
name: velero-volume-policies
namespace: velero
data:
volume-policy: |
version: v1
volumePolicies:
- conditions:
nfs: {}
action:
type: skip
```
3. Check backup logs for volume policy warnings to verify filtering is working
#### VolumeGroupSnapshot Setup: Default VolumeSnapshotClass Required
**Issue**