From dc3da29f3ee666162527f149fbcbe9dd6ba3fa61 Mon Sep 17 00:00:00 2001 From: Shubham Pampattiwar Date: Tue, 18 Nov 2025 15:40:30 -0800 Subject: [PATCH 1/4] Apply volume policies to VolumeGroupSnapshot PVC filtering VolumeGroupSnapshots were querying all PVCs with matching labels directly from the cluster without respecting volume policies. This caused errors when labeled PVCs included both CSI and non-CSI volumes, or volumes from different CSI drivers that were excluded by policies. This change filters PVCs by volume policy before VGS grouping, ensuring only PVCs that should be snapshotted are included in the group. A warning is logged when PVCs are excluded from VGS due to volume policy. Fixes #9344 Signed-off-by: Shubham Pampattiwar --- pkg/backup/actions/csi/pvc_action.go | 60 ++++- pkg/backup/actions/csi/pvc_action_test.go | 274 ++++++++++++++++++++++ 2 files changed, 332 insertions(+), 2 deletions(-) diff --git a/pkg/backup/actions/csi/pvc_action.go b/pkg/backup/actions/csi/pvc_action.go index c91d7a2b4..62c51accc 100644 --- a/pkg/backup/actions/csi/pvc_action.go +++ b/pkg/backup/actions/csi/pvc_action.go @@ -621,8 +621,30 @@ func (p *pvcBackupItemAction) getVolumeSnapshotReference( return nil, errors.Wrapf(err, "failed to list PVCs in VolumeGroupSnapshot group %q in namespace %q", group, pvc.Namespace) } + // Filter PVCs by volume policy + filteredPVCs, err := p.filterPVCsByVolumePolicy(groupedPVCs, backup) + if err != nil { + return nil, errors.Wrapf(err, "failed to filter PVCs by volume policy for VolumeGroupSnapshot group %q", group) + } + + // Warn if any PVCs were filtered out + if len(filteredPVCs) < len(groupedPVCs) { + for _, originalPVC := range groupedPVCs { + found := false + for _, filteredPVC := range filteredPVCs { + if originalPVC.Name == filteredPVC.Name { + found = true + break + } + } + if !found { + p.log.Warnf("PVC %s/%s has VolumeGroupSnapshot label %s=%s but is excluded by volume policy", originalPVC.Namespace, originalPVC.Name, vgsLabelKey, group) + } + } + } + // Determine the CSI driver for the grouped PVCs - driver, err := p.determineCSIDriver(groupedPVCs) + driver, err := p.determineCSIDriver(filteredPVCs) if err != nil { return nil, errors.Wrapf(err, "failed to determine CSI driver for PVCs in VolumeGroupSnapshot group %q", group) } @@ -643,7 +665,7 @@ func (p *pvcBackupItemAction) getVolumeSnapshotReference( } // Wait for all the VS objects associated with the VGS to have status and VGS Name (VS readiness is checked in legacy flow) and get the PVC-to-VS map - vsMap, err := p.waitForVGSAssociatedVS(ctx, groupedPVCs, newVGS, backup.Spec.CSISnapshotTimeout.Duration) + vsMap, err := p.waitForVGSAssociatedVS(ctx, filteredPVCs, newVGS, backup.Spec.CSISnapshotTimeout.Duration) if err != nil { return nil, errors.Wrapf(err, "timeout waiting for VolumeSnapshots to have status created via VolumeGroupSnapshot %s", newVGS.Name) } @@ -734,6 +756,40 @@ func (p *pvcBackupItemAction) listGroupedPVCs(ctx context.Context, namespace, la return pvcList.Items, nil } +func (p *pvcBackupItemAction) filterPVCsByVolumePolicy( + pvcs []corev1api.PersistentVolumeClaim, + backup *velerov1api.Backup, +) ([]corev1api.PersistentVolumeClaim, error) { + var filteredPVCs []corev1api.PersistentVolumeClaim + + for _, pvc := range pvcs { + // Convert PVC to unstructured for ShouldPerformSnapshotWithBackup + pvcMap, err := runtime.DefaultUnstructuredConverter.ToUnstructured(&pvc) + if err != nil { + return nil, errors.Wrapf(err, "failed to convert PVC %s/%s to unstructured", pvc.Namespace, pvc.Name) + } + unstructuredPVC := &unstructured.Unstructured{Object: pvcMap} + + // Check if this PVC should be snapshotted according to volume policies + shouldSnapshot, err := volumehelper.ShouldPerformSnapshotWithBackup( + unstructuredPVC, + kuberesource.PersistentVolumeClaims, + *backup, + p.crClient, + p.log, + ) + if err != nil { + return nil, errors.Wrapf(err, "failed to check volume policy for PVC %s/%s", pvc.Namespace, pvc.Name) + } + + if shouldSnapshot { + filteredPVCs = append(filteredPVCs, pvc) + } + } + + return filteredPVCs, nil +} + func (p *pvcBackupItemAction) determineCSIDriver( pvcs []corev1api.PersistentVolumeClaim, ) (string, error) { diff --git a/pkg/backup/actions/csi/pvc_action_test.go b/pkg/backup/actions/csi/pvc_action_test.go index 795e3c038..8589a740e 100644 --- a/pkg/backup/actions/csi/pvc_action_test.go +++ b/pkg/backup/actions/csi/pvc_action_test.go @@ -586,6 +586,280 @@ func TestListGroupedPVCs(t *testing.T) { } } +func TestFilterPVCsByVolumePolicy(t *testing.T) { + tests := []struct { + name string + pvcs []corev1api.PersistentVolumeClaim + pvs []corev1api.PersistentVolume + volumePolicyStr string + expectCount int + expectError bool + }{ + { + name: "All PVCs should be included when no volume policy", + pvcs: []corev1api.PersistentVolumeClaim{ + { + ObjectMeta: metav1.ObjectMeta{Name: "pvc-1", Namespace: "ns-1"}, + Spec: corev1api.PersistentVolumeClaimSpec{ + VolumeName: "pv-1", + StorageClassName: pointer.String("sc-1"), + }, + Status: corev1api.PersistentVolumeClaimStatus{Phase: corev1api.ClaimBound}, + }, + { + ObjectMeta: metav1.ObjectMeta{Name: "pvc-2", Namespace: "ns-1"}, + Spec: corev1api.PersistentVolumeClaimSpec{ + VolumeName: "pv-2", + StorageClassName: pointer.String("sc-1"), + }, + Status: corev1api.PersistentVolumeClaimStatus{Phase: corev1api.ClaimBound}, + }, + }, + pvs: []corev1api.PersistentVolume{ + { + ObjectMeta: metav1.ObjectMeta{Name: "pv-1"}, + Spec: corev1api.PersistentVolumeSpec{ + PersistentVolumeSource: corev1api.PersistentVolumeSource{ + CSI: &corev1api.CSIPersistentVolumeSource{Driver: "csi-driver-1"}, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{Name: "pv-2"}, + Spec: corev1api.PersistentVolumeSpec{ + PersistentVolumeSource: corev1api.PersistentVolumeSource{ + CSI: &corev1api.CSIPersistentVolumeSource{Driver: "csi-driver-1"}, + }, + }, + }, + }, + expectCount: 2, + }, + { + name: "Filter out NFS PVC by volume policy", + pvcs: []corev1api.PersistentVolumeClaim{ + { + ObjectMeta: metav1.ObjectMeta{Name: "pvc-csi", Namespace: "ns-1"}, + Spec: corev1api.PersistentVolumeClaimSpec{ + VolumeName: "pv-csi", + StorageClassName: pointer.String("sc-1"), + }, + Status: corev1api.PersistentVolumeClaimStatus{Phase: corev1api.ClaimBound}, + }, + { + ObjectMeta: metav1.ObjectMeta{Name: "pvc-nfs", Namespace: "ns-1"}, + Spec: corev1api.PersistentVolumeClaimSpec{ + VolumeName: "pv-nfs", + StorageClassName: pointer.String("sc-nfs"), + }, + Status: corev1api.PersistentVolumeClaimStatus{Phase: corev1api.ClaimBound}, + }, + }, + pvs: []corev1api.PersistentVolume{ + { + ObjectMeta: metav1.ObjectMeta{Name: "pv-csi"}, + Spec: corev1api.PersistentVolumeSpec{ + PersistentVolumeSource: corev1api.PersistentVolumeSource{ + CSI: &corev1api.CSIPersistentVolumeSource{Driver: "csi-driver"}, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{Name: "pv-nfs"}, + Spec: corev1api.PersistentVolumeSpec{ + PersistentVolumeSource: corev1api.PersistentVolumeSource{ + NFS: &corev1api.NFSVolumeSource{ + Server: "nfs-server", + Path: "/export", + }, + }, + }, + }, + }, + volumePolicyStr: ` +version: v1 +volumePolicies: +- conditions: + nfs: {} + action: + type: skip +`, + expectCount: 1, + }, + { + name: "All PVCs filtered out by volume policy", + pvcs: []corev1api.PersistentVolumeClaim{ + { + ObjectMeta: metav1.ObjectMeta{Name: "pvc-nfs-1", Namespace: "ns-1"}, + Spec: corev1api.PersistentVolumeClaimSpec{ + VolumeName: "pv-nfs-1", + StorageClassName: pointer.String("sc-nfs"), + }, + Status: corev1api.PersistentVolumeClaimStatus{Phase: corev1api.ClaimBound}, + }, + { + ObjectMeta: metav1.ObjectMeta{Name: "pvc-nfs-2", Namespace: "ns-1"}, + Spec: corev1api.PersistentVolumeClaimSpec{ + VolumeName: "pv-nfs-2", + StorageClassName: pointer.String("sc-nfs"), + }, + Status: corev1api.PersistentVolumeClaimStatus{Phase: corev1api.ClaimBound}, + }, + }, + pvs: []corev1api.PersistentVolume{ + { + ObjectMeta: metav1.ObjectMeta{Name: "pv-nfs-1"}, + Spec: corev1api.PersistentVolumeSpec{ + PersistentVolumeSource: corev1api.PersistentVolumeSource{ + NFS: &corev1api.NFSVolumeSource{ + Server: "nfs-server", + Path: "/export/1", + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{Name: "pv-nfs-2"}, + Spec: corev1api.PersistentVolumeSpec{ + PersistentVolumeSource: corev1api.PersistentVolumeSource{ + NFS: &corev1api.NFSVolumeSource{ + Server: "nfs-server", + Path: "/export/2", + }, + }, + }, + }, + }, + volumePolicyStr: ` +version: v1 +volumePolicies: +- conditions: + nfs: {} + action: + type: skip +`, + expectCount: 0, + }, + { + name: "Filter out non-CSI PVCs from mixed driver group", + pvcs: []corev1api.PersistentVolumeClaim{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "pvc-linstor", + Namespace: "ns-1", + Labels: map[string]string{"app.kubernetes.io/instance": "myapp"}, + }, + Spec: corev1api.PersistentVolumeClaimSpec{ + VolumeName: "pv-linstor", + StorageClassName: pointer.String("sc-linstor"), + }, + Status: corev1api.PersistentVolumeClaimStatus{Phase: corev1api.ClaimBound}, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "pvc-nfs", + Namespace: "ns-1", + Labels: map[string]string{"app.kubernetes.io/instance": "myapp"}, + }, + Spec: corev1api.PersistentVolumeClaimSpec{ + VolumeName: "pv-nfs", + StorageClassName: pointer.String("sc-nfs"), + }, + Status: corev1api.PersistentVolumeClaimStatus{Phase: corev1api.ClaimBound}, + }, + }, + pvs: []corev1api.PersistentVolume{ + { + ObjectMeta: metav1.ObjectMeta{Name: "pv-linstor"}, + Spec: corev1api.PersistentVolumeSpec{ + PersistentVolumeSource: corev1api.PersistentVolumeSource{ + CSI: &corev1api.CSIPersistentVolumeSource{Driver: "linstor.csi.linbit.com"}, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{Name: "pv-nfs"}, + Spec: corev1api.PersistentVolumeSpec{ + PersistentVolumeSource: corev1api.PersistentVolumeSource{ + NFS: &corev1api.NFSVolumeSource{ + Server: "nfs-server", + Path: "/export", + }, + }, + }, + }, + }, + volumePolicyStr: ` +version: v1 +volumePolicies: +- conditions: + nfs: {} + action: + type: skip +`, + expectCount: 1, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + objs := []runtime.Object{} + for i := range tt.pvs { + objs = append(objs, &tt.pvs[i]) + } + + client := velerotest.NewFakeControllerRuntimeClient(t, objs...) + + backup := &velerov1api.Backup{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-backup", + Namespace: "velero", + }, + Spec: velerov1api.BackupSpec{}, + } + + // Add volume policy ConfigMap if specified + if tt.volumePolicyStr != "" { + cm := &corev1api.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: "volume-policy", + Namespace: "velero", + }, + Data: map[string]string{ + "volume-policy": tt.volumePolicyStr, + }, + } + require.NoError(t, client.Create(context.Background(), cm)) + + backup.Spec.ResourcePolicy = &corev1api.TypedLocalObjectReference{ + Kind: "ConfigMap", + Name: "volume-policy", + } + } + + action := &pvcBackupItemAction{ + log: velerotest.NewLogger(), + crClient: client, + } + + result, err := action.filterPVCsByVolumePolicy(tt.pvcs, backup) + if tt.expectError { + require.Error(t, err) + } else { + require.NoError(t, err) + require.Len(t, result, tt.expectCount) + + // For mixed driver scenarios, verify filtered result can determine single CSI driver + if tt.name == "Filter out non-CSI PVCs from mixed driver group" && len(result) > 0 { + driver, err := action.determineCSIDriver(result) + require.NoError(t, err, "After filtering, determineCSIDriver should not fail with multiple drivers error") + require.Equal(t, "linstor.csi.linbit.com", driver, "Should have the Linstor driver after filtering out NFS") + } + } + }) + } +} + func TestDetermineCSIDriver(t *testing.T) { tests := []struct { name string From c870eb16452ff78982bf7437236358cafc3d519c Mon Sep 17 00:00:00 2001 From: Shubham Pampattiwar Date: Tue, 18 Nov 2025 15:42:53 -0800 Subject: [PATCH 2/4] Add changelog entry for PR #9419 Signed-off-by: Shubham Pampattiwar --- changelogs/unreleased/9419-shubham-pampattiwar | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelogs/unreleased/9419-shubham-pampattiwar diff --git a/changelogs/unreleased/9419-shubham-pampattiwar b/changelogs/unreleased/9419-shubham-pampattiwar new file mode 100644 index 000000000..9f21ac8ca --- /dev/null +++ b/changelogs/unreleased/9419-shubham-pampattiwar @@ -0,0 +1 @@ +Apply volume policies to VolumeGroupSnapshot PVC filtering From 324c2fb448a8bf8a8dd4a18a9847a637b0c65e45 Mon Sep 17 00:00:00 2001 From: Shubham Pampattiwar Date: Tue, 18 Nov 2025 15:46:30 -0800 Subject: [PATCH 3/4] Document volume policy interaction with VolumeGroupSnapshots Add documentation explaining how volume policies are applied before VGS grouping, including examples and troubleshooting guidance for the multiple CSI drivers scenario. Signed-off-by: Shubham Pampattiwar --- .../docs/main/volume-group-snapshots.md | 120 ++++++++++++++++++ 1 file changed, 120 insertions(+) diff --git a/site/content/docs/main/volume-group-snapshots.md b/site/content/docs/main/volume-group-snapshots.md index cb2e545d2..95cf33ff9 100644 --- a/site/content/docs/main/volume-group-snapshots.md +++ b/site/content/docs/main/volume-group-snapshots.md @@ -298,6 +298,96 @@ You can customize the label key that Velero uses to identify VGS groups. This is 3. **Default Value (Lowest Priority):** If you don't provide any custom configuration, Velero defaults to using `velero.io/volume-group`. +## Volume Policies and VolumeGroupSnapshots + +Volume policies control which volumes should be backed up and how (snapshot vs filesystem backup). When using VolumeGroupSnapshots, volume policies are applied **before** grouping PVCs. + +### How Volume Policies Affect VGS + +When Velero processes PVCs for a VolumeGroupSnapshot: + +1. **Label Matching:** All PVCs with the matching VGS label are identified +2. **Policy Filtering:** Volume policies are evaluated for each PVC +3. **Group Creation:** Only PVCs that should be snapshotted (not excluded by policy) are included in the VGS +4. **Warning Logging:** If any PVCs are excluded from the group by volume policy, a warning is logged + +This behavior ensures that volume policies take precedence over VGS labels. The VGS label indicates "group these volumes **if they're being backed up**", while the volume policy determines "which volumes to back up". + +### Example Scenario + +Consider an application with mixed storage types where some volumes should be excluded: + +```yaml +# Database PVC using CSI driver (should be backed up) +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: db-data + namespace: my-app + labels: + app.kubernetes.io/instance: myapp # VGS label +spec: + storageClassName: csi-storage + # ... + +--- +# Config PVC using NFS (should be excluded) +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: config-data + namespace: my-app + labels: + app.kubernetes.io/instance: myapp # Same VGS label +spec: + storageClassName: nfs-storage + # ... +``` + +**Volume Policy Configuration:** +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: velero-volume-policies + namespace: velero +data: + volume-policy: | + version: v1 + volumePolicies: + - conditions: + nfs: {} + action: + type: skip +``` + +**Backup Configuration:** +```yaml +apiVersion: velero.io/v1 +kind: Backup +metadata: + name: myapp-backup +spec: + includedNamespaces: + - my-app + volumeGroupSnapshotLabelKey: app.kubernetes.io/instance + resourcePolicy: + kind: ConfigMap + name: velero-volume-policies +``` + +**Result:** +- The NFS PVC (`config-data`) is filtered out by the volume policy +- Only the CSI PVC (`db-data`) is included in the VolumeGroupSnapshot +- A warning is logged: `PVC my-app/config-data has VolumeGroupSnapshot label app.kubernetes.io/instance=myapp but is excluded by volume policy` +- The backup succeeds with a single-volume VGS instead of failing with "multiple CSI drivers" error + +### Best Practices + +1. **Use Specific Labels:** When possible, use VGS labels that only target volumes you want to group, rather than relying on volume policies for filtering +2. **Monitor Warnings:** Review backup logs for volume policy exclusion warnings to ensure intended PVCs are being backed up +3. **Test Configurations:** Verify that your volume policy and VGS label combinations produce the expected grouping in a test environment + ## Troubleshooting ### Common Issues and Solutions @@ -334,6 +424,36 @@ kubectl logs -n kube-system -l app=ebs-csi-controller --tail=100 - Check VolumeGroupSnapshotClass configuration - Ensure storage backend supports group snapshots +#### Multiple CSI Drivers Error + +**Symptoms:** Backup fails with error about multiple CSI drivers found +``` +Error backing up item: failed to determine CSI driver for PVCs in VolumeGroupSnapshot group: +found multiple CSI drivers: linstor.csi.linbit.com and nfs.csi.k8s.io +``` + +**Cause:** PVCs with the same VGS label use different CSI drivers or include non-CSI volumes + +**Solutions:** +1. Use more specific labels that only match PVCs using the same CSI driver +2. Use volume policies to exclude PVCs that shouldn't be snapshotted: + ```yaml + apiVersion: v1 + kind: ConfigMap + metadata: + name: velero-volume-policies + namespace: velero + data: + volume-policy: | + version: v1 + volumePolicies: + - conditions: + nfs: {} + action: + type: skip + ``` +3. Check backup logs for volume policy warnings to verify filtering is working + #### VolumeGroupSnapshot Setup: Default VolumeSnapshotClass Required **Issue** From c565da2ea6d7ca14e76dc4c84965b726a01261e1 Mon Sep 17 00:00:00 2001 From: Shubham Pampattiwar Date: Tue, 18 Nov 2025 15:58:09 -0800 Subject: [PATCH 4/4] Fix linter error: use t.Context() instead of context.Background() Signed-off-by: Shubham Pampattiwar --- pkg/backup/actions/csi/pvc_action_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/backup/actions/csi/pvc_action_test.go b/pkg/backup/actions/csi/pvc_action_test.go index 8589a740e..6280b3ebd 100644 --- a/pkg/backup/actions/csi/pvc_action_test.go +++ b/pkg/backup/actions/csi/pvc_action_test.go @@ -829,7 +829,7 @@ volumePolicies: "volume-policy": tt.volumePolicyStr, }, } - require.NoError(t, client.Create(context.Background(), cm)) + require.NoError(t, client.Create(t.Context(), cm)) backup.Spec.ResourcePolicy = &corev1api.TypedLocalObjectReference{ Kind: "ConfigMap",