diff --git a/changelogs/unreleased/9419-shubham-pampattiwar b/changelogs/unreleased/9419-shubham-pampattiwar
new file mode 100644
index 000000000..9f21ac8ca
--- /dev/null
+++ b/changelogs/unreleased/9419-shubham-pampattiwar
@@ -0,0 +1 @@
+Apply volume policies to VolumeGroupSnapshot PVC filtering
diff --git a/pkg/backup/actions/csi/pvc_action.go b/pkg/backup/actions/csi/pvc_action.go
index c91d7a2b4..62c51accc 100644
--- a/pkg/backup/actions/csi/pvc_action.go
+++ b/pkg/backup/actions/csi/pvc_action.go
@@ -621,8 +621,30 @@ func (p *pvcBackupItemAction) getVolumeSnapshotReference(
         return nil, errors.Wrapf(err, "failed to list PVCs in VolumeGroupSnapshot group %q in namespace %q", group, pvc.Namespace)
     }
 
+    // Filter PVCs by volume policy
+    filteredPVCs, err := p.filterPVCsByVolumePolicy(groupedPVCs, backup)
+    if err != nil {
+        return nil, errors.Wrapf(err, "failed to filter PVCs by volume policy for VolumeGroupSnapshot group %q", group)
+    }
+
+    // Warn if any PVCs were filtered out
+    if len(filteredPVCs) < len(groupedPVCs) {
+        for _, originalPVC := range groupedPVCs {
+            found := false
+            for _, filteredPVC := range filteredPVCs {
+                if originalPVC.Name == filteredPVC.Name {
+                    found = true
+                    break
+                }
+            }
+            if !found {
+                p.log.Warnf("PVC %s/%s has VolumeGroupSnapshot label %s=%s but is excluded by volume policy", originalPVC.Namespace, originalPVC.Name, vgsLabelKey, group)
+            }
+        }
+    }
+
     // Determine the CSI driver for the grouped PVCs
-    driver, err := p.determineCSIDriver(groupedPVCs)
+    driver, err := p.determineCSIDriver(filteredPVCs)
     if err != nil {
         return nil, errors.Wrapf(err, "failed to determine CSI driver for PVCs in VolumeGroupSnapshot group %q", group)
     }
@@ -643,7 +665,7 @@ func (p *pvcBackupItemAction) getVolumeSnapshotReference(
     }
 
     // Wait for all the VS objects associated with the VGS to have status and VGS Name (VS readiness is checked in legacy flow) and get the PVC-to-VS map
-    vsMap, err := p.waitForVGSAssociatedVS(ctx, groupedPVCs, newVGS, backup.Spec.CSISnapshotTimeout.Duration)
+    vsMap, err := p.waitForVGSAssociatedVS(ctx, filteredPVCs, newVGS, backup.Spec.CSISnapshotTimeout.Duration)
     if err != nil {
         return nil, errors.Wrapf(err, "timeout waiting for VolumeSnapshots to have status created via VolumeGroupSnapshot %s", newVGS.Name)
     }
@@ -734,6 +756,40 @@ func (p *pvcBackupItemAction) listGroupedPVCs(ctx context.Context, namespace, la
     return pvcList.Items, nil
 }
 
+func (p *pvcBackupItemAction) filterPVCsByVolumePolicy(
+    pvcs []corev1api.PersistentVolumeClaim,
+    backup *velerov1api.Backup,
+) ([]corev1api.PersistentVolumeClaim, error) {
+    var filteredPVCs []corev1api.PersistentVolumeClaim
+
+    for _, pvc := range pvcs {
+        // Convert PVC to unstructured for ShouldPerformSnapshotWithBackup
+        pvcMap, err := runtime.DefaultUnstructuredConverter.ToUnstructured(&pvc)
+        if err != nil {
+            return nil, errors.Wrapf(err, "failed to convert PVC %s/%s to unstructured", pvc.Namespace, pvc.Name)
+        }
+        unstructuredPVC := &unstructured.Unstructured{Object: pvcMap}
+
+        // Check if this PVC should be snapshotted according to volume policies
+        shouldSnapshot, err := volumehelper.ShouldPerformSnapshotWithBackup(
+            unstructuredPVC,
+            kuberesource.PersistentVolumeClaims,
+            *backup,
+            p.crClient,
+            p.log,
+        )
+        if err != nil {
+            return nil, errors.Wrapf(err, "failed to check volume policy for PVC %s/%s", pvc.Namespace, pvc.Name)
+        }
+
+        if shouldSnapshot {
+            filteredPVCs = append(filteredPVCs, pvc)
+        }
+    }
+
+    return filteredPVCs, nil
+}
+
 func (p *pvcBackupItemAction) determineCSIDriver(
     pvcs []corev1api.PersistentVolumeClaim,
 ) (string, error) {
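Aside on the exclusion-warning block above: it rescans `filteredPVCs` once per grouped PVC. For typical VGS group sizes that nested scan is harmless; if it ever matters, a name set keeps the check linear. The helper below is only an illustrative sketch, not part of this diff — `warnExcludedPVCs` is a hypothetical name, and it assumes the generic set helpers from `k8s.io/apimachinery/pkg/util/sets` are available.

```go
// Sketch only — a hypothetical, behavior-equivalent form of the warning loop
// in getVolumeSnapshotReference, using a set instead of a nested scan.
package csi

import (
    "github.com/sirupsen/logrus"
    corev1api "k8s.io/api/core/v1"
    "k8s.io/apimachinery/pkg/util/sets" // assumed available (generic sets)
)

// warnExcludedPVCs logs one warning per PVC that carries the VGS label but
// was dropped by volume-policy filtering.
func warnExcludedPVCs(
    log logrus.FieldLogger,
    groupedPVCs, filteredPVCs []corev1api.PersistentVolumeClaim,
    vgsLabelKey, group string,
) {
    // Collect the names of the PVCs that survived filtering.
    kept := sets.New[string]()
    for _, pvc := range filteredPVCs {
        kept.Insert(pvc.Name)
    }
    // Any grouped PVC not in the kept set was excluded by policy.
    for _, pvc := range groupedPVCs {
        if !kept.Has(pvc.Name) {
            log.Warnf("PVC %s/%s has VolumeGroupSnapshot label %s=%s but is excluded by volume policy",
                pvc.Namespace, pvc.Name, vgsLabelKey, group)
        }
    }
}
```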
diff --git a/pkg/backup/actions/csi/pvc_action_test.go b/pkg/backup/actions/csi/pvc_action_test.go
index 795e3c038..6280b3ebd 100644
--- a/pkg/backup/actions/csi/pvc_action_test.go
+++ b/pkg/backup/actions/csi/pvc_action_test.go
@@ -586,6 +586,280 @@ func TestListGroupedPVCs(t *testing.T) {
     }
 }
 
+func TestFilterPVCsByVolumePolicy(t *testing.T) {
+    tests := []struct {
+        name            string
+        pvcs            []corev1api.PersistentVolumeClaim
+        pvs             []corev1api.PersistentVolume
+        volumePolicyStr string
+        expectCount     int
+        expectError     bool
+    }{
+        {
+            name: "All PVCs should be included when no volume policy",
+            pvcs: []corev1api.PersistentVolumeClaim{
+                {
+                    ObjectMeta: metav1.ObjectMeta{Name: "pvc-1", Namespace: "ns-1"},
+                    Spec: corev1api.PersistentVolumeClaimSpec{
+                        VolumeName:       "pv-1",
+                        StorageClassName: pointer.String("sc-1"),
+                    },
+                    Status: corev1api.PersistentVolumeClaimStatus{Phase: corev1api.ClaimBound},
+                },
+                {
+                    ObjectMeta: metav1.ObjectMeta{Name: "pvc-2", Namespace: "ns-1"},
+                    Spec: corev1api.PersistentVolumeClaimSpec{
+                        VolumeName:       "pv-2",
+                        StorageClassName: pointer.String("sc-1"),
+                    },
+                    Status: corev1api.PersistentVolumeClaimStatus{Phase: corev1api.ClaimBound},
+                },
+            },
+            pvs: []corev1api.PersistentVolume{
+                {
+                    ObjectMeta: metav1.ObjectMeta{Name: "pv-1"},
+                    Spec: corev1api.PersistentVolumeSpec{
+                        PersistentVolumeSource: corev1api.PersistentVolumeSource{
+                            CSI: &corev1api.CSIPersistentVolumeSource{Driver: "csi-driver-1"},
+                        },
+                    },
+                },
+                {
+                    ObjectMeta: metav1.ObjectMeta{Name: "pv-2"},
+                    Spec: corev1api.PersistentVolumeSpec{
+                        PersistentVolumeSource: corev1api.PersistentVolumeSource{
+                            CSI: &corev1api.CSIPersistentVolumeSource{Driver: "csi-driver-1"},
+                        },
+                    },
+                },
+            },
+            expectCount: 2,
+        },
+        {
+            name: "Filter out NFS PVC by volume policy",
+            pvcs: []corev1api.PersistentVolumeClaim{
+                {
+                    ObjectMeta: metav1.ObjectMeta{Name: "pvc-csi", Namespace: "ns-1"},
+                    Spec: corev1api.PersistentVolumeClaimSpec{
+                        VolumeName:       "pv-csi",
+                        StorageClassName: pointer.String("sc-1"),
+                    },
+                    Status: corev1api.PersistentVolumeClaimStatus{Phase: corev1api.ClaimBound},
+                },
+                {
+                    ObjectMeta: metav1.ObjectMeta{Name: "pvc-nfs", Namespace: "ns-1"},
+                    Spec: corev1api.PersistentVolumeClaimSpec{
+                        VolumeName:       "pv-nfs",
+                        StorageClassName: pointer.String("sc-nfs"),
+                    },
+                    Status: corev1api.PersistentVolumeClaimStatus{Phase: corev1api.ClaimBound},
+                },
+            },
+            pvs: []corev1api.PersistentVolume{
+                {
+                    ObjectMeta: metav1.ObjectMeta{Name: "pv-csi"},
+                    Spec: corev1api.PersistentVolumeSpec{
+                        PersistentVolumeSource: corev1api.PersistentVolumeSource{
+                            CSI: &corev1api.CSIPersistentVolumeSource{Driver: "csi-driver"},
+                        },
+                    },
+                },
+                {
+                    ObjectMeta: metav1.ObjectMeta{Name: "pv-nfs"},
+                    Spec: corev1api.PersistentVolumeSpec{
+                        PersistentVolumeSource: corev1api.PersistentVolumeSource{
+                            NFS: &corev1api.NFSVolumeSource{
+                                Server: "nfs-server",
+                                Path:   "/export",
+                            },
+                        },
+                    },
+                },
+            },
+            volumePolicyStr: `
+version: v1
+volumePolicies:
+- conditions:
+    nfs: {}
+  action:
+    type: skip
+`,
+            expectCount: 1,
+        },
+        {
+            name: "All PVCs filtered out by volume policy",
+            pvcs: []corev1api.PersistentVolumeClaim{
+                {
+                    ObjectMeta: metav1.ObjectMeta{Name: "pvc-nfs-1", Namespace: "ns-1"},
+                    Spec: corev1api.PersistentVolumeClaimSpec{
+                        VolumeName:       "pv-nfs-1",
+                        StorageClassName: pointer.String("sc-nfs"),
+                    },
+                    Status: corev1api.PersistentVolumeClaimStatus{Phase: corev1api.ClaimBound},
+                },
+                {
+                    ObjectMeta: metav1.ObjectMeta{Name: "pvc-nfs-2", Namespace: "ns-1"},
+                    Spec: corev1api.PersistentVolumeClaimSpec{
+                        VolumeName:       "pv-nfs-2",
+                        StorageClassName: pointer.String("sc-nfs"),
+                    },
+                    Status: corev1api.PersistentVolumeClaimStatus{Phase: corev1api.ClaimBound},
+                },
+            },
+            pvs: []corev1api.PersistentVolume{
+                {
+                    ObjectMeta: metav1.ObjectMeta{Name: "pv-nfs-1"},
+                    Spec: corev1api.PersistentVolumeSpec{
+                        PersistentVolumeSource: corev1api.PersistentVolumeSource{
+                            NFS: &corev1api.NFSVolumeSource{
+                                Server: "nfs-server",
+                                Path:   "/export/1",
+                            },
+                        },
+                    },
+                },
+                {
+                    ObjectMeta: metav1.ObjectMeta{Name: "pv-nfs-2"},
+                    Spec: corev1api.PersistentVolumeSpec{
+                        PersistentVolumeSource: corev1api.PersistentVolumeSource{
+                            NFS: &corev1api.NFSVolumeSource{
+                                Server: "nfs-server",
+                                Path:   "/export/2",
+                            },
+                        },
+                    },
+                },
+            },
+            volumePolicyStr: `
+version: v1
+volumePolicies:
+- conditions:
+    nfs: {}
+  action:
+    type: skip
+`,
+            expectCount: 0,
+        },
+        {
+            name: "Filter out non-CSI PVCs from mixed driver group",
+            pvcs: []corev1api.PersistentVolumeClaim{
+                {
+                    ObjectMeta: metav1.ObjectMeta{
+                        Name:      "pvc-linstor",
+                        Namespace: "ns-1",
+                        Labels:    map[string]string{"app.kubernetes.io/instance": "myapp"},
+                    },
+                    Spec: corev1api.PersistentVolumeClaimSpec{
+                        VolumeName:       "pv-linstor",
+                        StorageClassName: pointer.String("sc-linstor"),
+                    },
+                    Status: corev1api.PersistentVolumeClaimStatus{Phase: corev1api.ClaimBound},
+                },
+                {
+                    ObjectMeta: metav1.ObjectMeta{
+                        Name:      "pvc-nfs",
+                        Namespace: "ns-1",
+                        Labels:    map[string]string{"app.kubernetes.io/instance": "myapp"},
+                    },
+                    Spec: corev1api.PersistentVolumeClaimSpec{
+                        VolumeName:       "pv-nfs",
+                        StorageClassName: pointer.String("sc-nfs"),
+                    },
+                    Status: corev1api.PersistentVolumeClaimStatus{Phase: corev1api.ClaimBound},
+                },
+            },
+            pvs: []corev1api.PersistentVolume{
+                {
+                    ObjectMeta: metav1.ObjectMeta{Name: "pv-linstor"},
+                    Spec: corev1api.PersistentVolumeSpec{
+                        PersistentVolumeSource: corev1api.PersistentVolumeSource{
+                            CSI: &corev1api.CSIPersistentVolumeSource{Driver: "linstor.csi.linbit.com"},
+                        },
+                    },
+                },
+                {
+                    ObjectMeta: metav1.ObjectMeta{Name: "pv-nfs"},
+                    Spec: corev1api.PersistentVolumeSpec{
+                        PersistentVolumeSource: corev1api.PersistentVolumeSource{
+                            NFS: &corev1api.NFSVolumeSource{
+                                Server: "nfs-server",
+                                Path:   "/export",
+                            },
+                        },
+                    },
+                },
+            },
+            volumePolicyStr: `
+version: v1
+volumePolicies:
+- conditions:
+    nfs: {}
+  action:
+    type: skip
+`,
+            expectCount: 1,
+        },
+    }
+
+    for _, tt := range tests {
+        t.Run(tt.name, func(t *testing.T) {
+            objs := []runtime.Object{}
+            for i := range tt.pvs {
+                objs = append(objs, &tt.pvs[i])
+            }
+
+            client := velerotest.NewFakeControllerRuntimeClient(t, objs...)
+
+            backup := &velerov1api.Backup{
+                ObjectMeta: metav1.ObjectMeta{
+                    Name:      "test-backup",
+                    Namespace: "velero",
+                },
+                Spec: velerov1api.BackupSpec{},
+            }
+
+            // Add volume policy ConfigMap if specified
+            if tt.volumePolicyStr != "" {
+                cm := &corev1api.ConfigMap{
+                    ObjectMeta: metav1.ObjectMeta{
+                        Name:      "volume-policy",
+                        Namespace: "velero",
+                    },
+                    Data: map[string]string{
+                        "volume-policy": tt.volumePolicyStr,
+                    },
+                }
+                require.NoError(t, client.Create(t.Context(), cm))
+
+                backup.Spec.ResourcePolicy = &corev1api.TypedLocalObjectReference{
+                    Kind: "ConfigMap",
+                    Name: "volume-policy",
+                }
+            }
+
+            action := &pvcBackupItemAction{
+                log:      velerotest.NewLogger(),
+                crClient: client,
+            }
+
+            result, err := action.filterPVCsByVolumePolicy(tt.pvcs, backup)
+            if tt.expectError {
+                require.Error(t, err)
+            } else {
+                require.NoError(t, err)
+                require.Len(t, result, tt.expectCount)
+
+                // For mixed driver scenarios, verify filtered result can determine single CSI driver
+                if tt.name == "Filter out non-CSI PVCs from mixed driver group" && len(result) > 0 {
+                    driver, err := action.determineCSIDriver(result)
+                    require.NoError(t, err, "After filtering, determineCSIDriver should not fail with multiple drivers error")
+                    require.Equal(t, "linstor.csi.linbit.com", driver, "Should have the Linstor driver after filtering out NFS")
+                }
+            }
+        })
+    }
+}
+
 func TestDetermineCSIDriver(t *testing.T) {
     tests := []struct {
         name string
diff --git a/site/content/docs/main/volume-group-snapshots.md b/site/content/docs/main/volume-group-snapshots.md
index cb2e545d2..95cf33ff9 100644
--- a/site/content/docs/main/volume-group-snapshots.md
+++ b/site/content/docs/main/volume-group-snapshots.md
@@ -298,6 +298,96 @@ You can customize the label key that Velero uses to identify VGS groups. This is
 3. **Default Value (Lowest Priority):** If you don't provide any custom configuration, Velero defaults to using `velero.io/volume-group`.
 
+## Volume Policies and VolumeGroupSnapshots
+
+Volume policies control which volumes should be backed up and how (snapshot vs filesystem backup). When using VolumeGroupSnapshots, volume policies are applied **before** grouping PVCs.
+
+### How Volume Policies Affect VGS
+
+When Velero processes PVCs for a VolumeGroupSnapshot:
+
+1. **Label Matching:** All PVCs with the matching VGS label are identified
+2. **Policy Filtering:** Volume policies are evaluated for each PVC
+3. **Group Creation:** Only PVCs that should be snapshotted (not excluded by policy) are included in the VGS
+4. **Warning Logging:** If any PVCs are excluded from the group by volume policy, a warning is logged
+
+This behavior ensures that volume policies take precedence over VGS labels. The VGS label indicates "group these volumes **if they're being backed up**", while the volume policy determines "which volumes to back up".
+
+### Example Scenario
+
+Consider an application with mixed storage types where some volumes should be excluded:
+
+```yaml
+# Database PVC using CSI driver (should be backed up)
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: db-data
+  namespace: my-app
+  labels:
+    app.kubernetes.io/instance: myapp  # VGS label
+spec:
+  storageClassName: csi-storage
+  # ...
+
+---
+# Config PVC using NFS (should be excluded)
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: config-data
+  namespace: my-app
+  labels:
+    app.kubernetes.io/instance: myapp  # Same VGS label
+spec:
+  storageClassName: nfs-storage
+  # ...
+```
+
+**Volume Policy Configuration:**
+```yaml
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: velero-volume-policies
+  namespace: velero
+data:
+  volume-policy: |
+    version: v1
+    volumePolicies:
+    - conditions:
+        nfs: {}
+      action:
+        type: skip
+```
+
+**Backup Configuration:**
+```yaml
+apiVersion: velero.io/v1
+kind: Backup
+metadata:
+  name: myapp-backup
+spec:
+  includedNamespaces:
+  - my-app
+  volumeGroupSnapshotLabelKey: app.kubernetes.io/instance
+  resourcePolicy:
+    kind: ConfigMap
+    name: velero-volume-policies
+```
+
+**Result:**
+- The NFS PVC (`config-data`) is filtered out by the volume policy
+- Only the CSI PVC (`db-data`) is included in the VolumeGroupSnapshot
+- A warning is logged: `PVC my-app/config-data has VolumeGroupSnapshot label app.kubernetes.io/instance=myapp but is excluded by volume policy`
+- The backup succeeds with a single-volume VGS instead of failing with a "multiple CSI drivers" error
+
+### Best Practices
+
+1. **Use Specific Labels:** When possible, use VGS labels that only target volumes you want to group, rather than relying on volume policies for filtering
+2. **Monitor Warnings:** Review backup logs for volume policy exclusion warnings to ensure the intended PVCs are being backed up
+3. **Test Configurations:** Verify that your volume policy and VGS label combinations produce the expected grouping in a test environment
+
 ## Troubleshooting
 
 ### Common Issues and Solutions
@@ -334,6 +424,36 @@ kubectl logs -n kube-system -l app=ebs-csi-controller --tail=100
 - Check VolumeGroupSnapshotClass configuration
 - Ensure storage backend supports group snapshots
 
+#### Multiple CSI Drivers Error
+
+**Symptoms:** The backup fails with an error about multiple CSI drivers:
+```
+Error backing up item: failed to determine CSI driver for PVCs in VolumeGroupSnapshot group:
+found multiple CSI drivers: linstor.csi.linbit.com and nfs.csi.k8s.io
+```
+
+**Cause:** PVCs with the same VGS label use different CSI drivers or include non-CSI volumes.
+
+**Solutions:**
+1. Use more specific labels that only match PVCs using the same CSI driver
+2. Use volume policies to exclude PVCs that shouldn't be snapshotted:
+   ```yaml
+   apiVersion: v1
+   kind: ConfigMap
+   metadata:
+     name: velero-volume-policies
+     namespace: velero
+   data:
+     volume-policy: |
+       version: v1
+       volumePolicies:
+       - conditions:
+           nfs: {}
+         action:
+           type: skip
+   ```
+3. Check the backup logs for volume policy warnings to verify that filtering is working
+
 #### VolumeGroupSnapshot Setup: Default VolumeSnapshotClass Required
 
 **Issue**