diff --git a/changelogs/unreleased/1807-skriss b/changelogs/unreleased/1807-skriss new file mode 100644 index 000000000..e32d6bbe9 --- /dev/null +++ b/changelogs/unreleased/1807-skriss @@ -0,0 +1 @@ +when backing up PVCs with restic, specify --parent flag to prevent full volume rescans after pod reschedules diff --git a/pkg/apis/velero/v1/labels_annotations.go b/pkg/apis/velero/v1/labels_annotations.go index f0a4d8ec7..04fb355c5 100644 --- a/pkg/apis/velero/v1/labels_annotations.go +++ b/pkg/apis/velero/v1/labels_annotations.go @@ -35,6 +35,9 @@ const ( // PodUIDLabel is the label key used to identify a pod by uid. PodUIDLabel = "velero.io/pod-uid" + // PVCUIDLabel is the label key used to identify a PVC by uid. + PVCUIDLabel = "velero.io/pvc-uid" + // PodVolumeOperationTimeoutAnnotation is the annotation key used to apply // a backup/restore-specific timeout value for pod volume operations (i.e. // restic backups/restores). diff --git a/pkg/controller/pod_volume_backup_controller.go b/pkg/controller/pod_volume_backup_controller.go index 62e6b80e7..9b3e8cbcb 100644 --- a/pkg/controller/pod_volume_backup_controller.go +++ b/pkg/controller/pod_volume_backup_controller.go @@ -27,6 +27,7 @@ import ( "github.com/pkg/errors" "github.com/sirupsen/logrus" apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/clock" corev1informers "k8s.io/client-go/informers/core/v1" @@ -235,6 +236,21 @@ func (c *podVolumeBackupController) processBackup(req *velerov1api.PodVolumeBack resticCmd.Env = env } + // If this is a PVC, look for the most recent completed pod volume backup for it and get + // its restic snapshot ID to use as the value of the `--parent` flag. Without this, + // if the pod using the PVC (and therefore the directory path under /host_pods/) has + // changed since the PVC's last backup, restic will not be able to identify a suitable + // parent snapshot to use, and will have to do a full rescan of the contents of the PVC. + if pvcUID, ok := req.Labels[velerov1api.PVCUIDLabel]; ok { + parentSnapshotID := getParentSnapshot(log, pvcUID, c.podVolumeBackupLister.PodVolumeBackups(req.Namespace)) + if parentSnapshotID == "" { + log.Info("No parent snapshot found for PVC, not using --parent flag for this backup") + } else { + log.WithField("parentSnapshotID", parentSnapshotID).Info("Setting --parent flag for this backup") + resticCmd.ExtraFlags = append(resticCmd.ExtraFlags, fmt.Sprintf("--parent=%s", parentSnapshotID)) + } + } + var stdout, stderr string var emptySnapshot bool @@ -277,6 +293,45 @@ func (c *podVolumeBackupController) processBackup(req *velerov1api.PodVolumeBack return nil } +// getParentSnapshot finds the most recent completed pod volume backup for the specified PVC and returns its +// restic snapshot ID. Any errors encountered are logged but not returned since they do not prevent a backup +// from proceeding. +func getParentSnapshot(log logrus.FieldLogger, pvcUID string, podVolumeBackupLister listers.PodVolumeBackupNamespaceLister) string { + log = log.WithField("pvcUID", pvcUID) + log.Infof("Looking for most recent completed pod volume backup for this PVC") + + pvcBackups, err := podVolumeBackupLister.List(labels.SelectorFromSet(map[string]string{velerov1api.PVCUIDLabel: pvcUID})) + if err != nil { + log.WithError(errors.WithStack(err)).Error("Error listing pod volume backups for PVC") + return "" + } + + // go through all the pod volume backups for the PVC and look for the most recent completed one + // to use as the parent. + var mostRecentBackup *velerov1api.PodVolumeBackup + for _, backup := range pvcBackups { + if backup.Status.Phase != velerov1api.PodVolumeBackupPhaseCompleted { + continue + } + + if mostRecentBackup == nil || backup.Status.StartTimestamp.After(mostRecentBackup.Status.StartTimestamp.Time) { + mostRecentBackup = backup + } + } + + if mostRecentBackup == nil { + log.Info("No completed pod volume backup found for PVC") + return "" + } + + log.WithFields(map[string]interface{}{ + "parentPodVolumeBackup": mostRecentBackup.Name, + "parentSnapshotID": mostRecentBackup.Status.SnapshotID, + }).Info("Found most recent completed pod volume backup for PVC") + + return mostRecentBackup.Status.SnapshotID +} + func (c *podVolumeBackupController) patchPodVolumeBackup(req *velerov1api.PodVolumeBackup, mutate func(*velerov1api.PodVolumeBackup)) (*velerov1api.PodVolumeBackup, error) { // Record original json oldData, err := json.Marshal(req) diff --git a/pkg/restic/backupper.go b/pkg/restic/backupper.go index 756b6f569..15439b9af 100644 --- a/pkg/restic/backupper.go +++ b/pkg/restic/backupper.go @@ -138,9 +138,18 @@ func (b *backupper) BackupPodVolumes(backup *velerov1api.Backup, pod *corev1api. continue } + var pvc *corev1api.PersistentVolumeClaim + if volume.PersistentVolumeClaim != nil { + pvc, err = b.pvcClient.PersistentVolumeClaims(pod.Namespace).Get(volume.PersistentVolumeClaim.ClaimName, metav1.GetOptions{}) + if err != nil { + errs = append(errs, errors.Wrap(err, "error getting persistent volume claim for volume")) + continue + } + } + // hostPath volumes are not supported because they're not mounted into /var/lib/kubelet/pods, so our // daemonset pod has no way to access their data. - isHostPath, err := isHostPathVolume(&volume, b.pvcClient.PersistentVolumeClaims(pod.Namespace), b.pvClient.PersistentVolumes()) + isHostPath, err := isHostPathVolume(&volume, pvc, b.pvClient.PersistentVolumes()) if err != nil { errs = append(errs, errors.Wrap(err, "error checking if volume is a hostPath volume")) continue @@ -150,7 +159,7 @@ func (b *backupper) BackupPodVolumes(backup *velerov1api.Backup, pod *corev1api. continue } - volumeBackup := newPodVolumeBackup(backup, pod, volume, repo.Spec.ResticIdentifier) + volumeBackup := newPodVolumeBackup(backup, pod, volume, repo.Spec.ResticIdentifier, pvc) numVolumeSnapshots++ if volumeBackup, err = b.repoManager.veleroClient.VeleroV1().PodVolumeBackups(volumeBackup.Namespace).Create(volumeBackup); err != nil { errs = append(errs, err) @@ -195,21 +204,12 @@ type pvGetter interface { // isHostPathVolume returns true if the volume is either a hostPath pod volume or a persistent // volume claim on a hostPath persistent volume, or false otherwise. -func isHostPathVolume(volume *corev1api.Volume, pvcGetter pvcGetter, pvGetter pvGetter) (bool, error) { +func isHostPathVolume(volume *corev1api.Volume, pvc *corev1api.PersistentVolumeClaim, pvGetter pvGetter) (bool, error) { if volume.HostPath != nil { return true, nil } - if volume.PersistentVolumeClaim == nil { - return false, nil - } - - pvc, err := pvcGetter.Get(volume.PersistentVolumeClaim.ClaimName, metav1.GetOptions{}) - if err != nil { - return false, errors.WithStack(err) - } - - if pvc.Spec.VolumeName == "" { + if pvc == nil || pvc.Spec.VolumeName == "" { return false, nil } @@ -221,7 +221,7 @@ func isHostPathVolume(volume *corev1api.Volume, pvcGetter pvcGetter, pvGetter pv return pv.Spec.HostPath != nil, nil } -func newPodVolumeBackup(backup *velerov1api.Backup, pod *corev1api.Pod, volume corev1api.Volume, repoIdentifier string) *velerov1api.PodVolumeBackup { +func newPodVolumeBackup(backup *velerov1api.Backup, pod *corev1api.Pod, volume corev1api.Volume, repoIdentifier string, pvc *corev1api.PersistentVolumeClaim) *velerov1api.PodVolumeBackup { pvb := &velerov1api.PodVolumeBackup{ ObjectMeta: metav1.ObjectMeta{ Namespace: backup.Namespace, @@ -262,12 +262,19 @@ func newPodVolumeBackup(backup *velerov1api.Backup, pod *corev1api.Pod, volume c }, } - // if the volume is for a PVC, annotate the pod volume backup with its name - // for easy identification as a PVC backup during restore. - if volume.PersistentVolumeClaim != nil { - pvb.SetAnnotations(map[string]string{ - PVCNameAnnotation: volume.PersistentVolumeClaim.ClaimName, - }) + if pvc != nil { + // this annotation is used in pkg/restore to identify if a PVC + // has a restic backup. + pvb.Annotations = map[string]string{ + PVCNameAnnotation: pvc.Name, + } + + // this label is used by the pod volume backup controller to tell + // if a pod volume backup is for a PVC. + pvb.Labels[velerov1api.PVCUIDLabel] = string(pvc.UID) + + // this tag is not used by velero, but useful for debugging. + pvb.Spec.Tags["pvc-uid"] = string(pvc.UID) } return pvb diff --git a/pkg/restic/backupper_test.go b/pkg/restic/backupper_test.go index 0ba5c18a0..8b1e03458 100644 --- a/pkg/restic/backupper_test.go +++ b/pkg/restic/backupper_test.go @@ -54,15 +54,13 @@ func TestIsHostPathVolume(t *testing.T) { }, }, } - pvcGetter := &fakePVCGetter{ - pvc: &corev1api.PersistentVolumeClaim{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: "ns-1", - Name: "pvc-1", - }, + pvc := &corev1api.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "ns-1", + Name: "pvc-1", }, } - isHostPath, err = isHostPathVolume(vol, pvcGetter, nil) + isHostPath, err = isHostPathVolume(vol, pvc, nil) assert.Nil(t, err) assert.False(t, isHostPath) @@ -74,15 +72,13 @@ func TestIsHostPathVolume(t *testing.T) { }, }, } - pvcGetter = &fakePVCGetter{ - pvc: &corev1api.PersistentVolumeClaim{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: "ns-1", - Name: "pvc-1", - }, - Spec: corev1api.PersistentVolumeClaimSpec{ - VolumeName: "pv-1", - }, + pvc = &corev1api.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "ns-1", + Name: "pvc-1", + }, + Spec: corev1api.PersistentVolumeClaimSpec{ + VolumeName: "pv-1", }, } pvGetter := &fakePVGetter{ @@ -93,7 +89,7 @@ func TestIsHostPathVolume(t *testing.T) { Spec: corev1api.PersistentVolumeSpec{}, }, } - isHostPath, err = isHostPathVolume(vol, pvcGetter, pvGetter) + isHostPath, err = isHostPathVolume(vol, pvc, pvGetter) assert.Nil(t, err) assert.False(t, isHostPath) @@ -105,15 +101,13 @@ func TestIsHostPathVolume(t *testing.T) { }, }, } - pvcGetter = &fakePVCGetter{ - pvc: &corev1api.PersistentVolumeClaim{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: "ns-1", - Name: "pvc-1", - }, - Spec: corev1api.PersistentVolumeClaimSpec{ - VolumeName: "pv-1", - }, + pvc = &corev1api.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "ns-1", + Name: "pvc-1", + }, + Spec: corev1api.PersistentVolumeClaimSpec{ + VolumeName: "pv-1", }, } pvGetter = &fakePVGetter{ @@ -128,23 +122,11 @@ func TestIsHostPathVolume(t *testing.T) { }, }, } - isHostPath, err = isHostPathVolume(vol, pvcGetter, pvGetter) + isHostPath, err = isHostPathVolume(vol, pvc, pvGetter) assert.Nil(t, err) assert.True(t, isHostPath) } -type fakePVCGetter struct { - pvc *corev1api.PersistentVolumeClaim -} - -func (g *fakePVCGetter) Get(name string, opts metav1.GetOptions) (*corev1api.PersistentVolumeClaim, error) { - if g.pvc != nil { - return g.pvc, nil - } - - return nil, errors.New("item not found") -} - type fakePVGetter struct { pv *corev1api.PersistentVolume }