diff --git a/changelogs/unreleased/1713-skriss b/changelogs/unreleased/1713-skriss
new file mode 100644
index 000000000..798998b78
--- /dev/null
+++ b/changelogs/unreleased/1713-skriss
@@ -0,0 +1 @@
+properly restore PVs backed up with restic and a reclaim policy of "Retain"
diff --git a/pkg/restic/backupper.go b/pkg/restic/backupper.go
index 985befeae..756b6f569 100644
--- a/pkg/restic/backupper.go
+++ b/pkg/restic/backupper.go
@@ -150,7 +150,7 @@ func (b *backupper) BackupPodVolumes(backup *velerov1api.Backup, pod *corev1api.
 			continue
 		}
 
-		volumeBackup := newPodVolumeBackup(backup, pod, volumeName, repo.Spec.ResticIdentifier)
+		volumeBackup := newPodVolumeBackup(backup, pod, volume, repo.Spec.ResticIdentifier)
 		numVolumeSnapshots++
 		if volumeBackup, err = b.repoManager.veleroClient.VeleroV1().PodVolumeBackups(volumeBackup.Namespace).Create(volumeBackup); err != nil {
 			errs = append(errs, err)
@@ -221,8 +221,8 @@ func isHostPathVolume(volume *corev1api.Volume, pvcGetter pvcGetter, pvGetter pv
 	return pv.Spec.HostPath != nil, nil
 }
 
-func newPodVolumeBackup(backup *velerov1api.Backup, pod *corev1api.Pod, volumeName, repoIdentifier string) *velerov1api.PodVolumeBackup {
-	return &velerov1api.PodVolumeBackup{
+func newPodVolumeBackup(backup *velerov1api.Backup, pod *corev1api.Pod, volume corev1api.Volume, repoIdentifier string) *velerov1api.PodVolumeBackup {
+	pvb := &velerov1api.PodVolumeBackup{
 		ObjectMeta: metav1.ObjectMeta{
 			Namespace:    backup.Namespace,
 			GenerateName: backup.Name + "-",
@@ -248,19 +248,29 @@ func newPodVolumeBackup(backup *velerov1api.Backup, pod *corev1api.Pod, volumeNa
 				Name:      pod.Name,
 				UID:       pod.UID,
 			},
-			Volume: volumeName,
+			Volume: volume.Name,
 			Tags: map[string]string{
 				"backup":     backup.Name,
 				"backup-uid": string(backup.UID),
 				"pod":        pod.Name,
 				"pod-uid":    string(pod.UID),
 				"ns":         pod.Namespace,
-				"volume":     volumeName,
+				"volume":     volume.Name,
 			},
 			BackupStorageLocation: backup.Spec.StorageLocation,
 			RepoIdentifier:        repoIdentifier,
 		},
 	}
+
+	// if the volume is for a PVC, annotate the pod volume backup with its name
+	// for easy identification as a PVC backup during restore.
+	if volume.PersistentVolumeClaim != nil {
+		pvb.SetAnnotations(map[string]string{
+			PVCNameAnnotation: volume.PersistentVolumeClaim.ClaimName,
+		})
+	}
+
+	return pvb
 }
 
 func errorOnly(_ interface{}, err error) error {
diff --git a/pkg/restic/common.go b/pkg/restic/common.go
index d29e8cabf..413c80a00 100644
--- a/pkg/restic/common.go
+++ b/pkg/restic/common.go
@@ -35,11 +35,24 @@ import (
 )
 
 const (
-	DaemonSet                   = "restic"
-	InitContainer               = "restic-wait"
+	// DaemonSet is the name of the Velero restic daemonset.
+	DaemonSet = "restic"
+
+	// InitContainer is the name of the init container added
+	// to workload pods to help with restores.
+	InitContainer = "restic-wait"
+
+	// DefaultMaintenanceFrequency is the default time interval
+	// at which restic check & prune are run.
 	DefaultMaintenanceFrequency = 24 * time.Hour
 
+	// PVCNameAnnotation is the key for the annotation added to
+	// pod volume backups when they're for a PVC.
+	PVCNameAnnotation = "velero.io/pvc-name"
+
+	// Deprecated.
+	//
+	// TODO(2.0): remove
 	podAnnotationPrefix       = "snapshot.velero.io/"
 	volumesToBackupAnnotation = "backup.velero.io/backup-volumes"
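Note, outside the patch: the sketch below (package, helper name, and values are hypothetical) illustrates the annotation the backupper now attaches when the backed-up pod volume references a PVC; in the real code the object is built by newPodVolumeBackup above.

package main

import (
	"fmt"

	corev1api "k8s.io/api/core/v1"
)

// PVCNameAnnotation mirrors the constant introduced in pkg/restic/common.go.
const PVCNameAnnotation = "velero.io/pvc-name"

// pvcAnnotationFor is a hypothetical helper: it returns the annotations the
// backupper attaches when the volume is backed by a PVC, and nil otherwise.
func pvcAnnotationFor(volume corev1api.Volume) map[string]string {
	if volume.PersistentVolumeClaim == nil {
		return nil
	}
	return map[string]string{
		PVCNameAnnotation: volume.PersistentVolumeClaim.ClaimName,
	}
}

func main() {
	vol := corev1api.Volume{
		Name: "data",
		VolumeSource: corev1api.VolumeSource{
			PersistentVolumeClaim: &corev1api.PersistentVolumeClaimVolumeSource{
				ClaimName: "my-pvc",
			},
		},
	}

	// Prints: map[velero.io/pvc-name:my-pvc]
	fmt.Println(pvcAnnotationFor(vol))
}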
diff --git a/pkg/restore/restore.go b/pkg/restore/restore.go
index 80a2fd401..dbff5b3eb 100644
--- a/pkg/restore/restore.go
+++ b/pkg/restore/restore.go
@@ -874,41 +874,48 @@ func (ctx *context) restoreItem(obj *unstructured.Unstructured, groupResource sc
 	}
 
 	if groupResource == kuberesource.PersistentVolumes {
-		var hasSnapshot bool
-
-		for _, snapshot := range ctx.volumeSnapshots {
-			if snapshot.Spec.PersistentVolumeName == name {
-				hasSnapshot = true
-				break
+		switch {
+		case hasSnapshot(name, ctx.volumeSnapshots):
+			// Check if the PV exists in the cluster before attempting to create
+			// a volume from the snapshot, in order to avoid orphaned volumes (GH #609)
+			shouldRestoreSnapshot, err := ctx.shouldRestore(name, resourceClient)
+			if err != nil {
+				addToResult(&errs, namespace, errors.Wrapf(err, "error waiting on in-cluster persistentvolume %s", name))
+				return warnings, errs
 			}
-		}
 
-		if !hasSnapshot && hasDeleteReclaimPolicy(obj.Object) {
-			ctx.log.Infof("Not restoring PV because it doesn't have a snapshot and its reclaim policy is Delete.")
+			if shouldRestoreSnapshot {
+				ctx.log.Infof("Restoring persistent volume from snapshot.")
+				updatedObj, err := ctx.pvRestorer.executePVAction(obj)
+				if err != nil {
+					addToResult(&errs, namespace, fmt.Errorf("error executing PVAction for %s: %v", resourceID, err))
+					return warnings, errs
+				}
+				obj = updatedObj
+			}
+		case hasResticBackup(obj, ctx):
+			ctx.log.Infof("Dynamically re-provisioning persistent volume because it has a restic backup to be restored.")
 			ctx.pvsToProvision.Insert(name)
-			return warnings, errs
-		}
 
-		// Check if the PV exists in the cluster before attempting to create
-		// a volume from the snapshot, in order to avoid orphaned volumes (GH #609)
-		shouldRestoreSnapshot, err := ctx.shouldRestore(name, resourceClient)
-		if err != nil {
-			addToResult(&errs, namespace, errors.Wrapf(err, "error waiting on in-cluster persistentvolume %s", name))
+			// return early because we don't want to restore the PV itself, we want to dynamically re-provision it.
 			return warnings, errs
-		}
+		case hasDeleteReclaimPolicy(obj.Object):
+			ctx.log.Infof("Dynamically re-provisioning persistent volume because it doesn't have a snapshot and its reclaim policy is Delete.")
+			ctx.pvsToProvision.Insert(name)
 
-		// PV's existence will be recorded later. Just skip the volume restore logic.
-		if shouldRestoreSnapshot {
-			// restore the PV from snapshot (if applicable)
+			// return early because we don't want to restore the PV itself, we want to dynamically re-provision it.
+			return warnings, errs
+		default:
+			ctx.log.Infof("Restoring persistent volume as-is because it doesn't have a snapshot and its reclaim policy is not Delete.")
+
+			// we call the pvRestorer here to clear out the PV's claimRef, so it can be re-claimed
+			// when its PVC is restored.
 			updatedObj, err := ctx.pvRestorer.executePVAction(obj)
 			if err != nil {
 				addToResult(&errs, namespace, fmt.Errorf("error executing PVAction for %s: %v", resourceID, err))
 				return warnings, errs
 			}
 			obj = updatedObj
-		} else if err != nil {
-			addToResult(&errs, namespace, fmt.Errorf("error checking existence for PV %s: %v", name, err))
-			return warnings, errs
 		}
 	}
 
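Note, outside the patch: the new switch above checks for a volume snapshot first, then a restic backup, then the Delete reclaim policy, and only restores the PV object as-is in the remaining case. A minimal restatement of that ordering follows; the type and function names are illustrative and are not part of the restore code.

// pvAction enumerates the four outcomes of the switch above; names are illustrative.
type pvAction int

const (
	restoreFromSnapshot  pvAction = iota // PV has a Velero volume snapshot
	reprovisionForRestic                 // PV data was backed up with restic
	reprovisionForDelete                 // no snapshot and reclaim policy is Delete
	restoreAsIs                          // no snapshot and reclaim policy is not Delete
)

// decidePVAction mirrors the branch order of the switch: a snapshot wins over a
// restic backup, which wins over the Delete reclaim-policy check.
func decidePVAction(hasSnapshot, hasResticBackup, hasDeleteReclaimPolicy bool) pvAction {
	switch {
	case hasSnapshot:
		return restoreFromSnapshot
	case hasResticBackup:
		return reprovisionForRestic
	case hasDeleteReclaimPolicy:
		return reprovisionForDelete
	default:
		return restoreAsIs
	}
}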
@@ -1125,6 +1132,42 @@ func restorePodVolumeBackups(ctx *context, createdObj *unstructured.Unstructured
 	}
 }
 
+func hasSnapshot(pvName string, snapshots []*volume.Snapshot) bool {
+	for _, snapshot := range snapshots {
+		if snapshot.Spec.PersistentVolumeName == pvName {
+			return true
+		}
+	}
+
+	return false
+}
+
+func hasResticBackup(unstructuredPV *unstructured.Unstructured, ctx *context) bool {
+	if len(ctx.podVolumeBackups) == 0 {
+		return false
+	}
+
+	pv := new(v1.PersistentVolume)
+	if err := runtime.DefaultUnstructuredConverter.FromUnstructured(unstructuredPV.Object, pv); err != nil {
+		ctx.log.WithError(err).Warnf("Unable to convert PV from unstructured to structured")
+		return false
+	}
+
+	if pv.Spec.ClaimRef == nil {
+		return false
+	}
+
+	var found bool
+	for _, pvb := range ctx.podVolumeBackups {
+		if pvb.Spec.Pod.Namespace == pv.Spec.ClaimRef.Namespace && pvb.GetAnnotations()[restic.PVCNameAnnotation] == pv.Spec.ClaimRef.Name {
+			found = true
+			break
+		}
+	}
+
+	return found
+}
+
 func hasDeleteReclaimPolicy(obj map[string]interface{}) bool {
 	policy, _, _ := unstructured.NestedString(obj, "spec", "persistentVolumeReclaimPolicy")
 	return policy == string(v1.PersistentVolumeReclaimDelete)
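Note, outside the patch: hasResticBackup treats a PV as restic-backed when its claimRef points at a PVC in the same namespace as some PodVolumeBackup's pod and that backup carries the velero.io/pvc-name annotation naming the claim. The standalone sketch below restates just that predicate; the helper name and values are hypothetical.

package main

import (
	"fmt"

	corev1api "k8s.io/api/core/v1"
)

// PVCNameAnnotation mirrors restic.PVCNameAnnotation introduced in this patch.
const PVCNameAnnotation = "velero.io/pvc-name"

// pvbMatchesPV is a hypothetical helper restating the predicate inside
// hasResticBackup: a pod volume backup (given its pod's namespace and its
// annotations) is "for" a PV when the PV's claimRef names a PVC in that
// namespace and the backup's velero.io/pvc-name annotation names that PVC.
func pvbMatchesPV(pvbPodNamespace string, pvbAnnotations map[string]string, pv *corev1api.PersistentVolume) bool {
	if pv.Spec.ClaimRef == nil {
		return false
	}
	return pvbPodNamespace == pv.Spec.ClaimRef.Namespace &&
		pvbAnnotations[PVCNameAnnotation] == pv.Spec.ClaimRef.Name
}

func main() {
	pv := &corev1api.PersistentVolume{
		Spec: corev1api.PersistentVolumeSpec{
			ClaimRef: &corev1api.ObjectReference{Namespace: "app", Name: "data-pvc"},
		},
	}

	// Prints: true
	fmt.Println(pvbMatchesPV("app", map[string]string{PVCNameAnnotation: "data-pvc"}, pv))
}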
diff --git a/pkg/restore/restore_test.go b/pkg/restore/restore_test.go
index 737a24a82..d4e62c2e6 100644
--- a/pkg/restore/restore_test.go
+++ b/pkg/restore/restore_test.go
@@ -1826,15 +1826,7 @@ func TestRestorePersistentVolumes(t *testing.T) {
 				},
 			},
 			volumeSnapshotLocations: []*velerov1api.VolumeSnapshotLocation{
-				{
-					ObjectMeta: metav1.ObjectMeta{
-						Namespace: velerov1api.DefaultNamespace,
-						Name:      "default",
-					},
-					Spec: velerov1api.VolumeSnapshotLocationSpec{
-						Provider: "provider-1",
-					},
-				},
+				builder.ForVolumeSnapshotLocation(velerov1api.DefaultNamespace, "default").Provider("provider-1").Result(),
 			},
 			volumeSnapshotterGetter: map[string]velero.VolumeSnapshotter{
 				"provider-1": &volumeSnapshotter{
@@ -1883,15 +1875,7 @@
 				},
 			},
 			volumeSnapshotLocations: []*velerov1api.VolumeSnapshotLocation{
-				{
-					ObjectMeta: metav1.ObjectMeta{
-						Namespace: velerov1api.DefaultNamespace,
-						Name:      "default",
-					},
-					Spec: velerov1api.VolumeSnapshotLocationSpec{
-						Provider: "provider-1",
-					},
-				},
+				builder.ForVolumeSnapshotLocation(velerov1api.DefaultNamespace, "default").Provider("provider-1").Result(),
 			},
 			volumeSnapshotterGetter: map[string]velero.VolumeSnapshotter{
 				"provider-1": &volumeSnapshotter{
@@ -1945,15 +1929,7 @@
 				},
 			},
 			volumeSnapshotLocations: []*velerov1api.VolumeSnapshotLocation{
-				{
-					ObjectMeta: metav1.ObjectMeta{
-						Namespace: velerov1api.DefaultNamespace,
-						Name:      "default",
-					},
-					Spec: velerov1api.VolumeSnapshotLocationSpec{
-						Provider: "provider-1",
-					},
-				},
+				builder.ForVolumeSnapshotLocation(velerov1api.DefaultNamespace, "default").Provider("provider-1").Result(),
 			},
 			volumeSnapshotterGetter: map[string]velero.VolumeSnapshotter{
 				// the volume snapshotter fake is not configured with any snapshotID -> volumeID
@@ -2005,15 +1981,7 @@
 				},
 			},
 			volumeSnapshotLocations: []*velerov1api.VolumeSnapshotLocation{
-				{
-					ObjectMeta: metav1.ObjectMeta{
-						Namespace: velerov1api.DefaultNamespace,
-						Name:      "default",
-					},
-					Spec: velerov1api.VolumeSnapshotLocationSpec{
-						Provider: "provider-1",
-					},
-				},
+				builder.ForVolumeSnapshotLocation(velerov1api.DefaultNamespace, "default").Provider("provider-1").Result(),
 			},
 			volumeSnapshotterGetter: map[string]velero.VolumeSnapshotter{
 				// the volume snapshotter fake is not configured with any snapshotID -> volumeID
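Note, outside the patch: based only on the calls shown in this diff, the builder expression in the updated tests is shorthand for the struct literal it replaces. A rough equivalence sketch (a code fragment; the builder's internals are assumed, not shown here):

// The builder call used in the updated test cases...
vsl := builder.ForVolumeSnapshotLocation(velerov1api.DefaultNamespace, "default").
	Provider("provider-1").
	Result()

// ...stands in for the VolumeSnapshotLocation the removed literal declared:
want := &velerov1api.VolumeSnapshotLocation{
	ObjectMeta: metav1.ObjectMeta{
		Namespace: velerov1api.DefaultNamespace,
		Name:      "default",
	},
	Spec: velerov1api.VolumeSnapshotLocationSpec{
		Provider: "provider-1",
	},
}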