Properly restore PVs with a reclaim policy of Retain and restic backups (#1713)

* reorganize persistent volume restore code for clarity

Signed-off-by: Steve Kriss <krisss@vmware.com>
Author: Steve Kriss
Date: 2019-08-06 14:40:35 -06:00
Committed by: KubeKween
parent 4accb8512a
commit 80692a8a39
5 changed files with 101 additions and 66 deletions


@@ -0,0 +1 @@
+properly restore PVs backed up with restic and a reclaim policy of "Retain"


@@ -150,7 +150,7 @@ func (b *backupper) BackupPodVolumes(backup *velerov1api.Backup, pod *corev1api.
 			continue
 		}
 
-		volumeBackup := newPodVolumeBackup(backup, pod, volumeName, repo.Spec.ResticIdentifier)
+		volumeBackup := newPodVolumeBackup(backup, pod, volume, repo.Spec.ResticIdentifier)
 		numVolumeSnapshots++
 		if volumeBackup, err = b.repoManager.veleroClient.VeleroV1().PodVolumeBackups(volumeBackup.Namespace).Create(volumeBackup); err != nil {
 			errs = append(errs, err)
@@ -221,8 +221,8 @@ func isHostPathVolume(volume *corev1api.Volume, pvcGetter pvcGetter, pvGetter pv
 	return pv.Spec.HostPath != nil, nil
 }
 
-func newPodVolumeBackup(backup *velerov1api.Backup, pod *corev1api.Pod, volumeName, repoIdentifier string) *velerov1api.PodVolumeBackup {
-	return &velerov1api.PodVolumeBackup{
+func newPodVolumeBackup(backup *velerov1api.Backup, pod *corev1api.Pod, volume corev1api.Volume, repoIdentifier string) *velerov1api.PodVolumeBackup {
+	pvb := &velerov1api.PodVolumeBackup{
 		ObjectMeta: metav1.ObjectMeta{
 			Namespace:    backup.Namespace,
 			GenerateName: backup.Name + "-",
@@ -248,19 +248,29 @@ func newPodVolumeBackup(backup *velerov1api.Backup, pod *corev1api.Pod, volumeNa
 				Name:      pod.Name,
 				UID:       pod.UID,
 			},
-			Volume: volumeName,
+			Volume: volume.Name,
 			Tags: map[string]string{
 				"backup":     backup.Name,
 				"backup-uid": string(backup.UID),
 				"pod":        pod.Name,
 				"pod-uid":    string(pod.UID),
 				"ns":         pod.Namespace,
-				"volume":     volumeName,
+				"volume":     volume.Name,
 			},
 			BackupStorageLocation: backup.Spec.StorageLocation,
 			RepoIdentifier:        repoIdentifier,
 		},
 	}
+
+	// if the volume is for a PVC, annotate the pod volume backup with its name
+	// for easy identification as a PVC backup during restore.
+	if volume.PersistentVolumeClaim != nil {
+		pvb.SetAnnotations(map[string]string{
+			PVCNameAnnotation: volume.PersistentVolumeClaim.ClaimName,
+		})
+	}
+
+	return pvb
 }
 
 func errorOnly(_ interface{}, err error) error {
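
Note that newPodVolumeBackup now receives the whole corev1api.Volume rather than just its name, so it can inspect volume.PersistentVolumeClaim and stamp the PVC-name annotation on the PodVolumeBackup. A minimal sketch of how a consumer could read that annotation back, assuming the velerov1api and restic packages used above (the helper itself is hypothetical, not part of this change):

// pvcNameForBackup reports which PVC, if any, a PodVolumeBackup was taken
// for, using the velero.io/pvc-name annotation set by newPodVolumeBackup.
func pvcNameForBackup(pvb *velerov1api.PodVolumeBackup) (string, bool) {
	name, ok := pvb.GetAnnotations()[restic.PVCNameAnnotation]
	return name, ok
}

The restore code in this PR reads the annotation inline in hasResticBackup (see the restore.go hunks below) rather than through a helper like this.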


@@ -35,11 +35,24 @@ import (
 )
 
 const (
-	DaemonSet     = "restic"
-	InitContainer = "restic-wait"
+	// DaemonSet is the name of the Velero restic daemonset.
+	DaemonSet = "restic"
+
+	// InitContainer is the name of the init container added
+	// to workload pods to help with restores.
+	InitContainer = "restic-wait"
+
+	// DefaultMaintenanceFrequency is the default time interval
+	// at which restic check & prune are run.
+	DefaultMaintenanceFrequency = 24 * time.Hour
+
+	// PVCNameAnnotation is the key for the annotation added to
+	// pod volume backups when they're for a PVC.
+	PVCNameAnnotation = "velero.io/pvc-name"
+
+	// Deprecated.
+	//
+	// TODO(2.0): remove
 	podAnnotationPrefix       = "snapshot.velero.io/"
 	volumesToBackupAnnotation = "backup.velero.io/backup-volumes"


@@ -874,41 +874,48 @@ func (ctx *context) restoreItem(obj *unstructured.Unstructured, groupResource sc
 	}
 
 	if groupResource == kuberesource.PersistentVolumes {
-		var hasSnapshot bool
-		for _, snapshot := range ctx.volumeSnapshots {
-			if snapshot.Spec.PersistentVolumeName == name {
-				hasSnapshot = true
-				break
-			}
-		}
-
-		if !hasSnapshot && hasDeleteReclaimPolicy(obj.Object) {
-			ctx.log.Infof("Not restoring PV because it doesn't have a snapshot and its reclaim policy is Delete.")
-			ctx.pvsToProvision.Insert(name)
-
-			// PV's existence will be recorded later. Just skip the volume restore logic.
-			return warnings, errs
-		}
-
-		// Check if the PV exists in the cluster before attempting to create
-		// a volume from the snapshot, in order to avoid orphaned volumes (GH #609)
-		shouldRestoreSnapshot, err := ctx.shouldRestore(name, resourceClient)
-		if err != nil {
-			addToResult(&errs, namespace, errors.Wrapf(err, "error waiting on in-cluster persistentvolume %s", name))
-			return warnings, errs
-		}
-
-		if shouldRestoreSnapshot {
-			// restore the PV from snapshot (if applicable)
-			updatedObj, err := ctx.pvRestorer.executePVAction(obj)
-			if err != nil {
-				addToResult(&errs, namespace, fmt.Errorf("error executing PVAction for %s: %v", resourceID, err))
-				return warnings, errs
-			}
-			obj = updatedObj
-		} else if err != nil {
-			addToResult(&errs, namespace, fmt.Errorf("error checking existence for PV %s: %v", name, err))
-			return warnings, errs
-		}
+		switch {
+		case hasSnapshot(name, ctx.volumeSnapshots):
+			// Check if the PV exists in the cluster before attempting to create
+			// a volume from the snapshot, in order to avoid orphaned volumes (GH #609)
+			shouldRestoreSnapshot, err := ctx.shouldRestore(name, resourceClient)
+			if err != nil {
+				addToResult(&errs, namespace, errors.Wrapf(err, "error waiting on in-cluster persistentvolume %s", name))
+				return warnings, errs
+			}
+
+			if shouldRestoreSnapshot {
+				ctx.log.Infof("Restoring persistent volume from snapshot.")
+				updatedObj, err := ctx.pvRestorer.executePVAction(obj)
+				if err != nil {
+					addToResult(&errs, namespace, fmt.Errorf("error executing PVAction for %s: %v", resourceID, err))
+					return warnings, errs
+				}
+				obj = updatedObj
+			}
+		case hasResticBackup(obj, ctx):
+			ctx.log.Infof("Dynamically re-provisioning persistent volume because it has a restic backup to be restored.")
+			ctx.pvsToProvision.Insert(name)
+
+			// return early because we don't want to restore the PV itself, we want to dynamically re-provision it.
+			return warnings, errs
+		case hasDeleteReclaimPolicy(obj.Object):
+			ctx.log.Infof("Dynamically re-provisioning persistent volume because it doesn't have a snapshot and its reclaim policy is Delete.")
+			ctx.pvsToProvision.Insert(name)
+
+			// return early because we don't want to restore the PV itself, we want to dynamically re-provision it.
+			return warnings, errs
+		default:
+			ctx.log.Infof("Restoring persistent volume as-is because it doesn't have a snapshot and its reclaim policy is not Delete.")
+
+			// we call the pvRestorer here to clear out the PV's claimRef, so it can be re-claimed
+			// when its PVC is restored.
+			updatedObj, err := ctx.pvRestorer.executePVAction(obj)
+			if err != nil {
+				addToResult(&errs, namespace, fmt.Errorf("error executing PVAction for %s: %v", resourceID, err))
+				return warnings, errs
+			}
+			obj = updatedObj
+		}
 	}
 
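Taken together, the switch above encodes a four-way decision for each persistent volume being restored. Distilled into a pure function for illustration only; the type and names below are hypothetical, not part of the diff:

// pvAction summarizes the restore behavior chosen by the switch above.
type pvAction int

const (
	restoreFromSnapshot    pvAction = iota // re-create the volume from its snapshot
	reprovisionDynamically                 // skip the PV object; the restored PVC provisions a new volume
	restoreAsIs                            // restore the PV object, clearing its claimRef
)

// decidePVRestore mirrors the case order above: a snapshot wins, then a
// restic backup, then a Delete reclaim policy; otherwise (e.g. a Retain
// policy with no snapshot or restic backup) the PV is restored as-is.
func decidePVRestore(hasSnap, hasRestic, deleteReclaimPolicy bool) pvAction {
	switch {
	case hasSnap:
		return restoreFromSnapshot
	case hasRestic, deleteReclaimPolicy:
		return reprovisionDynamically
	default:
		return restoreAsIs
	}
}

The new hasResticBackup case is the actual fix: previously, a Retain-policy PV with only a restic backup fell through to the as-is branch and was restored pointing at backing storage that may no longer exist; now it is dynamically re-provisioned and repopulated from the restic backup.
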
@@ -1125,6 +1132,42 @@ func restorePodVolumeBackups(ctx *context, createdObj *unstructured.Unstructured
 	}
 }
 
+func hasSnapshot(pvName string, snapshots []*volume.Snapshot) bool {
+	for _, snapshot := range snapshots {
+		if snapshot.Spec.PersistentVolumeName == pvName {
+			return true
+		}
+	}
+
+	return false
+}
+
+func hasResticBackup(unstructuredPV *unstructured.Unstructured, ctx *context) bool {
+	if len(ctx.podVolumeBackups) == 0 {
+		return false
+	}
+
+	pv := new(v1.PersistentVolume)
+	if err := runtime.DefaultUnstructuredConverter.FromUnstructured(unstructuredPV.Object, pv); err != nil {
+		ctx.log.WithError(err).Warnf("Unable to convert PV from unstructured to structured")
+		return false
+	}
+
+	if pv.Spec.ClaimRef == nil {
+		return false
+	}
+
+	var found bool
+	for _, pvb := range ctx.podVolumeBackups {
+		if pvb.Spec.Pod.Namespace == pv.Spec.ClaimRef.Namespace && pvb.GetAnnotations()[restic.PVCNameAnnotation] == pv.Spec.ClaimRef.Name {
+			found = true
+			break
+		}
+	}
+
+	return found
+}
+
 func hasDeleteReclaimPolicy(obj map[string]interface{}) bool {
 	policy, _, _ := unstructured.NestedString(obj, "spec", "persistentVolumeReclaimPolicy")
 	return policy == string(v1.PersistentVolumeReclaimDelete)
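
The matching rule inside hasResticBackup can be stated as a standalone predicate. A sketch, factored out for illustration only (the diff keeps this logic inline in the loop above):

// pvbMatchesPV reports whether a PodVolumeBackup was taken from the PVC that
// a PersistentVolume is bound to: the PVB's pod must live in the claimRef's
// namespace, and its velero.io/pvc-name annotation must name the claimRef.
func pvbMatchesPV(pvb *velerov1api.PodVolumeBackup, pv *v1.PersistentVolume) bool {
	if pv.Spec.ClaimRef == nil {
		return false
	}
	return pvb.Spec.Pod.Namespace == pv.Spec.ClaimRef.Namespace &&
		pvb.GetAnnotations()[restic.PVCNameAnnotation] == pv.Spec.ClaimRef.Name
}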


@@ -1826,15 +1826,7 @@ func TestRestorePersistentVolumes(t *testing.T) {
 				},
 			},
 			volumeSnapshotLocations: []*velerov1api.VolumeSnapshotLocation{
-				{
-					ObjectMeta: metav1.ObjectMeta{
-						Namespace: velerov1api.DefaultNamespace,
-						Name:      "default",
-					},
-					Spec: velerov1api.VolumeSnapshotLocationSpec{
-						Provider: "provider-1",
-					},
-				},
+				builder.ForVolumeSnapshotLocation(velerov1api.DefaultNamespace, "default").Provider("provider-1").Result(),
 			},
 			volumeSnapshotterGetter: map[string]velero.VolumeSnapshotter{
 				"provider-1": &volumeSnapshotter{
@@ -1883,15 +1875,7 @@ func TestRestorePersistentVolumes(t *testing.T) {
 				},
 			},
 			volumeSnapshotLocations: []*velerov1api.VolumeSnapshotLocation{
-				{
-					ObjectMeta: metav1.ObjectMeta{
-						Namespace: velerov1api.DefaultNamespace,
-						Name:      "default",
-					},
-					Spec: velerov1api.VolumeSnapshotLocationSpec{
-						Provider: "provider-1",
-					},
-				},
+				builder.ForVolumeSnapshotLocation(velerov1api.DefaultNamespace, "default").Provider("provider-1").Result(),
 			},
 			volumeSnapshotterGetter: map[string]velero.VolumeSnapshotter{
 				"provider-1": &volumeSnapshotter{
@@ -1945,15 +1929,7 @@ func TestRestorePersistentVolumes(t *testing.T) {
 				},
 			},
 			volumeSnapshotLocations: []*velerov1api.VolumeSnapshotLocation{
-				{
-					ObjectMeta: metav1.ObjectMeta{
-						Namespace: velerov1api.DefaultNamespace,
-						Name:      "default",
-					},
-					Spec: velerov1api.VolumeSnapshotLocationSpec{
-						Provider: "provider-1",
-					},
-				},
+				builder.ForVolumeSnapshotLocation(velerov1api.DefaultNamespace, "default").Provider("provider-1").Result(),
 			},
 			volumeSnapshotterGetter: map[string]velero.VolumeSnapshotter{
 				// the volume snapshotter fake is not configured with any snapshotID -> volumeID
@@ -2005,15 +1981,7 @@ func TestRestorePersistentVolumes(t *testing.T) {
 				},
 			},
 			volumeSnapshotLocations: []*velerov1api.VolumeSnapshotLocation{
-				{
-					ObjectMeta: metav1.ObjectMeta{
-						Namespace: velerov1api.DefaultNamespace,
-						Name:      "default",
-					},
-					Spec: velerov1api.VolumeSnapshotLocationSpec{
-						Provider: "provider-1",
-					},
-				},
+				builder.ForVolumeSnapshotLocation(velerov1api.DefaultNamespace, "default").Provider("provider-1").Result(),
 			},
 			volumeSnapshotterGetter: map[string]velero.VolumeSnapshotter{
 				// the volume snapshotter fake is not configured with any snapshotID -> volumeID