mirror of
https://github.com/vmware-tanzu/velero.git
synced 2026-01-07 13:55:20 +00:00
cancel pvb/pvr on velero server restarts
Signed-off-by: Lyndon-Li <lyonghui@vmware.com>
This commit is contained in:
1
changelogs/unreleased/9031-Lyndon-Li
Normal file
1
changelogs/unreleased/9031-Lyndon-Li
Normal file
@@ -0,0 +1 @@
|
|||||||
|
Fix issue #8961, cancel PVB/PVR on Velero server restart
|
||||||
@@ -961,6 +961,7 @@ func markInProgressBackupsFailed(ctx context.Context, client ctrlclient.Client,
|
|||||||
}
|
}
|
||||||
log.WithField("backup", backup.GetName()).Warn(updated.Status.FailureReason)
|
log.WithField("backup", backup.GetName()).Warn(updated.Status.FailureReason)
|
||||||
markDataUploadsCancel(ctx, client, backup, log)
|
markDataUploadsCancel(ctx, client, backup, log)
|
||||||
|
markPodVolumeBackupsCancel(ctx, client, backup, log)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -983,8 +984,10 @@ func markInProgressRestoresFailed(ctx context.Context, client ctrlclient.Client,
|
|||||||
log.WithError(errors.WithStack(err)).Errorf("failed to patch restore %q", restore.GetName())
|
log.WithError(errors.WithStack(err)).Errorf("failed to patch restore %q", restore.GetName())
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
log.WithField("restore", restore.GetName()).Warn(updated.Status.FailureReason)
|
log.WithField("restore", restore.GetName()).Warn(updated.Status.FailureReason)
|
||||||
markDataDownloadsCancel(ctx, client, restore, log)
|
markDataDownloadsCancel(ctx, client, restore, log)
|
||||||
|
markPodVolumeRestoresCancel(ctx, client, restore, log)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1069,3 +1072,90 @@ func markDataDownloadsCancel(ctx context.Context, client ctrlclient.Client, rest
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func markPodVolumeBackupsCancel(ctx context.Context, client ctrlclient.Client, backup velerov1api.Backup, log logrus.FieldLogger) {
|
||||||
|
pvbs := &velerov1api.PodVolumeBackupList{}
|
||||||
|
|
||||||
|
if err := client.List(ctx, pvbs, &ctrlclient.ListOptions{
|
||||||
|
Namespace: backup.GetNamespace(),
|
||||||
|
LabelSelector: labels.Set(map[string]string{
|
||||||
|
velerov1api.BackupUIDLabel: string(backup.GetUID()),
|
||||||
|
}).AsSelector(),
|
||||||
|
}); err != nil {
|
||||||
|
log.WithError(errors.WithStack(err)).Error("failed to list PVBs")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := range pvbs.Items {
|
||||||
|
pvb := pvbs.Items[i]
|
||||||
|
if pvb.Status.Phase == velerov1api.PodVolumeBackupPhaseAccepted ||
|
||||||
|
pvb.Status.Phase == velerov1api.PodVolumeBackupPhasePrepared ||
|
||||||
|
pvb.Status.Phase == velerov1api.PodVolumeBackupPhaseInProgress ||
|
||||||
|
pvb.Status.Phase == velerov1api.PodVolumeBackupPhaseNew ||
|
||||||
|
pvb.Status.Phase == "" {
|
||||||
|
err := controller.UpdatePVBWithRetry(ctx, client, types.NamespacedName{Namespace: pvb.Namespace, Name: pvb.Name}, log.WithField("PVB", pvb.Name),
|
||||||
|
func(pvb *velerov1api.PodVolumeBackup) bool {
|
||||||
|
if pvb.Spec.Cancel {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
pvb.Spec.Cancel = true
|
||||||
|
pvb.Status.Message = fmt.Sprintf("PVB is in status %q during the velero server starting, mark it as cancel", pvb.Status.Phase)
|
||||||
|
|
||||||
|
return true
|
||||||
|
})
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.WithError(errors.WithStack(err)).Errorf("failed to mark PVB %q cancel", pvb.GetName())
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
log.WithField("PVB is mark for cancel due to server restart", pvb.GetName()).Warn(pvb.Status.Message)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func markPodVolumeRestoresCancel(ctx context.Context, client ctrlclient.Client, restore velerov1api.Restore, log logrus.FieldLogger) {
|
||||||
|
pvrs := &velerov1api.PodVolumeRestoreList{}
|
||||||
|
|
||||||
|
if err := client.List(ctx, pvrs, &ctrlclient.ListOptions{
|
||||||
|
Namespace: restore.GetNamespace(),
|
||||||
|
LabelSelector: labels.Set(map[string]string{
|
||||||
|
velerov1api.RestoreUIDLabel: string(restore.GetUID()),
|
||||||
|
}).AsSelector(),
|
||||||
|
}); err != nil {
|
||||||
|
log.WithError(errors.WithStack(err)).Error("failed to list PVRs")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := range pvrs.Items {
|
||||||
|
pvr := pvrs.Items[i]
|
||||||
|
if controller.IsLegacyPVR(&pvr) {
|
||||||
|
log.WithField("PVR", pvr.GetName()).Warn("Found a legacy PVR during velero server restart, cannot stop it")
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if pvr.Status.Phase == velerov1api.PodVolumeRestorePhaseAccepted ||
|
||||||
|
pvr.Status.Phase == velerov1api.PodVolumeRestorePhasePrepared ||
|
||||||
|
pvr.Status.Phase == velerov1api.PodVolumeRestorePhaseInProgress ||
|
||||||
|
pvr.Status.Phase == velerov1api.PodVolumeRestorePhaseNew ||
|
||||||
|
pvr.Status.Phase == "" {
|
||||||
|
err := controller.UpdatePVRWithRetry(ctx, client, types.NamespacedName{Namespace: pvr.Namespace, Name: pvr.Name}, log.WithField("PVR", pvr.Name),
|
||||||
|
func(pvr *velerov1api.PodVolumeRestore) bool {
|
||||||
|
if pvr.Spec.Cancel {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
pvr.Spec.Cancel = true
|
||||||
|
pvr.Status.Message = fmt.Sprintf("PVR is in status %q during the velero server starting, mark it as cancel", pvr.Status.Phase)
|
||||||
|
|
||||||
|
return true
|
||||||
|
})
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.WithError(errors.WithStack(err)).Errorf("failed to mark PVR %q cancel", pvr.GetName())
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
log.WithField("PVR is mark for cancel due to server restart", pvr.GetName()).Warn(pvr.Status.Message)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -885,7 +885,7 @@ func UpdateDataDownloadWithRetry(ctx context.Context, client client.Client, name
|
|||||||
err := client.Update(ctx, dd)
|
err := client.Update(ctx, dd)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if apierrors.IsConflict(err) {
|
if apierrors.IsConflict(err) {
|
||||||
log.Warnf("failed to update datadownload for %s/%s and will retry it", dd.Namespace, dd.Name)
|
log.Debugf("failed to update datadownload for %s/%s and will retry it", dd.Namespace, dd.Name)
|
||||||
return false, nil
|
return false, nil
|
||||||
} else {
|
} else {
|
||||||
return false, errors.Wrapf(err, "error updating datadownload %s/%s", dd.Namespace, dd.Name)
|
return false, errors.Wrapf(err, "error updating datadownload %s/%s", dd.Namespace, dd.Name)
|
||||||
|
|||||||
@@ -990,7 +990,7 @@ func UpdateDataUploadWithRetry(ctx context.Context, client client.Client, namesp
|
|||||||
err := client.Update(ctx, du)
|
err := client.Update(ctx, du)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if apierrors.IsConflict(err) {
|
if apierrors.IsConflict(err) {
|
||||||
log.Warnf("failed to update dataupload for %s/%s and will retry it", du.Namespace, du.Name)
|
log.Debugf("failed to update dataupload for %s/%s and will retry it", du.Namespace, du.Name)
|
||||||
return false, nil
|
return false, nil
|
||||||
} else {
|
} else {
|
||||||
return false, errors.Wrapf(err, "error updating dataupload with error %s/%s", du.Namespace, du.Name)
|
return false, errors.Wrapf(err, "error updating dataupload with error %s/%s", du.Namespace, du.Name)
|
||||||
|
|||||||
@@ -833,7 +833,7 @@ func UpdatePVBWithRetry(ctx context.Context, client client.Client, namespacedNam
|
|||||||
err := client.Update(ctx, pvb)
|
err := client.Update(ctx, pvb)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if apierrors.IsConflict(err) {
|
if apierrors.IsConflict(err) {
|
||||||
log.Warnf("failed to update PVB for %s/%s and will retry it", pvb.Namespace, pvb.Name)
|
log.Debugf("failed to update PVB for %s/%s and will retry it", pvb.Namespace, pvb.Name)
|
||||||
return false, nil
|
return false, nil
|
||||||
} else {
|
} else {
|
||||||
return false, errors.Wrapf(err, "error updating PVB with error %s/%s", pvb.Namespace, pvb.Name)
|
return false, errors.Wrapf(err, "error updating PVB with error %s/%s", pvb.Namespace, pvb.Name)
|
||||||
|
|||||||
@@ -545,7 +545,7 @@ func (r *PodVolumeRestoreReconciler) closeDataPath(ctx context.Context, pvrName
|
|||||||
func (r *PodVolumeRestoreReconciler) SetupWithManager(mgr ctrl.Manager) error {
|
func (r *PodVolumeRestoreReconciler) SetupWithManager(mgr ctrl.Manager) error {
|
||||||
gp := kube.NewGenericEventPredicate(func(object client.Object) bool {
|
gp := kube.NewGenericEventPredicate(func(object client.Object) bool {
|
||||||
pvr := object.(*velerov1api.PodVolumeRestore)
|
pvr := object.(*velerov1api.PodVolumeRestore)
|
||||||
if isLegacyPVR(pvr) {
|
if IsLegacyPVR(pvr) {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -570,7 +570,7 @@ func (r *PodVolumeRestoreReconciler) SetupWithManager(mgr ctrl.Manager) error {
|
|||||||
|
|
||||||
pred := kube.NewAllEventPredicate(func(obj client.Object) bool {
|
pred := kube.NewAllEventPredicate(func(obj client.Object) bool {
|
||||||
pvr := obj.(*velerov1api.PodVolumeRestore)
|
pvr := obj.(*velerov1api.PodVolumeRestore)
|
||||||
return !isLegacyPVR(pvr)
|
return !IsLegacyPVR(pvr)
|
||||||
})
|
})
|
||||||
|
|
||||||
return ctrl.NewControllerManagedBy(mgr).
|
return ctrl.NewControllerManagedBy(mgr).
|
||||||
@@ -620,7 +620,7 @@ func (r *PodVolumeRestoreReconciler) findPVRForTargetPod(ctx context.Context, po
|
|||||||
|
|
||||||
requests := []reconcile.Request{}
|
requests := []reconcile.Request{}
|
||||||
for _, item := range list.Items {
|
for _, item := range list.Items {
|
||||||
if isLegacyPVR(&item) {
|
if IsLegacyPVR(&item) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -897,7 +897,7 @@ func UpdatePVRWithRetry(ctx context.Context, client client.Client, namespacedNam
|
|||||||
err := client.Update(ctx, pvr)
|
err := client.Update(ctx, pvr)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if apierrors.IsConflict(err) {
|
if apierrors.IsConflict(err) {
|
||||||
log.Warnf("failed to update PVR for %s/%s and will retry it", pvr.Namespace, pvr.Name)
|
log.Debugf("failed to update PVR for %s/%s and will retry it", pvr.Namespace, pvr.Name)
|
||||||
return false, nil
|
return false, nil
|
||||||
} else {
|
} else {
|
||||||
return false, errors.Wrapf(err, "error updating PVR %s/%s", pvr.Namespace, pvr.Name)
|
return false, errors.Wrapf(err, "error updating PVR %s/%s", pvr.Namespace, pvr.Name)
|
||||||
|
|||||||
@@ -205,7 +205,7 @@ func (c *PodVolumeRestoreReconcilerLegacy) SetupWithManager(mgr ctrl.Manager) er
|
|||||||
// By watching the pods, we can trigger the PVR reconciliation again once the pod is finally scheduled on the node.
|
// By watching the pods, we can trigger the PVR reconciliation again once the pod is finally scheduled on the node.
|
||||||
pred := kube.NewAllEventPredicate(func(obj client.Object) bool {
|
pred := kube.NewAllEventPredicate(func(obj client.Object) bool {
|
||||||
pvr := obj.(*velerov1api.PodVolumeRestore)
|
pvr := obj.(*velerov1api.PodVolumeRestore)
|
||||||
return isLegacyPVR(pvr)
|
return IsLegacyPVR(pvr)
|
||||||
})
|
})
|
||||||
|
|
||||||
return ctrl.NewControllerManagedBy(mgr).Named("podvolumerestorelegacy").
|
return ctrl.NewControllerManagedBy(mgr).Named("podvolumerestorelegacy").
|
||||||
@@ -229,7 +229,7 @@ func (c *PodVolumeRestoreReconcilerLegacy) findVolumeRestoresForPod(ctx context.
|
|||||||
|
|
||||||
requests := []reconcile.Request{}
|
requests := []reconcile.Request{}
|
||||||
for _, item := range list.Items {
|
for _, item := range list.Items {
|
||||||
if !isLegacyPVR(&item) {
|
if !IsLegacyPVR(&item) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -359,6 +359,6 @@ func (c *PodVolumeRestoreReconcilerLegacy) closeDataPath(ctx context.Context, pv
|
|||||||
c.dataPathMgr.RemoveAsyncBR(pvbName)
|
c.dataPathMgr.RemoveAsyncBR(pvbName)
|
||||||
}
|
}
|
||||||
|
|
||||||
func isLegacyPVR(pvr *velerov1api.PodVolumeRestore) bool {
|
func IsLegacyPVR(pvr *velerov1api.PodVolumeRestore) bool {
|
||||||
return pvr.Spec.UploaderType == uploader.ResticType
|
return pvr.Spec.UploaderType == uploader.ResticType
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user