Wait for results of restore exec hook executions in Finalizing phase instead of InProgress phase

Signed-off-by: allenxu404 <qix2@vmware.com>
This commit is contained in:
allenxu404
2024-04-03 11:28:04 +08:00
parent f3295ccf08
commit 28552258ae
12 changed files with 541 additions and 175 deletions

View File

@@ -33,6 +33,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/utils/clock"
"github.com/vmware-tanzu/velero/internal/hook"
"github.com/vmware-tanzu/velero/internal/volume"
velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
"github.com/vmware-tanzu/velero/pkg/metrics"
@@ -55,6 +56,7 @@ type restoreFinalizerReconciler struct {
metrics *metrics.ServerMetrics
clock clock.WithTickerAndDelayedExecution
crClient client.Client
multiHookTracker *hook.MultiHookTracker
}
func NewRestoreFinalizerReconciler(
@@ -65,6 +67,7 @@ func NewRestoreFinalizerReconciler(
backupStoreGetter persistence.ObjectBackupStoreGetter,
metrics *metrics.ServerMetrics,
crClient client.Client,
multiHookTracker *hook.MultiHookTracker,
) *restoreFinalizerReconciler {
return &restoreFinalizerReconciler{
Client: client,
@@ -75,6 +78,7 @@ func NewRestoreFinalizerReconciler(
metrics: metrics,
clock: &clock.RealClock{},
crClient: crClient,
multiHookTracker: multiHookTracker,
}
}
@@ -151,11 +155,12 @@ func (r *restoreFinalizerReconciler) Reconcile(ctx context.Context, req ctrl.Req
restoredPVCList := volume.RestoredPVCFromRestoredResourceList(restoredResourceList)
finalizerCtx := &finalizerContext{
logger: log,
restore: restore,
crClient: r.crClient,
volumeInfo: volumeInfo,
restoredPVCList: restoredPVCList,
logger: log,
restore: restore,
crClient: r.crClient,
volumeInfo: volumeInfo,
restoredPVCList: restoredPVCList,
multiHookTracker: r.multiHookTracker,
}
warnings, errs := finalizerCtx.execute()
@@ -233,11 +238,12 @@ func (r *restoreFinalizerReconciler) finishProcessing(restorePhase velerov1api.R
// finalizerContext includes all the dependencies required by finalization tasks and
// a function execute() to orderly implement task logic.
type finalizerContext struct {
logger logrus.FieldLogger
restore *velerov1api.Restore
crClient client.Client
volumeInfo []*volume.BackupVolumeInfo
restoredPVCList map[string]struct{}
logger logrus.FieldLogger
restore *velerov1api.Restore
crClient client.Client
volumeInfo []*volume.BackupVolumeInfo
restoredPVCList map[string]struct{}
multiHookTracker *hook.MultiHookTracker
}
func (ctx *finalizerContext) execute() (results.Result, results.Result) { //nolint:unparam //temporarily ignore the lint report: result 0 is always nil (unparam)
@@ -247,6 +253,9 @@ func (ctx *finalizerContext) execute() (results.Result, results.Result) { //noli
pdpErrs := ctx.patchDynamicPVWithVolumeInfo()
errs.Merge(&pdpErrs)
rehErrs := ctx.WaitRestoreExecHook()
errs.Merge(&rehErrs)
return warnings, errs
}
@@ -373,3 +382,45 @@ func needPatch(newPV *v1.PersistentVolume, pvInfo *volume.PVInfo) bool {
return false
}
// WaitRestoreExecHook waits for restore exec hooks to finish then update the hook execution results
func (ctx *finalizerContext) WaitRestoreExecHook() (errs results.Result) {
log := ctx.logger.WithField("restore", ctx.restore.Name)
log.Info("Waiting for restore exec hooks starts")
// wait for restore exec hooks to finish
err := wait.PollUntilContextCancel(context.Background(), 1*time.Second, true, func(context.Context) (bool, error) {
log.Debug("Checking the progress of hooks execution")
if ctx.multiHookTracker.IsComplete(ctx.restore.Name) {
return true, nil
}
return false, nil
})
if err != nil {
errs.Add(ctx.restore.Namespace, err)
return errs
}
log.Info("Done waiting for restore exec hooks starts")
for _, ei := range ctx.multiHookTracker.HookErrs(ctx.restore.Name) {
errs.Add(ei.Namespace, ei.Err)
}
// update hooks execution status
updated := ctx.restore.DeepCopy()
if updated.Status.HookStatus == nil {
updated.Status.HookStatus = &velerov1api.HookStatus{}
}
updated.Status.HookStatus.HooksAttempted, updated.Status.HookStatus.HooksFailed = ctx.multiHookTracker.Stat(ctx.restore.Name)
log.Debugf("hookAttempted: %d, hookFailed: %d", updated.Status.HookStatus.HooksAttempted, updated.Status.HookStatus.HooksFailed)
if err := kubeutil.PatchResource(ctx.restore, updated, ctx.crClient); err != nil {
log.WithError(errors.WithStack((err))).Error("Updating restore status")
errs.Add(ctx.restore.Namespace, err)
}
// delete the hook data for this restore
ctx.multiHookTracker.Delete(ctx.restore.Name)
return errs
}

View File

@@ -18,6 +18,7 @@ package controller
import (
"context"
"fmt"
"testing"
"time"
@@ -34,6 +35,7 @@ import (
corev1api "k8s.io/api/core/v1"
crclient "sigs.k8s.io/controller-runtime/pkg/client"
"github.com/vmware-tanzu/velero/internal/hook"
"github.com/vmware-tanzu/velero/internal/volume"
velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
"github.com/vmware-tanzu/velero/pkg/builder"
@@ -135,6 +137,7 @@ func TestRestoreFinalizerReconcile(t *testing.T) {
NewFakeSingleObjectBackupStoreGetter(backupStore),
metrics.NewServerMetrics(),
fakeClient,
hook.NewMultiHookTracker(),
)
r.clock = testclocks.NewFakeClock(now)
@@ -196,6 +199,7 @@ func TestUpdateResult(t *testing.T) {
NewFakeSingleObjectBackupStoreGetter(backupStore),
metrics.NewServerMetrics(),
fakeClient,
hook.NewMultiHookTracker(),
)
restore := builder.ForRestore(velerov1api.DefaultNamespace, "restore-1").Result()
res := map[string]results.Result{"warnings": {}, "errors": {}}
@@ -454,3 +458,99 @@ func TestPatchDynamicPVWithVolumeInfo(t *testing.T) {
}
}
}
func TestWaitRestoreExecHook(t *testing.T) {
hookTracker1 := hook.NewMultiHookTracker()
restoreName1 := "restore1"
hookTracker2 := hook.NewMultiHookTracker()
restoreName2 := "restore2"
hookTracker2.Add(restoreName2, "ns", "pod", "con1", "s1", "h1", "")
hookTracker2.Record(restoreName2, "ns", "pod", "con1", "s1", "h1", "", false, nil)
hookTracker3 := hook.NewMultiHookTracker()
restoreName3 := "restore3"
podNs, podName, container, source, hookName := "ns", "pod", "con1", "s1", "h1"
hookFailed, hookErr := true, fmt.Errorf("hook failed")
hookTracker3.Add(restoreName3, podNs, podName, container, source, hookName, hook.PhasePre)
tests := []struct {
name string
hookTracker *hook.MultiHookTracker
restore *velerov1api.Restore
expectedHooksAttempted int
expectedHooksFailed int
expectedHookErrs int
waitSec int
podName string
podNs string
Container string
Source string
hookName string
hookFailed bool
hookErr error
}{
{
name: "no restore exec hooks",
hookTracker: hookTracker1,
restore: builder.ForRestore(velerov1api.DefaultNamespace, restoreName1).Result(),
expectedHooksAttempted: 0,
expectedHooksFailed: 0,
expectedHookErrs: 0,
},
{
name: "1 restore exec hook having been executed",
hookTracker: hookTracker2,
restore: builder.ForRestore(velerov1api.DefaultNamespace, restoreName2).Result(),
expectedHooksAttempted: 1,
expectedHooksFailed: 0,
expectedHookErrs: 0,
},
{
name: "1 restore exec hook to be executed",
hookTracker: hookTracker3,
restore: builder.ForRestore(velerov1api.DefaultNamespace, restoreName3).Result(),
waitSec: 2,
expectedHooksAttempted: 1,
expectedHooksFailed: 1,
expectedHookErrs: 1,
podName: podName,
podNs: podNs,
Container: container,
Source: source,
hookName: hookName,
hookFailed: hookFailed,
hookErr: hookErr,
},
}
for _, tc := range tests {
var (
fakeClient = velerotest.NewFakeControllerRuntimeClientBuilder(t).Build()
logger = velerotest.NewLogger()
)
ctx := &finalizerContext{
logger: logger,
crClient: fakeClient,
restore: tc.restore,
multiHookTracker: tc.hookTracker,
}
require.NoError(t, ctx.crClient.Create(context.Background(), tc.restore))
if tc.waitSec > 0 {
go func() {
time.Sleep(time.Second * time.Duration(tc.waitSec))
tc.hookTracker.Record(tc.restore.Name, tc.podNs, tc.podName, tc.Container, tc.Source, tc.hookName, hook.PhasePre, tc.hookFailed, tc.hookErr)
}()
}
errs := ctx.WaitRestoreExecHook()
assert.Len(t, errs.Namespaces, tc.expectedHookErrs)
updated := &velerov1api.Restore{}
err := ctx.crClient.Get(context.Background(), crclient.ObjectKey{Namespace: velerov1api.DefaultNamespace, Name: tc.restore.Name}, updated)
assert.NoError(t, err)
assert.Equal(t, tc.expectedHooksAttempted, updated.Status.HookStatus.HooksAttempted)
assert.Equal(t, tc.expectedHooksFailed, updated.Status.HookStatus.HooksFailed)
}
}