Merge pull request #7022 from allenxu404/i6721

Fix inconsistent behavior of Backup and Restore hook execution
This commit is contained in:
Anshul Ahuja
2023-11-06 14:01:30 +05:30
committed by GitHub
5 changed files with 31 additions and 26 deletions

View File

@@ -50,6 +50,11 @@ type DefaultListWatchFactory struct {
PodsGetter cache.Getter
}
// HookErrInfo pairs an error with a namespace name so the two can be
// passed around together as a single value.
type HookErrInfo struct {
// Namespace is the namespace associated with Err.
// NOTE(review): presumably the namespace of the resource whose hook
// produced the error — confirm against the callers that populate it.
Namespace string
// Err is the error being carried.
Err error
}
// NewListWatch returns a cache.ListerWatcher for the "pods" resource. It is
// built with cache.NewListWatchFromClient, using d.PodsGetter as the client
// and restricting results to the given namespace and field selector.
func (d *DefaultListWatchFactory) NewListWatch(namespace string, selector fields.Selector) cache.ListerWatcher {
	// The factory only ever watches pods, so the resource name is fixed.
	const resource = "pods"

	listWatch := cache.NewListWatchFromClient(d.PodsGetter, resource, namespace, selector)
	return listWatch
}
@@ -158,8 +163,8 @@ func (e *DefaultWaitExecHookHandler) HandleHooks(
if hook.Hook.WaitTimeout.Duration != 0 && time.Since(waitStart) > hook.Hook.WaitTimeout.Duration {
err := fmt.Errorf("hook %s in container %s expired before executing", hook.HookName, hook.Hook.Container)
hookLog.Error(err)
errors = append(errors, err)
if hook.Hook.OnError == velerov1api.HookErrorModeFail {
errors = append(errors, err)
cancel()
return
}
@@ -172,8 +177,9 @@ func (e *DefaultWaitExecHookHandler) HandleHooks(
}
if err := e.PodCommandExecutor.ExecutePodCommand(hookLog, podMap, pod.Namespace, pod.Name, hook.HookName, eh); err != nil {
hookLog.WithError(err).Error("Error executing hook")
err = fmt.Errorf("hook %s in container %s failed to execute, err: %v", hook.HookName, hook.Hook.Container, err)
errors = append(errors, err)
if hook.Hook.OnError == velerov1api.HookErrorModeFail {
errors = append(errors, err)
cancel()
return
}
@@ -204,10 +210,9 @@ func (e *DefaultWaitExecHookHandler) HandleHooks(
podWatcher.Run(ctx.Done())
// There are some cases where this function could return with unexecuted hooks: the pod may
// be deleted, a hook with OnError mode Fail could fail, or it may timeout waiting for
// be deleted, a hook could fail, or it may timeout waiting for
// containers to become ready.
// Each unexecuted hook is logged as an error but only hooks with OnError mode Fail return
// an error from this function.
// Each unexecuted hook is logged as an error and this error will be returned from this function.
for _, hooks := range byContainer {
for _, hook := range hooks {
if hook.executed {
@@ -222,9 +227,7 @@ func (e *DefaultWaitExecHookHandler) HandleHooks(
},
)
hookLog.Error(err)
if hook.Hook.OnError == velerov1api.HookErrorModeFail {
errors = append(errors, err)
}
errors = append(errors, err)
}
}

View File

@@ -209,10 +209,10 @@ func TestWaitExecHandleHooks(t *testing.T) {
Result(),
},
},
expectedErrors: []error{errors.New("pod hook error")},
expectedErrors: []error{errors.New("hook <from-annotation> in container container1 failed to execute, err: pod hook error")},
},
{
name: "should return no error when hook from annotation fails with on error mode continue",
name: "should return error when hook from annotation fails with on error mode continue",
initialPod: builder.ForPod("default", "my-pod").
ObjectMeta(builder.WithAnnotations(
podRestoreHookCommandAnnotationKey, "/usr/bin/foo",
@@ -278,7 +278,7 @@ func TestWaitExecHandleHooks(t *testing.T) {
Result(),
},
},
expectedErrors: nil,
expectedErrors: []error{errors.New("hook <from-annotation> in container container1 failed to execute, err: pod hook error")},
},
{
name: "should return no error when hook from annotation executes after 10ms wait for container to start",
@@ -422,7 +422,7 @@ func TestWaitExecHandleHooks(t *testing.T) {
},
},
{
name: "should return no error when spec hook with wait timeout expires with OnError mode Continue",
name: "should return error when spec hook with wait timeout expires with OnError mode Continue",
groupResource: "pods",
initialPod: builder.ForPod("default", "my-pod").
Containers(&v1.Container{
@@ -435,7 +435,7 @@ func TestWaitExecHandleHooks(t *testing.T) {
},
}).
Result(),
expectedErrors: nil,
expectedErrors: []error{errors.New("hook my-hook-1 in container container1 in pod default/my-pod not executed: context deadline exceeded")},
byContainer: map[string][]PodExecRestoreHook{
"container1": {
{
@@ -515,8 +515,8 @@ func TestWaitExecHandleHooks(t *testing.T) {
sharedHooksContextTimeout: time.Millisecond,
},
{
name: "should return no error when shared hooks context is canceled before spec hook with OnError mode Continue executes",
expectedErrors: nil,
name: "should return error when shared hooks context is canceled before spec hook with OnError mode Continue executes",
expectedErrors: []error{errors.New("hook my-hook-1 in container container1 in pod default/my-pod not executed: context deadline exceeded")},
groupResource: "pods",
initialPod: builder.ForPod("default", "my-pod").
Containers(&v1.Container{