Merge pull request #6918 from Ripolin/main

Add WaitForReady flag to check container readiness state before exec a hook
This commit is contained in:
Xun Jiang/Bruce Jiang
2023-10-15 13:27:34 +08:00
committed by GitHub
7 changed files with 182 additions and 77 deletions

View File

@@ -0,0 +1 @@
Optional check if targeted container is ready before executing a hook

View File

@@ -19,6 +19,7 @@ package hook
import (
"encoding/json"
"fmt"
"strconv"
"strings"
"time"
@@ -37,6 +38,7 @@ import (
"github.com/vmware-tanzu/velero/pkg/kuberesource"
"github.com/vmware-tanzu/velero/pkg/podexec"
"github.com/vmware-tanzu/velero/pkg/restorehelper"
"github.com/vmware-tanzu/velero/pkg/util/boolptr"
"github.com/vmware-tanzu/velero/pkg/util/collections"
"github.com/vmware-tanzu/velero/pkg/util/kube"
)
@@ -61,6 +63,7 @@ const (
podRestoreHookOnErrorAnnotationKey = "post.hook.restore.velero.io/on-error"
podRestoreHookTimeoutAnnotationKey = "post.hook.restore.velero.io/exec-timeout"
podRestoreHookWaitTimeoutAnnotationKey = "post.hook.restore.velero.io/wait-timeout"
podRestoreHookWaitForReadyAnnotationKey = "post.hook.restore.velero.io/wait-for-ready"
podRestoreHookInitContainerImageAnnotationKey = "init.hook.restore.velero.io/container-image"
podRestoreHookInitContainerNameAnnotationKey = "init.hook.restore.velero.io/container-name"
podRestoreHookInitContainerCommandAnnotationKey = "init.hook.restore.velero.io/command"
@@ -477,12 +480,23 @@ func getPodExecRestoreHookFromAnnotations(annotations map[string]string, log log
}
}
waitForReadyString := annotations[podRestoreHookWaitForReadyAnnotationKey]
waitForReady := boolptr.False()
if waitForReadyString != "" {
var err error
*waitForReady, err = strconv.ParseBool(waitForReadyString)
if err != nil {
log.Warn(errors.Wrapf(err, "Unable to parse wait for ready %s, ignoring", waitForReadyString))
}
}
return &velerov1api.ExecRestoreHook{
Container: container,
Command: parseStringToCommand(commandValue),
OnError: onError,
ExecTimeout: metav1.Duration{Duration: execTimeout},
WaitTimeout: metav1.Duration{Duration: waitTimeout},
Container: container,
Command: parseStringToCommand(commandValue),
OnError: onError,
ExecTimeout: metav1.Duration{Duration: execTimeout},
WaitTimeout: metav1.Duration{Duration: waitTimeout},
WaitForReady: waitForReady,
}
}
@@ -542,6 +556,10 @@ func GroupRestoreExecHooks(
Hook: *rh.Exec,
HookSource: "backupSpec",
}
// default to false if attr WaitForReady not set
if named.Hook.WaitForReady == nil {
named.Hook.WaitForReady = boolptr.False()
}
// default to first container in pod if unset, without mutating resource restore hook
if named.Hook.Container == "" {
named.Hook.Container = pod.Spec.Containers[0].Name

View File

@@ -36,6 +36,7 @@ import (
"github.com/vmware-tanzu/velero/pkg/builder"
"github.com/vmware-tanzu/velero/pkg/kuberesource"
velerotest "github.com/vmware-tanzu/velero/pkg/test"
"github.com/vmware-tanzu/velero/pkg/util/boolptr"
"github.com/vmware-tanzu/velero/pkg/util/collections"
)
@@ -724,7 +725,8 @@ func TestGetPodExecRestoreHookFromAnnotations(t *testing.T) {
podRestoreHookCommandAnnotationKey: "/usr/bin/foo",
},
expected: &velerov1api.ExecRestoreHook{
Command: []string{"/usr/bin/foo"},
Command: []string{"/usr/bin/foo"},
WaitForReady: boolptr.False(),
},
},
{
@@ -733,7 +735,8 @@ func TestGetPodExecRestoreHookFromAnnotations(t *testing.T) {
podRestoreHookCommandAnnotationKey: `["a","b","c"]`,
},
expected: &velerov1api.ExecRestoreHook{
Command: []string{"a", "b", "c"},
Command: []string{"a", "b", "c"},
WaitForReady: boolptr.False(),
},
},
{
@@ -743,8 +746,9 @@ func TestGetPodExecRestoreHookFromAnnotations(t *testing.T) {
podRestoreHookOnErrorAnnotationKey: string(velerov1api.HookErrorModeContinue),
},
expected: &velerov1api.ExecRestoreHook{
Command: []string{"/usr/bin/foo"},
OnError: velerov1api.HookErrorModeContinue,
Command: []string{"/usr/bin/foo"},
OnError: velerov1api.HookErrorModeContinue,
WaitForReady: boolptr.False(),
},
},
{
@@ -754,8 +758,9 @@ func TestGetPodExecRestoreHookFromAnnotations(t *testing.T) {
podRestoreHookOnErrorAnnotationKey: string(velerov1api.HookErrorModeFail),
},
expected: &velerov1api.ExecRestoreHook{
Command: []string{"/usr/bin/foo"},
OnError: velerov1api.HookErrorModeFail,
Command: []string{"/usr/bin/foo"},
OnError: velerov1api.HookErrorModeFail,
WaitForReady: boolptr.False(),
},
},
{
@@ -766,9 +771,10 @@ func TestGetPodExecRestoreHookFromAnnotations(t *testing.T) {
podRestoreHookWaitTimeoutAnnotationKey: "1h",
},
expected: &velerov1api.ExecRestoreHook{
Command: []string{"/usr/bin/foo"},
ExecTimeout: metav1.Duration{Duration: 45 * time.Second},
WaitTimeout: metav1.Duration{Duration: time.Hour},
Command: []string{"/usr/bin/foo"},
ExecTimeout: metav1.Duration{Duration: 45 * time.Second},
WaitTimeout: metav1.Duration{Duration: time.Hour},
WaitForReady: boolptr.False(),
},
},
{
@@ -778,8 +784,9 @@ func TestGetPodExecRestoreHookFromAnnotations(t *testing.T) {
podRestoreHookContainerAnnotationKey: "my-app",
},
expected: &velerov1api.ExecRestoreHook{
Command: []string{"/usr/bin/foo"},
Container: "my-app",
Command: []string{"/usr/bin/foo"},
Container: "my-app",
WaitForReady: boolptr.False(),
},
},
{
@@ -790,9 +797,10 @@ func TestGetPodExecRestoreHookFromAnnotations(t *testing.T) {
podRestoreHookTimeoutAnnotationKey: "none",
},
expected: &velerov1api.ExecRestoreHook{
Command: []string{"/usr/bin/foo"},
Container: "my-app",
ExecTimeout: metav1.Duration{Duration: 0},
Command: []string{"/usr/bin/foo"},
Container: "my-app",
ExecTimeout: metav1.Duration{Duration: 0},
WaitForReady: boolptr.False(),
},
},
{
@@ -803,9 +811,10 @@ func TestGetPodExecRestoreHookFromAnnotations(t *testing.T) {
podRestoreHookWaitTimeoutAnnotationKey: "none",
},
expected: &velerov1api.ExecRestoreHook{
Command: []string{"/usr/bin/foo"},
Container: "my-app",
ExecTimeout: metav1.Duration{Duration: 0},
Command: []string{"/usr/bin/foo"},
Container: "my-app",
ExecTimeout: metav1.Duration{Duration: 0},
WaitForReady: boolptr.False(),
},
},
}
@@ -842,6 +851,7 @@ func TestGroupRestoreExecHooks(t *testing.T) {
podRestoreHookOnErrorAnnotationKey, string(velerov1api.HookErrorModeContinue),
podRestoreHookTimeoutAnnotationKey, "1s",
podRestoreHookWaitTimeoutAnnotationKey, "1m",
podRestoreHookWaitForReadyAnnotationKey, "true",
)).
Containers(&corev1api.Container{
Name: "container1",
@@ -853,11 +863,12 @@ func TestGroupRestoreExecHooks(t *testing.T) {
HookName: "<from-annotation>",
HookSource: "annotation",
Hook: velerov1api.ExecRestoreHook{
Container: "container1",
Command: []string{"/usr/bin/foo"},
OnError: velerov1api.HookErrorModeContinue,
ExecTimeout: metav1.Duration{Duration: time.Second},
WaitTimeout: metav1.Duration{Duration: time.Minute},
Container: "container1",
Command: []string{"/usr/bin/foo"},
OnError: velerov1api.HookErrorModeContinue,
ExecTimeout: metav1.Duration{Duration: time.Second},
WaitTimeout: metav1.Duration{Duration: time.Minute},
WaitForReady: boolptr.True(),
},
},
},
@@ -883,11 +894,12 @@ func TestGroupRestoreExecHooks(t *testing.T) {
HookName: "<from-annotation>",
HookSource: "annotation",
Hook: velerov1api.ExecRestoreHook{
Container: "container1",
Command: []string{"/usr/bin/foo"},
OnError: velerov1api.HookErrorModeContinue,
ExecTimeout: metav1.Duration{Duration: time.Second},
WaitTimeout: metav1.Duration{Duration: time.Minute},
Container: "container1",
Command: []string{"/usr/bin/foo"},
OnError: velerov1api.HookErrorModeContinue,
ExecTimeout: metav1.Duration{Duration: time.Second},
WaitTimeout: metav1.Duration{Duration: time.Minute},
WaitForReady: boolptr.False(),
},
},
},
@@ -923,11 +935,12 @@ func TestGroupRestoreExecHooks(t *testing.T) {
HookName: "hook1",
HookSource: "backupSpec",
Hook: velerov1api.ExecRestoreHook{
Container: "container1",
Command: []string{"/usr/bin/foo"},
OnError: velerov1api.HookErrorModeContinue,
ExecTimeout: metav1.Duration{Duration: time.Second},
WaitTimeout: metav1.Duration{Duration: time.Minute},
Container: "container1",
Command: []string{"/usr/bin/foo"},
OnError: velerov1api.HookErrorModeContinue,
ExecTimeout: metav1.Duration{Duration: time.Second},
WaitTimeout: metav1.Duration{Duration: time.Minute},
WaitForReady: boolptr.False(),
},
},
},
@@ -962,11 +975,12 @@ func TestGroupRestoreExecHooks(t *testing.T) {
HookName: "hook1",
HookSource: "backupSpec",
Hook: velerov1api.ExecRestoreHook{
Container: "container1",
Command: []string{"/usr/bin/foo"},
OnError: velerov1api.HookErrorModeContinue,
ExecTimeout: metav1.Duration{Duration: time.Second},
WaitTimeout: metav1.Duration{Duration: time.Minute},
Container: "container1",
Command: []string{"/usr/bin/foo"},
OnError: velerov1api.HookErrorModeContinue,
ExecTimeout: metav1.Duration{Duration: time.Second},
WaitTimeout: metav1.Duration{Duration: time.Minute},
WaitForReady: boolptr.False(),
},
},
},
@@ -1009,11 +1023,12 @@ func TestGroupRestoreExecHooks(t *testing.T) {
HookName: "<from-annotation>",
HookSource: "annotation",
Hook: velerov1api.ExecRestoreHook{
Container: "container1",
Command: []string{"/usr/bin/foo"},
OnError: velerov1api.HookErrorModeContinue,
ExecTimeout: metav1.Duration{Duration: time.Second},
WaitTimeout: metav1.Duration{Duration: time.Minute},
Container: "container1",
Command: []string{"/usr/bin/foo"},
OnError: velerov1api.HookErrorModeContinue,
ExecTimeout: metav1.Duration{Duration: time.Second},
WaitTimeout: metav1.Duration{Duration: time.Minute},
WaitForReady: boolptr.False(),
},
},
},
@@ -1105,11 +1120,12 @@ func TestGroupRestoreExecHooks(t *testing.T) {
RestoreHooks: []velerov1api.RestoreResourceHook{
{
Exec: &velerov1api.ExecRestoreHook{
Container: "container1",
Command: []string{"/usr/bin/aaa"},
OnError: velerov1api.HookErrorModeContinue,
ExecTimeout: metav1.Duration{Duration: time.Second * 4},
WaitTimeout: metav1.Duration{Duration: time.Minute * 4},
Container: "container1",
Command: []string{"/usr/bin/aaa"},
OnError: velerov1api.HookErrorModeContinue,
ExecTimeout: metav1.Duration{Duration: time.Second * 4},
WaitTimeout: metav1.Duration{Duration: time.Minute * 4},
WaitForReady: boolptr.True(),
},
},
},
@@ -1126,33 +1142,36 @@ func TestGroupRestoreExecHooks(t *testing.T) {
HookName: "hook1",
HookSource: "backupSpec",
Hook: velerov1api.ExecRestoreHook{
Container: "container1",
Command: []string{"/usr/bin/foo"},
OnError: velerov1api.HookErrorModeFail,
ExecTimeout: metav1.Duration{Duration: time.Second},
WaitTimeout: metav1.Duration{Duration: time.Minute},
Container: "container1",
Command: []string{"/usr/bin/foo"},
OnError: velerov1api.HookErrorModeFail,
ExecTimeout: metav1.Duration{Duration: time.Second},
WaitTimeout: metav1.Duration{Duration: time.Minute},
WaitForReady: boolptr.False(),
},
},
{
HookName: "hook1",
HookSource: "backupSpec",
Hook: velerov1api.ExecRestoreHook{
Container: "container1",
Command: []string{"/usr/bin/bar"},
OnError: velerov1api.HookErrorModeContinue,
ExecTimeout: metav1.Duration{Duration: time.Second * 2},
WaitTimeout: metav1.Duration{Duration: time.Minute * 2},
Container: "container1",
Command: []string{"/usr/bin/bar"},
OnError: velerov1api.HookErrorModeContinue,
ExecTimeout: metav1.Duration{Duration: time.Second * 2},
WaitTimeout: metav1.Duration{Duration: time.Minute * 2},
WaitForReady: boolptr.False(),
},
},
{
HookName: "hook2",
HookSource: "backupSpec",
Hook: velerov1api.ExecRestoreHook{
Container: "container1",
Command: []string{"/usr/bin/aaa"},
OnError: velerov1api.HookErrorModeContinue,
ExecTimeout: metav1.Duration{Duration: time.Second * 4},
WaitTimeout: metav1.Duration{Duration: time.Minute * 4},
Container: "container1",
Command: []string{"/usr/bin/aaa"},
OnError: velerov1api.HookErrorModeContinue,
ExecTimeout: metav1.Duration{Duration: time.Second * 4},
WaitTimeout: metav1.Duration{Duration: time.Minute * 4},
WaitForReady: boolptr.True(),
},
},
},
@@ -1161,11 +1180,12 @@ func TestGroupRestoreExecHooks(t *testing.T) {
HookName: "hook1",
HookSource: "backupSpec",
Hook: velerov1api.ExecRestoreHook{
Container: "container2",
Command: []string{"/usr/bin/baz"},
OnError: velerov1api.HookErrorModeContinue,
ExecTimeout: metav1.Duration{Duration: time.Second * 3},
WaitTimeout: metav1.Duration{Duration: time.Second * 3},
Container: "container2",
Command: []string{"/usr/bin/baz"},
OnError: velerov1api.HookErrorModeContinue,
ExecTimeout: metav1.Duration{Duration: time.Second * 3},
WaitTimeout: metav1.Duration{Duration: time.Second * 3},
WaitForReady: boolptr.False(),
},
},
},

View File

@@ -29,6 +29,7 @@ import (
velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
"github.com/vmware-tanzu/velero/pkg/podexec"
"github.com/vmware-tanzu/velero/pkg/util/boolptr"
"github.com/vmware-tanzu/velero/pkg/util/kube"
)
@@ -126,8 +127,8 @@ func (e *DefaultWaitExecHookHandler) HandleHooks(
}
for containerName, hooks := range byContainer {
if !isContainerRunning(newPod, containerName) {
podLog.Infof("Container %s is not running: post-restore hooks will not yet be executed", containerName)
if !isContainerUp(newPod, containerName, hooks) {
podLog.Infof("Container %s is not up: post-restore hooks will not yet be executed", containerName)
continue
}
podMap, err := runtime.DefaultUnstructuredConverter.ToUnstructured(newPod)
@@ -243,14 +244,24 @@ func podHasContainer(pod *v1.Pod, containerName string) bool {
return false
}
func isContainerRunning(pod *v1.Pod, containerName string) bool {
func isContainerUp(pod *v1.Pod, containerName string, hooks []PodExecRestoreHook) bool {
if pod == nil {
return false
}
var waitForReady bool
for _, hook := range hooks {
if boolptr.IsSetToTrue(hook.Hook.WaitForReady) {
waitForReady = true
break
}
}
for _, cs := range pod.Status.ContainerStatuses {
if cs.Name != containerName {
continue
}
if waitForReady {
return cs.Ready
}
return cs.State.Running != nil
}

View File

@@ -35,6 +35,7 @@ import (
velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
"github.com/vmware-tanzu/velero/pkg/builder"
velerotest "github.com/vmware-tanzu/velero/pkg/test"
"github.com/vmware-tanzu/velero/pkg/util/boolptr"
)
type fakeListWatchFactory struct {
@@ -790,12 +791,13 @@ func TestPodHasContainer(t *testing.T) {
}
}
func TestIsContainerRunning(t *testing.T) {
func TestIsContainerUp(t *testing.T) {
tests := []struct {
name string
pod *v1.Pod
container string
expect bool
hooks []PodExecRestoreHook
}{
{
name: "should return true when running",
@@ -809,6 +811,49 @@ func TestIsContainerRunning(t *testing.T) {
},
}).
Result(),
hooks: []PodExecRestoreHook{},
},
{
name: "should return false when running but not ready",
container: "container1",
expect: false,
pod: builder.ForPod("default", "my-pod").
ContainerStatuses(&v1.ContainerStatus{
Name: "container1",
State: v1.ContainerState{
Running: &v1.ContainerStateRunning{},
},
Ready: false,
}).
Result(),
hooks: []PodExecRestoreHook{
{
Hook: velerov1api.ExecRestoreHook{
WaitForReady: boolptr.True(),
},
},
},
},
{
name: "should return true when running and ready",
container: "container1",
expect: true,
pod: builder.ForPod("default", "my-pod").
ContainerStatuses(&v1.ContainerStatus{
Name: "container1",
State: v1.ContainerState{
Running: &v1.ContainerStateRunning{},
},
Ready: true,
}).
Result(),
hooks: []PodExecRestoreHook{
{
Hook: velerov1api.ExecRestoreHook{
WaitForReady: boolptr.True(),
},
},
},
},
{
name: "should return false when no state is set",
@@ -820,6 +865,7 @@ func TestIsContainerRunning(t *testing.T) {
State: v1.ContainerState{},
}).
Result(),
hooks: []PodExecRestoreHook{},
},
{
name: "should return false when waiting",
@@ -833,6 +879,7 @@ func TestIsContainerRunning(t *testing.T) {
},
}).
Result(),
hooks: []PodExecRestoreHook{},
},
{
name: "should return true when running and first container is terminated",
@@ -852,11 +899,12 @@ func TestIsContainerRunning(t *testing.T) {
},
}).
Result(),
hooks: []PodExecRestoreHook{},
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
actual := isContainerRunning(test.pod, test.container)
actual := isContainerUp(test.pod, test.container, test.hooks)
assert.Equal(t, actual, test.expect)
})
}

View File

@@ -214,6 +214,11 @@ type ExecRestoreHook struct {
// before attempting to run the command.
// +optional
WaitTimeout metav1.Duration `json:"waitTimeout,omitempty"`
// WaitForReady ensures command will be launched when container is Ready instead of Running.
// +optional
// +nullable
WaitForReady *bool `json:"waitForReady,omitempty"`
}
// InitRestoreHook is a hook that adds an init container to a PodSpec to run commands before the

View File

@@ -181,6 +181,8 @@ Below are the annotations that can be added to a pod to specify exec restore hoo
* How long to wait once execution begins. Defaults is 30 seconds. Optional.
* `post.hook.restore.velero.io/wait-timeout`
* How long to wait for a container to become ready. This should be long enough for the container to start plus any preceding hooks in the same container to complete. The wait timeout begins when the container is restored and may require time for the image to pull and volumes to mount. If not set the restore will wait indefinitely. Optional.
* `post.hook.restore.velero.io/wait-for-ready`
* String representation of a boolean that ensure command will be launched when underlying container is fully [Ready](https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-readiness-probes). Use with caution because if only one restore hook for a pod consists of `WaitForReady` flag as "true", all the other hook executions for that pod, whatever their origin (`Backup` or `Restore` CRD), will wait for `Ready` state too. Any value except "true" will be considered as "false". Defaults is false. Optional.
#### Exec Restore Hooks As Pod Annotation Example