PVR action to remove restore-wait init container on restore (#8880)

This PR fixes issue #8870 where Velero was unnecessarily adding the restore-wait init container when restoring pods with volumes that were backed up using native datamover or CSI.

When restoring pods with volumes, Velero was always adding the restore-wait init container, even when the volumes were backed up using native datamover or CSI and didn't need file system restores. This was causing unnecessary overhead and potential issues.

PVR action to remove restore-wait init container on restore

Changes:
- Remove ALL existing restore-wait init containers before deciding whether to add a new one
- This covers both scenarios: when no file system restore is needed AND when preventing duplicates
- Simplify the add logic since we've already cleaned up existing containers
- Add better logging to show how many containers were removed

Signed-off-by: Tiger Kaovilai <tkaovila@redhat.com>
This commit is contained in:
Tiger Kaovilai
2025-07-20 23:03:42 -04:00
committed by GitHub
parent 06d305ea47
commit 2b787f5d3d
3 changed files with 347 additions and 14 deletions

View File

@@ -111,16 +111,34 @@ func (a *PodVolumeRestoreAction) Execute(input *velero.RestoreItemActionExecuteI
for i := range podVolumeBackupList.Items {
podVolumeBackups = append(podVolumeBackups, &podVolumeBackupList.Items[i])
}
// Remove all existing restore-wait init containers first to prevent duplicates
// This ensures that even if the pod was previously restored with file system backup
// but now backed up with native datamover or CSI, the unnecessary init container is removed
var filteredInitContainers []corev1api.Container
removedCount := 0
for _, initContainer := range pod.Spec.InitContainers {
if initContainer.Name != restorehelper.WaitInitContainer && initContainer.Name != restorehelper.WaitInitContainerLegacy {
filteredInitContainers = append(filteredInitContainers, initContainer)
} else {
removedCount++
}
}
pod.Spec.InitContainers = filteredInitContainers
if removedCount > 0 {
log.Infof("Removed %d existing restore-wait init container(s)", removedCount)
}
volumeSnapshots := podvolume.GetVolumeBackupsForPod(podVolumeBackups, &pod, podFromBackup.Namespace)
if len(volumeSnapshots) == 0 {
log.Debug("No pod volume backups found for pod")
return velero.NewRestoreItemActionExecuteOutput(input.Item), nil
res, err := runtime.DefaultUnstructuredConverter.ToUnstructured(&pod)
if err != nil {
return nil, errors.Wrap(err, "unable to convert pod to runtime.Unstructured")
}
return velero.NewRestoreItemActionExecuteOutput(&unstructured.Unstructured{Object: res}), nil
}
log.Info("Pod volume backups for pod found")
// TODO we might want/need to get plugin config at the top of this method at some point; for now, wait
// until we know we're doing a restore before getting config.
log.Debugf("Getting plugin config")
config, err := common.GetPluginConfig(common.PluginKindRestoreItemAction, "velero.io/pod-volume-restore", a.client)
if err != nil {
@@ -190,11 +208,9 @@ func (a *PodVolumeRestoreAction) Execute(input *velero.RestoreItemActionExecuteI
initContainerBuilder.Command(getCommand(log, config))
initContainer := *initContainerBuilder.Result()
if len(pod.Spec.InitContainers) == 0 || (pod.Spec.InitContainers[0].Name != restorehelper.WaitInitContainer && pod.Spec.InitContainers[0].Name != restorehelper.WaitInitContainerLegacy) {
pod.Spec.InitContainers = append([]corev1api.Container{initContainer}, pod.Spec.InitContainers...)
} else {
pod.Spec.InitContainers[0] = initContainer
}
// Since we've already removed all restore-wait init containers above,
// we can simply prepend the new init container
pod.Spec.InitContainers = append([]corev1api.Container{initContainer}, pod.Spec.InitContainers...)
res, err := runtime.DefaultUnstructuredConverter.ToUnstructured(&pod)
if err != nil {

View File

@@ -31,6 +31,9 @@ import (
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/kubernetes/fake"
crfake "sigs.k8s.io/controller-runtime/pkg/client/fake"
"context"
velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
"github.com/vmware-tanzu/velero/pkg/builder"
@@ -38,6 +41,10 @@ import (
"github.com/vmware-tanzu/velero/pkg/plugin/velero"
velerotest "github.com/vmware-tanzu/velero/pkg/test"
"github.com/vmware-tanzu/velero/pkg/util/kube"
"k8s.io/client-go/kubernetes/scheme"
"github.com/vmware-tanzu/velero/pkg/restorehelper"
)
func TestGetImage(t *testing.T) {
@@ -157,13 +164,28 @@ func TestPodVolumeRestoreActionExecute(t *testing.T) {
want *corev1api.Pod
}{
{
name: "Restoring pod with no other initContainers adds the restore initContainer",
pod: builder.ForPod("ns-1", "my-pod").ObjectMeta(
builder.WithAnnotations("snapshot.velero.io/myvol", "")).
name: "Restoring pod with no other initContainers adds the restore initContainer when volumes need file system restores",
pod: builder.ForPod("ns-1", "my-pod").
ObjectMeta(builder.WithAnnotations("snapshot.velero.io/myvol", "")).
Volumes(
builder.ForVolume("myvol").PersistentVolumeClaimSource("pvc-1").Result(),
).
Result(),
podVolumeBackups: []runtime.Object{
builder.ForPodVolumeBackup(veleroNs, "pvb-1").
PodName("my-pod").
PodNamespace("ns-1").
Volume("myvol").
ObjectMeta(builder.WithLabels(velerov1api.BackupNameLabel, backupName)).
SnapshotID("foo").
Result(),
},
want: builder.ForPod("ns-1", "my-pod").
ObjectMeta(
builder.WithAnnotations("snapshot.velero.io/myvol", "")).
Volumes(
builder.ForVolume("myvol").PersistentVolumeClaimSource("pvc-1").Result(),
).
InitContainers(
newRestoreInitContainerBuilder(defaultRestoreHelperImage, "").
Resources(&resourceReqs).
@@ -172,15 +194,30 @@ func TestPodVolumeRestoreActionExecute(t *testing.T) {
Command([]string{"/velero-restore-helper"}).Result()).Result(),
},
{
name: "Restoring pod with other initContainers adds the restore initContainer as the first one",
name: "Restoring pod with other initContainers adds the restore initContainer as the first one when volumes need file system restores",
pod: builder.ForPod("ns-1", "my-pod").
ObjectMeta(
builder.WithAnnotations("snapshot.velero.io/myvol", "")).
Volumes(
builder.ForVolume("myvol").PersistentVolumeClaimSource("pvc-1").Result(),
).
InitContainers(builder.ForContainer("first-container", "").Result()).
Result(),
podVolumeBackups: []runtime.Object{
builder.ForPodVolumeBackup(veleroNs, "pvb-1").
PodName("my-pod").
PodNamespace("ns-1").
Volume("myvol").
ObjectMeta(builder.WithLabels(velerov1api.BackupNameLabel, backupName)).
SnapshotID("foo").
Result(),
},
want: builder.ForPod("ns-1", "my-pod").
ObjectMeta(
builder.WithAnnotations("snapshot.velero.io/myvol", "")).
Volumes(
builder.ForVolume("myvol").PersistentVolumeClaimSource("pvc-1").Result(),
).
InitContainers(
newRestoreInitContainerBuilder(defaultRestoreHelperImage, "").
Resources(&resourceReqs).
@@ -277,17 +314,32 @@ func TestPodVolumeRestoreActionExecute(t *testing.T) {
Result(),
},
{
name: "Restoring pod with custom container SecurityContext uses this SecurityContext for the restore initContainer",
name: "Restoring pod with custom container SecurityContext uses this SecurityContext for the restore initContainer when volumes need file system restores",
pod: builder.ForPod("ns-1", "my-pod").
ObjectMeta(
builder.WithAnnotations("snapshot.velero.io/myvol", "")).
Volumes(
builder.ForVolume("myvol").PersistentVolumeClaimSource("pvc-1").Result(),
).
Containers(
builder.ForContainer("app-container", "app-image").
SecurityContext(&customSecurityContext).Result()).
Result(),
podVolumeBackups: []runtime.Object{
builder.ForPodVolumeBackup(veleroNs, "pvb-1").
PodName("my-pod").
PodNamespace("ns-1").
Volume("myvol").
ObjectMeta(builder.WithLabels(velerov1api.BackupNameLabel, backupName)).
SnapshotID("foo").
Result(),
},
want: builder.ForPod("ns-1", "my-pod").
ObjectMeta(
builder.WithAnnotations("snapshot.velero.io/myvol", "")).
Volumes(
builder.ForVolume("myvol").PersistentVolumeClaimSource("pvc-1").Result(),
).
Containers(
builder.ForContainer("app-container", "app-image").
SecurityContext(&customSecurityContext).Result()).
@@ -427,3 +479,261 @@ func TestGetCommand(t *testing.T) {
})
}
}
// This tests that restore-wait is added when file system restore volume exists, nothing added otherwise,
// and removed if it exists but is not needed.
// issue: 8870
func TestPodVolumeRestoreActionExecuteWithFileSystemShouldAddWaitInitContainer(t *testing.T) {
tests := []struct {
name string
pod *corev1api.Pod
podFromBackup *corev1api.Pod
podVolumeBackups []*velerov1api.PodVolumeBackup
restore *velerov1api.Restore
expectedInitContainers int
expectedError error
}{
{
name: "no pod volume backups results in no init container",
pod: builder.ForPod("ns", "pod").
ObjectMeta(builder.WithUID("pod-uid")).
Volumes(
builder.ForVolume("volume-1").PersistentVolumeClaimSource("pvc-1").Result(),
builder.ForVolume("volume-2").PersistentVolumeClaimSource("pvc-2").Result(),
).
Result(),
podFromBackup: builder.ForPod("ns", "pod").
ObjectMeta(builder.WithUID("pod-uid")).
Volumes(
builder.ForVolume("volume-1").PersistentVolumeClaimSource("pvc-1").Result(),
builder.ForVolume("volume-2").PersistentVolumeClaimSource("pvc-2").Result(),
).
Result(),
podVolumeBackups: nil,
restore: builder.ForRestore("velero", "restore-1").Backup("test-backup").Result(),
expectedInitContainers: 0,
expectedError: nil,
},
{
name: "pod volume backups that don't match pod's volumes results in no init container",
pod: builder.ForPod("ns", "pod").
ObjectMeta(builder.WithUID("pod-uid")).
Volumes(
builder.ForVolume("volume-1").PersistentVolumeClaimSource("pvc-1").Result(),
builder.ForVolume("volume-2").PersistentVolumeClaimSource("pvc-2").Result(),
).
Result(),
podFromBackup: builder.ForPod("ns", "pod").
ObjectMeta(builder.WithUID("pod-uid")).
Volumes(
builder.ForVolume("volume-1").PersistentVolumeClaimSource("pvc-1").Result(),
builder.ForVolume("volume-2").PersistentVolumeClaimSource("pvc-2").Result(),
).
Result(),
podVolumeBackups: []*velerov1api.PodVolumeBackup{
builder.ForPodVolumeBackup("velero", "pvb-1").
ObjectMeta(builder.WithLabels(velerov1api.BackupNameLabel, "test-backup")).
PodName("different-pod").
PodNamespace("ns").
Volume("volume-1").
SnapshotID("snapshot-1").
Result(),
},
restore: builder.ForRestore("velero", "restore-1").Backup("test-backup").Result(),
expectedInitContainers: 0,
expectedError: nil,
},
{
name: "matching pod volume backup results in init container being added",
pod: builder.ForPod("ns", "pod").
ObjectMeta(builder.WithUID("pod-uid")).
Volumes(
builder.ForVolume("volume-1").PersistentVolumeClaimSource("pvc-1").Result(),
builder.ForVolume("volume-2").PersistentVolumeClaimSource("pvc-2").Result(),
).
Result(),
podFromBackup: builder.ForPod("ns", "pod").
ObjectMeta(builder.WithUID("pod-uid")).
Volumes(
builder.ForVolume("volume-1").PersistentVolumeClaimSource("pvc-1").Result(),
builder.ForVolume("volume-2").PersistentVolumeClaimSource("pvc-2").Result(),
).
Result(),
podVolumeBackups: []*velerov1api.PodVolumeBackup{
builder.ForPodVolumeBackup("velero", "pvb-1").
ObjectMeta(builder.WithLabels(velerov1api.BackupNameLabel, "test-backup")).
PodName("pod").
PodNamespace("ns").
Volume("volume-1").
SnapshotID("snapshot-1").
Result(),
},
restore: builder.ForRestore("velero", "restore-1").Backup("test-backup").Result(),
expectedInitContainers: 1,
expectedError: nil,
},
{
name: "matching pod volume backup with matching pod name and namespace results in init container being added",
pod: builder.ForPod("ns", "pod").
ObjectMeta(builder.WithUID("pod-uid")).
Volumes(
builder.ForVolume("volume-1").PersistentVolumeClaimSource("pvc-1").Result(),
builder.ForVolume("volume-2").PersistentVolumeClaimSource("pvc-2").Result(),
).
Result(),
podFromBackup: builder.ForPod("ns", "pod").
ObjectMeta(builder.WithUID("pod-uid")).
Volumes(
builder.ForVolume("volume-1").PersistentVolumeClaimSource("pvc-1").Result(),
builder.ForVolume("volume-2").PersistentVolumeClaimSource("pvc-2").Result(),
).
Result(),
podVolumeBackups: []*velerov1api.PodVolumeBackup{
builder.ForPodVolumeBackup("velero", "pvb-1").
ObjectMeta(builder.WithLabels(velerov1api.BackupNameLabel, "test-backup")).
PodName("pod").
PodNamespace("ns").
Volume("volume-1").
SnapshotID("snapshot-1").
Result(),
},
restore: builder.ForRestore("velero", "restore-1").Backup("test-backup").Result(),
expectedInitContainers: 1,
expectedError: nil,
},
{
name: "existing init container is removed when no file system restore is needed",
pod: builder.ForPod("ns", "pod").
ObjectMeta(builder.WithUID("pod-uid")).
Volumes(
builder.ForVolume("volume-1").PersistentVolumeClaimSource("pvc-1").Result(),
builder.ForVolume("volume-2").PersistentVolumeClaimSource("pvc-2").Result(),
).
InitContainers(
builder.ForContainer(restorehelper.WaitInitContainer, "velero/velero:latest").
Command([]string{"/velero-restore-helper"}).
Args("restore-1").
Result(),
builder.ForContainer("another-init", "another-image").Result(),
).
Result(),
podFromBackup: builder.ForPod("ns", "pod").
ObjectMeta(builder.WithUID("pod-uid")).
Volumes(
builder.ForVolume("volume-1").PersistentVolumeClaimSource("pvc-1").Result(),
builder.ForVolume("volume-2").PersistentVolumeClaimSource("pvc-2").Result(),
).
Result(),
podVolumeBackups: []*velerov1api.PodVolumeBackup{
// This PVB doesn't match the pod's name, so needsFileSystemRestore will be false
builder.ForPodVolumeBackup("velero", "pvb-1").
ObjectMeta(builder.WithLabels(velerov1api.BackupNameLabel, "test-backup")).
PodName("different-pod").
PodNamespace("ns").
Volume("volume-1").
SnapshotID("snapshot-1").
Result(),
},
restore: builder.ForRestore("velero", "restore-1").Backup("test-backup").Result(),
expectedInitContainers: 1, // Only the "another-init" container should remain
expectedError: nil,
},
{
name: "existing legacy init container is removed when no file system restore is needed",
pod: builder.ForPod("ns", "pod").
ObjectMeta(builder.WithUID("pod-uid")).
Volumes(
builder.ForVolume("volume-1").PersistentVolumeClaimSource("pvc-1").Result(),
builder.ForVolume("volume-2").PersistentVolumeClaimSource("pvc-2").Result(),
).
InitContainers(
builder.ForContainer(restorehelper.WaitInitContainerLegacy, "velero/velero:latest").
Command([]string{"/velero-restore-helper"}).
Args("restore-1").
Result(),
builder.ForContainer("another-init", "another-image").Result(),
).
Result(),
podFromBackup: builder.ForPod("ns", "pod").
ObjectMeta(builder.WithUID("pod-uid")).
Volumes(
builder.ForVolume("volume-1").PersistentVolumeClaimSource("pvc-1").Result(),
builder.ForVolume("volume-2").PersistentVolumeClaimSource("pvc-2").Result(),
).
Result(),
podVolumeBackups: []*velerov1api.PodVolumeBackup{
// This PVB doesn't match the pod's name, so needsFileSystemRestore will be false
builder.ForPodVolumeBackup("velero", "pvb-1").
ObjectMeta(builder.WithLabels(velerov1api.BackupNameLabel, "test-backup")).
PodName("different-pod").
PodNamespace("ns").
Volume("volume-1").
SnapshotID("snapshot-1").
Result(),
},
restore: builder.ForRestore("velero", "restore-1").Backup("test-backup").Result(),
expectedInitContainers: 1, // Only the "another-init" container should remain
expectedError: nil,
},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
// Setup
var (
client = crfake.NewClientBuilder().Build()
crClient = client
)
// Register the PodVolumeBackup type with the scheme
require.NoError(t, velerov1api.AddToScheme(scheme.Scheme))
// Create the PodVolumeBackups in the fake client
for _, pvb := range tc.podVolumeBackups {
require.NoError(t, crClient.Create(context.Background(), pvb))
}
// Create a fake clientset
clientset := fake.NewSimpleClientset()
// Create the action
action := &PodVolumeRestoreAction{
logger: logrus.StandardLogger(),
client: clientset.CoreV1().ConfigMaps("velero"),
crClient: crClient,
veleroImage: "velero/velero:latest",
}
// Convert the pod to unstructured
podMap, err := runtime.DefaultUnstructuredConverter.ToUnstructured(tc.pod)
require.NoError(t, err)
podFromBackupMap, err := runtime.DefaultUnstructuredConverter.ToUnstructured(tc.podFromBackup)
require.NoError(t, err)
// Create the input
input := &velero.RestoreItemActionExecuteInput{
Item: &unstructured.Unstructured{Object: podMap},
ItemFromBackup: &unstructured.Unstructured{Object: podFromBackupMap},
Restore: tc.restore,
}
// Execute the action
output, err := action.Execute(input)
// Verify the results
if tc.expectedError != nil {
assert.Equal(t, tc.expectedError, err)
} else {
require.NoError(t, err)
// Convert the output back to a pod
outputPod := new(corev1api.Pod)
err = runtime.DefaultUnstructuredConverter.FromUnstructured(output.UpdatedItem.UnstructuredContent(), outputPod)
require.NoError(t, err)
// Check if the init container was added or removed as expected
assert.Len(t, outputPod.Spec.InitContainers, tc.expectedInitContainers, "Unexpected number of init containers")
}
})
}
}