Merge pull request #9791 from christian-schlichtherle/fix/dataupload-delete-foreign-backup

Fix DataUploadDeleteAction creating CMs for foreign DataUploads
This commit is contained in:
lyndon-li
2026-05-22 14:41:44 +08:00
committed by GitHub
3 changed files with 211 additions and 0 deletions

View File

@@ -0,0 +1 @@
Fix DataUploadDeleteAction creating snapshot-info ConfigMaps labeled with the wrong backup name when a DataUpload CR from another backup is incidentally captured in the backup tarball, which caused Kopia snapshots to be leaked in object storage on expiry of the real owning backup.

View File

@@ -14,6 +14,7 @@ import (
velerov1 "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
velerov2alpha1 "github.com/vmware-tanzu/velero/pkg/apis/velero/v2alpha1"
"github.com/vmware-tanzu/velero/pkg/label"
"github.com/vmware-tanzu/velero/pkg/plugin/velero"
repotypes "github.com/vmware-tanzu/velero/pkg/repository/types"
)
@@ -35,6 +36,44 @@ func (d *DataUploadDeleteAction) Execute(input *velero.DeleteItemActionExecuteIn
if err := runtime.DefaultUnstructuredConverter.FromUnstructured(input.Item.UnstructuredContent(), &du); err != nil {
return errors.WithStack(errors.Wrapf(err, "failed to convert input.Item from unstructured"))
}
// Only create a snapshot-info ConfigMap when the DataUpload's owning
// backup (its velero.io/backup-name label) matches the backup currently
// being deleted. Two other cases reach this code path and must be
// skipped, because the resulting CM would be unmatchable and only adds
// etcd churn:
//
// 1. The label is missing. We have no verifiable owner, so a CM created
// with the executing backup's label is a guess that deleteMovedSnapshots
// cannot rely on.
// 2. The label names a different backup. Velero does not support
// self-protection, so this almost always means the velero namespace
// was captured in a backup tarball and the DataUpload CR belongs to
// an unrelated backup. Creating a CM labeled with the executing
// backup mislabels the snapshot and causes the real owning backup's
// deleteMovedSnapshots query to miss it, leaking the Kopia snapshot
// in the object store.
//
// Both cases warn so misconfigured installs surface in logs.
owner := du.Labels[velerov1.BackupNameLabel]
switch {
case owner == "":
d.logger.Warnf(
"DataUpload %q has no %q label, so its owning backup cannot be verified; "+
"skipping snapshot-info ConfigMap creation because a CM without a verifiable owner "+
"cannot be matched back to its snapshot at backup deletion time.",
du.Name, velerov1.BackupNameLabel,
)
return nil
case owner != label.GetValidName(input.Backup.Name):
d.logger.Warnf(
"DataUpload %q belongs to backup %q but is being deleted under backup %q; "+
"this almost always means the velero namespace was included in a backup tarball. "+
"Velero does not support self-protection — exclude the velero namespace from your schedules. "+
"Skipping snapshot-info ConfigMap creation to avoid mislabeling.",
du.Name, owner, input.Backup.Name,
)
return nil
}
cm := genConfigmap(input.Backup, *du)
if cm == nil {
// will not fail the backup deletion

View File

@@ -0,0 +1,171 @@
/*
Copyright the Velero contributors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package datamover
import (
"fmt"
"strings"
"testing"
"github.com/sirupsen/logrus"
logrustest "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
corev1api "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime"
crclient "sigs.k8s.io/controller-runtime/pkg/client"
velerov1 "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
velerov2alpha1 "github.com/vmware-tanzu/velero/pkg/apis/velero/v2alpha1"
"github.com/vmware-tanzu/velero/pkg/builder"
"github.com/vmware-tanzu/velero/pkg/plugin/velero"
velerotest "github.com/vmware-tanzu/velero/pkg/test"
)
func toUnstructured(t *testing.T, du *velerov2alpha1.DataUpload) runtime.Unstructured {
t.Helper()
m, err := runtime.DefaultUnstructuredConverter.ToUnstructured(du)
require.NoError(t, err)
return &unstructured.Unstructured{Object: m}
}
func newCompletedDataUpload(name, ownerBackup string) *velerov2alpha1.DataUpload {
du := &velerov2alpha1.DataUpload{
TypeMeta: metav1.TypeMeta{
APIVersion: velerov2alpha1.SchemeGroupVersion.String(),
Kind: "DataUpload",
},
ObjectMeta: metav1.ObjectMeta{
Namespace: "velero",
Name: name,
},
Spec: velerov2alpha1.DataUploadSpec{
SnapshotType: velerov2alpha1.SnapshotTypeCSI,
SourcePVC: "my-pvc",
SourceNamespace: "app",
BackupStorageLocation: "default",
DataMover: "velero",
},
Status: velerov2alpha1.DataUploadStatus{
Phase: velerov2alpha1.DataUploadPhaseCompleted,
SnapshotID: "kopia-snapshot-id",
},
}
if ownerBackup != "" {
du.Labels = map[string]string{velerov1.BackupNameLabel: ownerBackup}
}
return du
}
func TestDataUploadDeleteActionAppliesTo(t *testing.T) {
a := NewDataUploadDeleteAction(logrus.StandardLogger(), nil)
selector, err := a.AppliesTo()
require.NoError(t, err)
require.Equal(t, velero.ResourceSelector{IncludedResources: []string{"datauploads.velero.io"}}, selector)
}
func TestDataUploadDeleteActionExecute(t *testing.T) {
tests := []struct {
name string
duName string
duOwnerBackup string // value placed in velero.io/backup-name label on the DataUpload
executingBackup string // name of the Backup being deleted (input.Backup.Name)
wantConfigMap bool
wantWarnContains string // substring expected in a warn-level log entry; empty means no warn expected
}{
{
name: "DataUpload owned by the executing backup creates a snapshot-info ConfigMap",
duName: "daily-backup-abcde",
duOwnerBackup: "daily-backup",
executingBackup: "daily-backup",
wantConfigMap: true,
wantWarnContains: "",
},
{
name: "DataUpload owned by a different backup is skipped and a warning is logged",
duName: "daily-backup-abcde",
duOwnerBackup: "daily-backup",
executingBackup: "hourly-backup",
wantConfigMap: false,
wantWarnContains: "velero namespace",
},
{
name: "DataUpload with no backup-name label is skipped and a warning is logged",
duName: "unlabeled-du",
duOwnerBackup: "",
executingBackup: "some-backup",
wantConfigMap: false,
wantWarnContains: "cannot be verified",
},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
crClient := velerotest.NewFakeControllerRuntimeClient(t)
logger, hook := logrustest.NewNullLogger()
logger.SetLevel(logrus.DebugLevel)
action := NewDataUploadDeleteAction(logger, crClient)
du := newCompletedDataUpload(tc.duName, tc.duOwnerBackup)
backup := builder.ForBackup("velero", tc.executingBackup).StorageLocation("default").Result()
err := action.Execute(&velero.DeleteItemActionExecuteInput{
Item: toUnstructured(t, du),
Backup: backup,
})
require.NoError(t, err)
cm := &corev1api.ConfigMap{}
getErr := crClient.Get(t.Context(), crclient.ObjectKey{
Namespace: backup.Namespace,
Name: fmt.Sprintf("%s-info", du.Name),
}, cm)
if tc.wantConfigMap {
require.NoError(t, getErr, "expected snapshot-info ConfigMap to be created")
assert.Equal(t, tc.executingBackup, cm.Labels[velerov1.BackupNameLabel])
assert.Equal(t, "true", cm.Labels[velerov1.DataUploadSnapshotInfoLabel])
} else {
require.Error(t, getErr)
assert.True(t, apierrors.IsNotFound(getErr),
"expected no ConfigMap to be created for foreign DataUpload, but got: %v", getErr)
}
// The action must surface DataUploads it cannot generate a useful
// snapshot-info ConfigMap for as warnings, so operators who
// accidentally included the velero namespace in a backup (or
// otherwise produced DataUploads without a verifiable owner) can
// detect the misconfiguration from logs instead of having the
// case silently swallowed.
var sawWarn bool
for _, entry := range hook.AllEntries() {
if entry.Level == logrus.WarnLevel &&
strings.Contains(entry.Message, tc.duName) &&
(tc.wantWarnContains == "" || strings.Contains(entry.Message, tc.wantWarnContains)) {
sawWarn = true
break
}
}
assert.Equal(t, tc.wantWarnContains != "", sawWarn,
"unexpected warn log presence (wantContains=%q, got=%v); entries=%v",
tc.wantWarnContains, sawWarn, hook.AllEntries())
})
}
}