mirror of
https://github.com/vmware-tanzu/velero.git
synced 2026-05-28 18:10:23 +00:00
Warn instead of silently skipping foreign DataUploads
Velero does not support self-protection: the velero namespace must never be captured in a backup tarball. When it is, the tarball can contain DataUpload CRs belonging to other backups, and the previous revision of this change silently swallowed that case in the DataUploadDeleteAction. Per maintainer feedback, the action should make the misconfiguration detectable rather than silent. Emit a warn-level log naming the DataUpload, its owning backup-name label, and the executing backup, and call out that the velero namespace should be excluded from schedules. Continue to skip the snapshot-info ConfigMap creation so that a mislabeled CM does not mask the real owning backup's snapshot on deletion. The test for the foreign-backup case now also asserts the warn is emitted via a logrus test hook. Signed-off-by: Christian Schlichtherle <cs@bsure-analytics.de>
This commit is contained in:
@@ -36,17 +36,24 @@ func (d *DataUploadDeleteAction) Execute(input *velero.DeleteItemActionExecuteIn
|
||||
if err := runtime.DefaultUnstructuredConverter.FromUnstructured(input.Item.UnstructuredContent(), &du); err != nil {
|
||||
return errors.WithStack(errors.Wrapf(err, "failed to convert input.Item from unstructured"))
|
||||
}
|
||||
// Skip DataUploads that do not belong to the backup being deleted. The
|
||||
// backup tarball may incidentally include DataUpload CRs from the velero
|
||||
// namespace that belong to a different backup (e.g. when an hourly
|
||||
// schedule with snapshotMoveData=false captures the velero namespace
|
||||
// containing a daily schedule's DataUploads). Creating a snapshot-info
|
||||
// ConfigMap labeled with the wrong backup name causes the real owning
|
||||
// backup's deleteMovedSnapshots query to miss it, leaking the Kopia
|
||||
// snapshot in the object store.
|
||||
// Detect DataUploads that do not belong to the backup being deleted.
|
||||
// Velero does not support self-protection: the velero namespace should
|
||||
// never be captured in a backup tarball. When it is (e.g. an operator
|
||||
// schedule covers the velero namespace), the tarball can contain
|
||||
// DataUpload CRs belonging to *other* backups. Creating a snapshot-info
|
||||
// ConfigMap labeled with the executing backup's name in that case
|
||||
// mislabels the snapshot and causes the real owning backup's
|
||||
// deleteMovedSnapshots query to miss it, leaking the Kopia snapshot in
|
||||
// the object store. Log a warning so misconfigured installs are visible,
|
||||
// and skip the snapshot-info ConfigMap creation to avoid mislabeling.
|
||||
if owner := du.Labels[velerov1.BackupNameLabel]; owner != "" && owner != label.GetValidName(input.Backup.Name) {
|
||||
d.logger.Infof("Skipping DataUpload %s/%s: belongs to backup %q, not %q",
|
||||
du.Namespace, du.Name, owner, input.Backup.Name)
|
||||
d.logger.Warnf(
|
||||
"DataUpload %q belongs to backup %q but is being deleted under backup %q; "+
|
||||
"this almost always means the velero namespace was included in a backup tarball. "+
|
||||
"Velero does not support self-protection — exclude the velero namespace from your schedules. "+
|
||||
"Skipping snapshot-info ConfigMap creation to avoid mislabeling.",
|
||||
du.Name, owner, input.Backup.Name,
|
||||
)
|
||||
return nil
|
||||
}
|
||||
cm := genConfigmap(input.Backup, *du)
|
||||
|
||||
@@ -18,9 +18,11 @@ package datamover
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
logrustest "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
corev1api "k8s.io/api/core/v1"
|
||||
@@ -81,11 +83,12 @@ func TestDataUploadDeleteActionAppliesTo(t *testing.T) {
|
||||
|
||||
func TestDataUploadDeleteActionExecute(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
duName string
|
||||
duOwnerBackup string // value placed in velero.io/backup-name label on the DataUpload
|
||||
executingBackup string // name of the Backup being deleted (input.Backup.Name)
|
||||
wantConfigMap bool
|
||||
name string
|
||||
duName string
|
||||
duOwnerBackup string // value placed in velero.io/backup-name label on the DataUpload
|
||||
executingBackup string // name of the Backup being deleted (input.Backup.Name)
|
||||
wantConfigMap bool
|
||||
wantWarn bool // whether a warn-level log about a foreign DataUpload is expected
|
||||
}{
|
||||
{
|
||||
name: "DataUpload owned by the executing backup creates a snapshot-info ConfigMap",
|
||||
@@ -93,13 +96,15 @@ func TestDataUploadDeleteActionExecute(t *testing.T) {
|
||||
duOwnerBackup: "daily-backup",
|
||||
executingBackup: "daily-backup",
|
||||
wantConfigMap: true,
|
||||
wantWarn: false,
|
||||
},
|
||||
{
|
||||
name: "DataUpload owned by a different backup is skipped (no ConfigMap created)",
|
||||
name: "DataUpload owned by a different backup is skipped and a warning is logged",
|
||||
duName: "daily-backup-abcde",
|
||||
duOwnerBackup: "daily-backup",
|
||||
executingBackup: "hourly-backup",
|
||||
wantConfigMap: false,
|
||||
wantWarn: true,
|
||||
},
|
||||
{
|
||||
name: "DataUpload with no backup-name label falls through (legacy behavior preserved)",
|
||||
@@ -107,13 +112,16 @@ func TestDataUploadDeleteActionExecute(t *testing.T) {
|
||||
duOwnerBackup: "",
|
||||
executingBackup: "some-backup",
|
||||
wantConfigMap: true,
|
||||
wantWarn: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
crClient := velerotest.NewFakeControllerRuntimeClient(t)
|
||||
action := NewDataUploadDeleteAction(logrus.StandardLogger(), crClient)
|
||||
logger, hook := logrustest.NewNullLogger()
|
||||
logger.SetLevel(logrus.DebugLevel)
|
||||
action := NewDataUploadDeleteAction(logger, crClient)
|
||||
|
||||
du := newCompletedDataUpload(tc.duName, tc.duOwnerBackup)
|
||||
backup := builder.ForBackup("velero", tc.executingBackup).StorageLocation("default").Result()
|
||||
@@ -139,6 +147,23 @@ func TestDataUploadDeleteActionExecute(t *testing.T) {
|
||||
assert.True(t, apierrors.IsNotFound(getErr),
|
||||
"expected no ConfigMap to be created for foreign DataUpload, but got: %v", getErr)
|
||||
}
|
||||
|
||||
// The action must surface foreign-backup DataUploads as warnings so
|
||||
// operators who accidentally included the velero namespace in a
|
||||
// backup can detect the misconfiguration from logs, instead of
|
||||
// having the case silently swallowed.
|
||||
var sawForeignWarn bool
|
||||
for _, entry := range hook.AllEntries() {
|
||||
if entry.Level == logrus.WarnLevel &&
|
||||
strings.Contains(entry.Message, "velero namespace") &&
|
||||
strings.Contains(entry.Message, tc.duName) {
|
||||
sawForeignWarn = true
|
||||
break
|
||||
}
|
||||
}
|
||||
assert.Equal(t, tc.wantWarn, sawForeignWarn,
|
||||
"unexpected foreign-backup warn log presence (want=%v, got=%v); entries=%v",
|
||||
tc.wantWarn, sawForeignWarn, hook.AllEntries())
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user