Implement lazy per-namespace PVC-to-Pod caching for plugin path

This commit addresses reviewer feedback on PR #9441 regarding
concerns about caching when backups run concurrently. Key changes:

1. Added lazy per-namespace caching for the CSI PVC BIA plugin path:
   - Added IsNamespaceBuilt() method to check if namespace is cached
   - Added BuildCacheForNamespace() for lazy, per-namespace cache building
   - Plugin builds cache incrementally as namespaces are encountered

2. Added NewVolumeHelperImplWithCache constructor for plugins:
   - Accepts externally-managed PVC-to-Pod cache
   - Follows pattern from PR #9226 (Scott Seago's design)

3. Plugin instance lifecycle clarification:
   - Plugin instances are unique per backup (created via newPluginManager)
   - Cleaned up via CleanupClients at backup completion
   - No mutex or backup UID tracking needed

4. Test coverage:
   - Added tests for IsNamespaceBuilt and BuildCacheForNamespace
   - Added tests for NewVolumeHelperImplWithCache constructor
   - Added test verifying cache usage for fs-backup determination

This maintains the O(N+M) complexity improvement from issue #9179
while addressing architectural concerns about concurrent access.

Signed-off-by: Shubham Pampattiwar <spampatt@redhat.com>
This commit is contained in:
Shubham Pampattiwar
2025-12-16 12:28:47 -08:00
parent f4c4653c08
commit b7052c2cb1
6 changed files with 573 additions and 138 deletions

View File

@@ -20,7 +20,6 @@ import (
"context"
"fmt"
"strconv"
"sync"
"time"
"k8s.io/client-go/util/retry"
@@ -59,6 +58,7 @@ import (
"github.com/vmware-tanzu/velero/pkg/util/boolptr"
"github.com/vmware-tanzu/velero/pkg/util/csi"
kubeutil "github.com/vmware-tanzu/velero/pkg/util/kube"
podvolumeutil "github.com/vmware-tanzu/velero/pkg/util/podvolume"
)
// TODO: Replace hardcoded VolumeSnapshot finalizer strings with constants from
@@ -75,15 +75,13 @@ type pvcBackupItemAction struct {
log logrus.FieldLogger
crClient crclient.Client
// volumeHelper caches the VolumeHelper instance for the current backup.
// pvcPodCache provides lazy per-namespace caching of PVC-to-Pod mappings.
// Since plugin instances are unique per backup (created via newPluginManager and
// cleaned up via CleanupClients at backup completion), we can safely cache this
// without mutex or backup UID tracking.
// This avoids the O(N*M) performance issue when there are many PVCs and pods.
// See issue #9179 for details.
volumeHelper internalvolumehelper.VolumeHelper
// cachedForBackup tracks which backup the volumeHelper was built for.
// If the backup UID changes, we need to rebuild the cache.
cachedForBackup types.UID
// mu protects volumeHelper and cachedForBackup for concurrent access.
mu sync.Mutex
// See issue #9179 and PR #9226 for details.
pvcPodCache *podvolumeutil.PVCPodCache
}
// AppliesTo returns information indicating that the PVCBackupItemAction
@@ -109,29 +107,57 @@ func (p *pvcBackupItemAction) validateBackup(backup velerov1api.Backup) (valid b
return true
}
// getOrCreateVolumeHelper returns a cached VolumeHelper for the given backup.
// If the backup UID has changed or no VolumeHelper exists, a new one is created.
// This avoids the O(N*M) performance issue when there are many PVCs and pods.
// See issue #9179 for details.
func (p *pvcBackupItemAction) getOrCreateVolumeHelper(backup *velerov1api.Backup) (internalvolumehelper.VolumeHelper, error) {
p.mu.Lock()
defer p.mu.Unlock()
// Check if we already have a VolumeHelper for this backup
if p.volumeHelper != nil && p.cachedForBackup == backup.UID {
return p.volumeHelper, nil
// ensurePVCPodCacheForNamespace ensures the PVC-to-Pod cache is built for the given namespace.
// This uses lazy per-namespace caching following the pattern from PR #9226.
// Since plugin instances are unique per backup, we can safely cache without mutex or backup UID tracking.
func (p *pvcBackupItemAction) ensurePVCPodCacheForNamespace(ctx context.Context, namespace string) error {
// Initialize cache if needed
if p.pvcPodCache == nil {
p.pvcPodCache = podvolumeutil.NewPVCPodCache()
}
// Build a new VolumeHelper with cache for this backup
p.log.Infof("Building VolumeHelper with PVC-to-Pod cache for backup %s/%s", backup.Namespace, backup.Name)
vh, err := volumehelper.NewVolumeHelperForBackup(*backup, p.crClient, p.log, nil)
// Build cache for namespace if not already done
if !p.pvcPodCache.IsNamespaceBuilt(namespace) {
p.log.Debugf("Building PVC-to-Pod cache for namespace %s", namespace)
if err := p.pvcPodCache.BuildCacheForNamespace(ctx, namespace, p.crClient); err != nil {
return errors.Wrapf(err, "failed to build PVC-to-Pod cache for namespace %s", namespace)
}
}
return nil
}
// getVolumeHelperWithCache creates a VolumeHelper using the pre-built PVC-to-Pod cache.
// The cache should be ensured for the relevant namespace(s) before calling this.
func (p *pvcBackupItemAction) getVolumeHelperWithCache(backup *velerov1api.Backup) (internalvolumehelper.VolumeHelper, error) {
// Create VolumeHelper with our lazy-built cache
vh, err := internalvolumehelper.NewVolumeHelperImplWithCache(
*backup,
p.crClient,
p.log,
p.pvcPodCache,
)
if err != nil {
return nil, errors.Wrap(err, "failed to create VolumeHelper for backup")
return nil, errors.Wrap(err, "failed to create VolumeHelper")
}
return vh, nil
}
// getOrCreateVolumeHelper returns a VolumeHelper backed by the plugin's lazily built
// PVC-to-Pod cache. Only pvcPodCache is kept on the plugin (it is initialized here if
// it is still nil); a new VolumeHelper is constructed around it on every call.
// Callers should use ensurePVCPodCacheForNamespace before calling methods that need
// PVC-to-Pod lookups for a specific namespace.
// Since plugin instances are unique per backup (created via newPluginManager and
// cleaned up via CleanupClients at backup completion), the cache can be kept on the
// plugin without a mutex or backup UID tracking.
// See issue #9179 and PR #9226 for details.
func (p *pvcBackupItemAction) getOrCreateVolumeHelper(backup *velerov1api.Backup) (internalvolumehelper.VolumeHelper, error) {
// Initialize the PVC-to-Pod cache if needed
if p.pvcPodCache == nil {
p.pvcPodCache = podvolumeutil.NewPVCPodCache()
}
p.volumeHelper = vh
p.cachedForBackup = backup.UID
return vh, nil
// Return the VolumeHelper with our lazily-built cache
// The cache will be populated incrementally as namespaces are encountered
return p.getVolumeHelperWithCache(backup)
}
func (p *pvcBackupItemAction) validatePVCandPV(
@@ -285,6 +311,11 @@ func (p *pvcBackupItemAction) Execute(
return item, nil, "", nil, nil
}
// Ensure PVC-to-Pod cache is built for this namespace (lazy per-namespace caching)
if err := p.ensurePVCPodCacheForNamespace(context.TODO(), pvc.Namespace); err != nil {
return nil, nil, "", nil, err
}
// Get a VolumeHelper for this backup, backed by the lazily built PVC-to-Pod cache
vh, err := p.getOrCreateVolumeHelper(backup)
if err != nil {
@@ -665,6 +696,11 @@ func (p *pvcBackupItemAction) getVolumeSnapshotReference(
return nil, errors.Wrapf(err, "failed to list PVCs in VolumeGroupSnapshot group %q in namespace %q", group, pvc.Namespace)
}
// Ensure PVC-to-Pod cache is built for this namespace (lazy per-namespace caching)
if err := p.ensurePVCPodCacheForNamespace(ctx, pvc.Namespace); err != nil {
return nil, errors.Wrapf(err, "failed to build PVC-to-Pod cache for namespace %s", pvc.Namespace)
}
// Get a VolumeHelper backed by the PVC-to-Pod cache for filtering PVCs by volume policy
vh, err := p.getOrCreateVolumeHelper(backup)
if err != nil {

View File

@@ -2067,87 +2067,11 @@ func TestPVCRequestSize(t *testing.T) {
}
}
// TestGetOrCreateVolumeHelper tests the VolumeHelper caching behavior
// TestGetOrCreateVolumeHelper tests the VolumeHelper and PVC-to-Pod cache behavior.
// Since plugin instances are unique per backup (created via newPluginManager and
// cleaned up via CleanupClients at backup completion), we verify that the pvcPodCache
// is properly initialized and reused across calls.
func TestGetOrCreateVolumeHelper(t *testing.T) {
tests := []struct {
name string
setup func() (*pvcBackupItemAction, *velerov1api.Backup, *velerov1api.Backup)
wantSameCache bool
}{
{
name: "Returns same VolumeHelper for same backup UID",
setup: func() (*pvcBackupItemAction, *velerov1api.Backup, *velerov1api.Backup) {
client := velerotest.NewFakeControllerRuntimeClient(t)
action := &pvcBackupItemAction{
log: velerotest.NewLogger(),
crClient: client,
}
backup := &velerov1api.Backup{
ObjectMeta: metav1.ObjectMeta{
Name: "test-backup",
Namespace: "velero",
UID: types.UID("test-uid-1"),
},
}
return action, backup, backup // Same backup instance
},
wantSameCache: true,
},
{
name: "Returns new VolumeHelper for different backup UID",
setup: func() (*pvcBackupItemAction, *velerov1api.Backup, *velerov1api.Backup) {
client := velerotest.NewFakeControllerRuntimeClient(t)
action := &pvcBackupItemAction{
log: velerotest.NewLogger(),
crClient: client,
}
backup1 := &velerov1api.Backup{
ObjectMeta: metav1.ObjectMeta{
Name: "test-backup-1",
Namespace: "velero",
UID: types.UID("test-uid-1"),
},
}
backup2 := &velerov1api.Backup{
ObjectMeta: metav1.ObjectMeta{
Name: "test-backup-2",
Namespace: "velero",
UID: types.UID("test-uid-2"),
},
}
return action, backup1, backup2 // Different backup instances
},
wantSameCache: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
action, backup1, backup2 := tt.setup()
// Get VolumeHelper for first backup
vh1, err := action.getOrCreateVolumeHelper(backup1)
require.NoError(t, err)
require.NotNil(t, vh1)
// Get VolumeHelper for second backup
vh2, err := action.getOrCreateVolumeHelper(backup2)
require.NoError(t, err)
require.NotNil(t, vh2)
if tt.wantSameCache {
// Same backup UID should return same VolumeHelper pointer
require.Same(t, vh1, vh2, "Expected same VolumeHelper instance for same backup UID")
} else {
// Different backup UID should return different VolumeHelper pointer
require.NotSame(t, vh1, vh2, "Expected different VolumeHelper instance for different backup UID")
}
})
}
}
// TestGetOrCreateVolumeHelperConcurrency tests thread-safety of VolumeHelper caching
func TestGetOrCreateVolumeHelperConcurrency(t *testing.T) {
client := velerotest.NewFakeControllerRuntimeClient(t)
action := &pvcBackupItemAction{
log: velerotest.NewLogger(),
@@ -2157,41 +2081,27 @@ func TestGetOrCreateVolumeHelperConcurrency(t *testing.T) {
ObjectMeta: metav1.ObjectMeta{
Name: "test-backup",
Namespace: "velero",
UID: types.UID("test-uid"),
UID: types.UID("test-uid-1"),
},
}
// Run multiple goroutines concurrently to get VolumeHelper
const numGoroutines = 10
results := make(chan any, numGoroutines)
errors := make(chan error, numGoroutines)
// Initially, pvcPodCache should be nil
require.Nil(t, action.pvcPodCache, "pvcPodCache should be nil initially")
for i := 0; i < numGoroutines; i++ {
go func() {
vh, err := action.getOrCreateVolumeHelper(backup)
if err != nil {
errors <- err
return
}
results <- vh
}()
}
// Get VolumeHelper first time - should create new cache and VolumeHelper
vh1, err := action.getOrCreateVolumeHelper(backup)
require.NoError(t, err)
require.NotNil(t, vh1)
// Collect all results
var volumeHelpers []any
for i := 0; i < numGoroutines; i++ {
select {
case vh := <-results:
volumeHelpers = append(volumeHelpers, vh)
case err := <-errors:
t.Fatalf("Unexpected error: %v", err)
}
}
// pvcPodCache should now be initialized
require.NotNil(t, action.pvcPodCache, "pvcPodCache should be initialized after first call")
cache1 := action.pvcPodCache
// All goroutines should get the same VolumeHelper instance
require.Len(t, volumeHelpers, numGoroutines)
firstVH := volumeHelpers[0]
for i := 1; i < len(volumeHelpers); i++ {
require.Same(t, firstVH, volumeHelpers[i], "All goroutines should get the same VolumeHelper instance")
}
// Get VolumeHelper second time - should reuse the same cache
vh2, err := action.getOrCreateVolumeHelper(backup)
require.NoError(t, err)
require.NotNil(t, vh2)
// The pvcPodCache should be the same instance
require.Same(t, cache1, action.pvcPodCache, "Expected same pvcPodCache instance on repeated calls")
}