Issue 8344: constrain data path expose (#9064)
Some checks failed
Run the E2E test on kind / build (push) Failing after 7m38s
Run the E2E test on kind / setup-test-matrix (push) Successful in 4s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Main CI / Build (push) Failing after 39s
Close stale issues and PRs / stale (push) Successful in 22s
Trivy Nightly Scan / Trivy nightly scan (velero, main) (push) Failing after 1m32s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-aws, main) (push) Failing after 1m41s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-gcp, main) (push) Failing after 1m30s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-microsoft-azure, main) (push) Failing after 1m18s

* issue 8344: constrain data path exposure.

Signed-off-by: Lyndon-Li <lyonghui@vmware.com>
This commit is contained in:
lyndon-li
2025-07-18 13:32:45 +08:00
committed by GitHub
parent 29a8bc4492
commit 06d305ea47
16 changed files with 671 additions and 57 deletions

View File

@@ -57,6 +57,7 @@ import (
"github.com/vmware-tanzu/velero/pkg/constant"
"github.com/vmware-tanzu/velero/pkg/controller"
"github.com/vmware-tanzu/velero/pkg/datapath"
"github.com/vmware-tanzu/velero/pkg/exposer"
"github.com/vmware-tanzu/velero/pkg/metrics"
"github.com/vmware-tanzu/velero/pkg/nodeagent"
"github.com/vmware-tanzu/velero/pkg/util/filesystem"
@@ -140,6 +141,7 @@ type nodeAgentServer struct {
csiSnapshotClient *snapshotv1client.Clientset
dataPathMgr *datapath.Manager
dataPathConfigs *nodeagent.Configs
vgdpCounter *exposer.VgdpCounter
}
func newNodeAgentServer(logger logrus.FieldLogger, factory client.Factory, config nodeAgentServerConfig) (*nodeAgentServer, error) {
@@ -300,12 +302,21 @@ func (s *nodeAgentServer) run() {
}
}
pvbReconciler := controller.NewPodVolumeBackupReconciler(s.mgr.GetClient(), s.mgr, s.kubeClient, s.dataPathMgr, s.nodeName, s.config.dataMoverPrepareTimeout, s.config.resourceTimeout, podResources, s.metrics, s.logger)
if s.dataPathConfigs != nil && s.dataPathConfigs.LoadConcurrency != nil && s.dataPathConfigs.LoadConcurrency.PrepareQueueLength > 0 {
if counter, err := exposer.StartVgdpCounter(s.ctx, s.mgr, s.dataPathConfigs.LoadConcurrency.PrepareQueueLength); err != nil {
s.logger.WithError(err).Warnf("Failed to start VGDP counter, VDGP loads are not constrained")
} else {
s.vgdpCounter = counter
s.logger.Infof("VGDP loads are constrained with %d", s.dataPathConfigs.LoadConcurrency.PrepareQueueLength)
}
}
pvbReconciler := controller.NewPodVolumeBackupReconciler(s.mgr.GetClient(), s.mgr, s.kubeClient, s.dataPathMgr, s.vgdpCounter, s.nodeName, s.config.dataMoverPrepareTimeout, s.config.resourceTimeout, podResources, s.metrics, s.logger)
if err := pvbReconciler.SetupWithManager(s.mgr); err != nil {
s.logger.Fatal(err, "unable to create controller", "controller", constant.ControllerPodVolumeBackup)
}
pvrReconciler := controller.NewPodVolumeRestoreReconciler(s.mgr.GetClient(), s.mgr, s.kubeClient, s.dataPathMgr, s.nodeName, s.config.dataMoverPrepareTimeout, s.config.resourceTimeout, podResources, s.logger)
pvrReconciler := controller.NewPodVolumeRestoreReconciler(s.mgr.GetClient(), s.mgr, s.kubeClient, s.dataPathMgr, s.vgdpCounter, s.nodeName, s.config.dataMoverPrepareTimeout, s.config.resourceTimeout, podResources, s.logger)
if err := pvrReconciler.SetupWithManager(s.mgr); err != nil {
s.logger.WithError(err).Fatal("Unable to create the pod volume restore controller")
}
@@ -320,6 +331,7 @@ func (s *nodeAgentServer) run() {
s.kubeClient,
s.csiSnapshotClient.SnapshotV1(),
s.dataPathMgr,
s.vgdpCounter,
loadAffinity,
backupPVCConfig,
podResources,
@@ -344,6 +356,7 @@ func (s *nodeAgentServer) run() {
s.mgr,
s.kubeClient,
s.dataPathMgr,
s.vgdpCounter,
loadAffinity,
restorePVCConfig,
podResources,