From f2133c7d22c9e01c439a7556931cc36a9ef4ce7d Mon Sep 17 00:00:00 2001 From: Xun Jiang Date: Mon, 19 May 2025 17:22:24 +0800 Subject: [PATCH] The backup and restore data mover node selection. Update Makefile to let the `make serve-docs` work again. Signed-off-by: Xun Jiang --- Makefile | 4 +- changelogs/unreleased/8949-blackpiglet | 1 + design/vgdp-affinity-enhancement.md | 46 +--- pkg/cmd/cli/nodeagent/server.go | 19 +- pkg/controller/data_download_controller.go | 21 +- .../data_download_controller_test.go | 88 ++++---- pkg/controller/data_upload_controller.go | 8 +- pkg/exposer/csi_snapshot.go | 12 +- pkg/exposer/csi_snapshot_test.go | 137 ++++++++++- pkg/exposer/generic_restore.go | 50 ++++- pkg/exposer/generic_restore_test.go | 137 ++++++++++- pkg/util/kube/pod.go | 33 +++ pkg/util/kube/pod_test.go | 180 +++++++++++++++ pkg/util/kube/pvc_pv.go | 10 +- .../docs/main/csi-snapshot-data-movement.md | 2 +- .../data-movement-backup-node-selection.md | 78 ------- .../docs/main/data-movement-node-selection.md | 212 ++++++++++++++++++ site/data/docs/main-toc.yml | 24 +- 18 files changed, 851 insertions(+), 211 deletions(-) create mode 100644 changelogs/unreleased/8949-blackpiglet delete mode 100644 site/content/docs/main/data-movement-backup-node-selection.md create mode 100644 site/content/docs/main/data-movement-node-selection.md diff --git a/Makefile b/Makefile index 7309fe9af..82129623d 100644 --- a/Makefile +++ b/Makefile @@ -65,7 +65,7 @@ endif BUILDER_IMAGE := $(REGISTRY)/build-image:$(BUILDER_IMAGE_TAG) BUILDER_IMAGE_CACHED := $(shell docker images -q ${BUILDER_IMAGE} 2>/dev/null ) -HUGO_IMAGE := hugo-builder +HUGO_IMAGE := ghcr.io/gohugoio/hugo # Which architecture to build - see $(ALL_ARCH) for options. # if the 'local' rule is being run, detect the ARCH from 'go env' @@ -451,7 +451,7 @@ release: serve-docs: build-image-hugo docker run \ --rm \ - -v "$$(pwd)/site:/srv/hugo" \ + -v "$$(pwd)/site:/project" \ -it -p 1313:1313 \ $(HUGO_IMAGE) \ server --bind=0.0.0.0 --enableGitInfo=false diff --git a/changelogs/unreleased/8949-blackpiglet b/changelogs/unreleased/8949-blackpiglet new file mode 100644 index 000000000..a434ee160 --- /dev/null +++ b/changelogs/unreleased/8949-blackpiglet @@ -0,0 +1 @@ +The backup and restore VGDP affinity enhancement implementation. diff --git a/design/vgdp-affinity-enhancement.md b/design/vgdp-affinity-enhancement.md index bbac325b0..dc9e458c7 100644 --- a/design/vgdp-affinity-enhancement.md +++ b/design/vgdp-affinity-enhancement.md @@ -12,7 +12,6 @@ The implemented [VGDP LoadAffinity design][3] already defined the a structure `L There are still some limitations of this design: * The affinity setting is global. Say there are two StorageClasses and the underlying storage can only provision volumes to part of the cluster nodes. The supported nodes don't have intersection. Then the affinity will definitely not work in some cases. -* The old design only take the first element of the []*LoadAffinity array. By this way, it cannot support the or logic between Affinity selectors. * The old design focuses on the backupPod affinity, but the restorePod also needs the affinity setting. As a result, create this design to address the limitations. @@ -34,7 +33,6 @@ This design still uses the ConfigMap specified by `velero node-agent` CLI's para Upon the implemented [VGDP LoadAffinity design][3] introduced `[]*LoadAffinity` structure, this design add a new field `StorageClass`. This field is optional. 
* If the `LoadAffinity` element's `StorageClass` doesn't have value, it means this element is applied to global, just as the old design. * If the `LoadAffinity` element's `StorageClass` has value, it means this element is applied to the VGDP instances' PVCs use the specified StorageClass. -* To support the or logic between LoadAffinity elements, this design allows multiple instances of `LoadAffinity` whose `StorageClass` field have the same value. * The `LoadAffinity` element whose `StorageClass` has value has higher priority than the `LoadAffinity` element whose `StorageClass` doesn't have value. @@ -93,14 +91,8 @@ flowchart TD O -->|No loadAffinity configured| R[No affinity constraints
Schedule on any available node
🌐 DEFAULT] - N --> S{Multiple rules in array?} - S -->|Yes| T[Apply all rules as OR conditions
Pod scheduled on nodes matching ANY rule] - S -->|No| U[Apply single rule
Pod scheduled on nodes matching this rule] - - O --> S - - T --> V[Validate node-agent availability
⚠️ Ensure node-agent pods exist on target nodes] - U --> V + O --> V[Validate node-agent availability
⚠️ Ensure node-agent pods exist on target nodes] + N --> V V --> W{Node-agent available on selected nodes?} W -->|Yes| X[✅ VGDP Pod scheduled successfully] @@ -126,40 +118,6 @@ flowchart TD ### Examples -#### Multiple LoadAffinities - -``` json -{ - "loadAffinity": [ - { - "nodeSelector": { - "matchLabels": { - "beta.kubernetes.io/instance-type": "Standard_B4ms" - } - } - }, - { - "nodeSelector": { - "matchExpressions": [ - { - "key": "topology.kubernetes.io/zone", - "operator": "In", - "values": [ - "us-central1-a" - ] - } - ] - } - } - ] -} -``` - -This sample demonstrates how to use multiple affinities in `loadAffinity`. That can support more complicated scenarios, e.g. need to filter nodes satisfied either of two conditions, instead of satisfied both of two conditions. - -In this example, the VGDP pods will be assigned to nodes, which instance type is `Standard_B4ms` or which zone is `us-central1-a`. - - #### LoadAffinity interacts with LoadAffinityPerStorageClass ``` json diff --git a/pkg/cmd/cli/nodeagent/server.go b/pkg/cmd/cli/nodeagent/server.go index 5e1465ce7..78dd1abcd 100644 --- a/pkg/cmd/cli/nodeagent/server.go +++ b/pkg/cmd/cli/nodeagent/server.go @@ -278,9 +278,9 @@ func (s *nodeAgentServer) run() { s.logger.Info("Starting controllers") - var loadAffinity *kube.LoadAffinity + var loadAffinity []*kube.LoadAffinity if s.dataPathConfigs != nil && len(s.dataPathConfigs.LoadAffinity) > 0 { - loadAffinity = s.dataPathConfigs.LoadAffinity[0] + loadAffinity = s.dataPathConfigs.LoadAffinity s.logger.Infof("Using customized loadAffinity %v", loadAffinity) } @@ -339,7 +339,20 @@ func (s *nodeAgentServer) run() { s.logger.Infof("Using customized restorePVC config %v", restorePVCConfig) } - dataDownloadReconciler := controller.NewDataDownloadReconciler(s.mgr.GetClient(), s.mgr, s.kubeClient, s.dataPathMgr, restorePVCConfig, podResources, s.nodeName, s.config.dataMoverPrepareTimeout, s.logger, s.metrics) + dataDownloadReconciler := controller.NewDataDownloadReconciler( + s.mgr.GetClient(), + s.mgr, + s.kubeClient, + s.dataPathMgr, + loadAffinity, + restorePVCConfig, + podResources, + s.nodeName, + s.config.dataMoverPrepareTimeout, + s.logger, + s.metrics, + ) + if err := dataDownloadReconciler.SetupWithManager(s.mgr); err != nil { s.logger.WithError(err).Fatal("Unable to create the data download controller") } diff --git a/pkg/controller/data_download_controller.go b/pkg/controller/data_download_controller.go index 44e6b0fe7..22f811af9 100644 --- a/pkg/controller/data_download_controller.go +++ b/pkg/controller/data_download_controller.go @@ -64,6 +64,7 @@ type DataDownloadReconciler struct { restoreExposer exposer.GenericRestoreExposer nodeName string dataPathMgr *datapath.Manager + loadAffinity []*kube.LoadAffinity restorePVCConfig nodeagent.RestorePVC podResources corev1api.ResourceRequirements preparingTimeout time.Duration @@ -71,9 +72,19 @@ type DataDownloadReconciler struct { cancelledDataDownload map[string]time.Time } -func NewDataDownloadReconciler(client client.Client, mgr manager.Manager, kubeClient kubernetes.Interface, dataPathMgr *datapath.Manager, - restorePVCConfig nodeagent.RestorePVC, podResources corev1api.ResourceRequirements, nodeName string, preparingTimeout time.Duration, - logger logrus.FieldLogger, metrics *metrics.ServerMetrics) *DataDownloadReconciler { +func NewDataDownloadReconciler( + client client.Client, + mgr manager.Manager, + kubeClient kubernetes.Interface, + dataPathMgr *datapath.Manager, + loadAffinity []*kube.LoadAffinity, + restorePVCConfig 
nodeagent.RestorePVC, + podResources corev1api.ResourceRequirements, + nodeName string, + preparingTimeout time.Duration, + logger logrus.FieldLogger, + metrics *metrics.ServerMetrics, +) *DataDownloadReconciler { return &DataDownloadReconciler{ client: client, kubeClient: kubeClient, @@ -84,6 +95,7 @@ func NewDataDownloadReconciler(client client.Client, mgr manager.Manager, kubeCl restoreExposer: exposer.NewGenericRestoreExposer(kubeClient, logger), restorePVCConfig: restorePVCConfig, dataPathMgr: dataPathMgr, + loadAffinity: loadAffinity, podResources: podResources, preparingTimeout: preparingTimeout, metrics: metrics, @@ -828,6 +840,8 @@ func (r *DataDownloadReconciler) setupExposeParam(dd *velerov2alpha1api.DataDown } } + affinity := kube.GetLoadAffinityByStorageClass(r.loadAffinity, dd.Spec.BackupStorageLocation, log) + return exposer.GenericRestoreExposeParam{ TargetPVCName: dd.Spec.TargetVolume.PVC, TargetNamespace: dd.Spec.TargetVolume.Namespace, @@ -838,6 +852,7 @@ func (r *DataDownloadReconciler) setupExposeParam(dd *velerov2alpha1api.DataDown ExposeTimeout: r.preparingTimeout, NodeOS: nodeOS, RestorePVCConfig: r.restorePVCConfig, + LoadAffinity: affinity, }, nil } diff --git a/pkg/controller/data_download_controller_test.go b/pkg/controller/data_download_controller_test.go index ed67a13f8..8f629d468 100644 --- a/pkg/controller/data_download_controller_test.go +++ b/pkg/controller/data_download_controller_test.go @@ -40,8 +40,6 @@ import ( "sigs.k8s.io/controller-runtime/pkg/manager" "sigs.k8s.io/controller-runtime/pkg/reconcile" - "sigs.k8s.io/controller-runtime/pkg/client/fake" - velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" velerov2alpha1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v2alpha1" "github.com/vmware-tanzu/velero/pkg/builder" @@ -70,7 +68,9 @@ func dataDownloadBuilder() *builder.DataDownloadBuilder { }) } -func initDataDownloadReconciler(objects []runtime.Object, needError ...bool) (*DataDownloadReconciler, error) { +func initDataDownloadReconciler(t *testing.T, objects []any, needError ...bool) (*DataDownloadReconciler, error) { + t.Helper() + var errs = make([]error, 6) for k, isError := range needError { if k == 0 && isError { @@ -87,28 +87,24 @@ func initDataDownloadReconciler(objects []runtime.Object, needError ...bool) (*D errs[5] = fmt.Errorf("List error") } } - return initDataDownloadReconcilerWithError(objects, errs...) + return initDataDownloadReconcilerWithError(t, objects, errs...) } -func initDataDownloadReconcilerWithError(objects []runtime.Object, needError ...error) (*DataDownloadReconciler, error) { - scheme := runtime.NewScheme() - err := velerov1api.AddToScheme(scheme) - if err != nil { - return nil, err - } - err = velerov2alpha1api.AddToScheme(scheme) - if err != nil { - return nil, err - } - err = corev1api.AddToScheme(scheme) - if err != nil { - return nil, err +func initDataDownloadReconcilerWithError(t *testing.T, objects []any, needError ...error) (*DataDownloadReconciler, error) { + t.Helper() + + runtimeObjects := make([]runtime.Object, 0) + + for _, obj := range objects { + runtimeObjects = append(runtimeObjects, obj.(runtime.Object)) } - fakeClient := &FakeClient{ - Client: fake.NewClientBuilder().WithScheme(scheme).Build(), + fakeClient := FakeClient{ + Client: velerotest.NewFakeControllerRuntimeClient(t, runtimeObjects...), } + fakeKubeClient := clientgofake.NewSimpleClientset(runtimeObjects...) 
+ for k := range needError { if k == 0 { fakeClient.getError = needError[0] @@ -125,26 +121,32 @@ func initDataDownloadReconcilerWithError(objects []runtime.Object, needError ... } } - var fakeKubeClient *clientgofake.Clientset - if len(objects) != 0 { - fakeKubeClient = clientgofake.NewSimpleClientset(objects...) - } else { - fakeKubeClient = clientgofake.NewSimpleClientset() - } - fakeFS := velerotest.NewFakeFileSystem() pathGlob := fmt.Sprintf("/host_pods/%s/volumes/*/%s", "test-uid", "test-pvc") - _, err = fakeFS.Create(pathGlob) + _, err := fakeFS.Create(pathGlob) if err != nil { return nil, err } dataPathMgr := datapath.NewManager(1) - return NewDataDownloadReconciler(fakeClient, nil, fakeKubeClient, dataPathMgr, nodeagent.RestorePVC{}, corev1api.ResourceRequirements{}, "test-node", time.Minute*5, velerotest.NewLogger(), metrics.NewServerMetrics()), nil + return NewDataDownloadReconciler( + &fakeClient, + nil, + fakeKubeClient, + dataPathMgr, + nil, + nodeagent.RestorePVC{}, + corev1api.ResourceRequirements{}, + "test-node", + time.Minute*5, + velerotest.NewLogger(), + metrics.NewServerMetrics()), nil } func TestDataDownloadReconcile(t *testing.T) { + sc := builder.ForStorageClass("sc").Result() + daemonSet := &appsv1api.DaemonSet{ ObjectMeta: metav1.ObjectMeta{ Namespace: "velero", @@ -330,7 +332,7 @@ func TestDataDownloadReconcile(t *testing.T) { { name: "dd succeeds for accepted", dd: dataDownloadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).Result(), - targetPVC: builder.ForPersistentVolumeClaim("test-ns", "test-pvc").Result(), + targetPVC: builder.ForPersistentVolumeClaim("test-ns", "test-pvc").StorageClass("sc").Result(), expected: dataDownloadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).Phase(velerov2alpha1api.DataDownloadPhaseAccepted).Result(), }, { @@ -457,13 +459,13 @@ func TestDataDownloadReconcile(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - objs := []runtime.Object{daemonSet, node} + objects := []any{daemonSet, node, sc} if test.targetPVC != nil { - objs = append(objs, test.targetPVC) + objects = append(objects, test.targetPVC) } - r, err := initDataDownloadReconciler(objs, test.needErrs...) + r, err := initDataDownloadReconciler(t, objects, test.needErrs...) require.NoError(t, err) if !test.notCreateDD { @@ -607,7 +609,7 @@ func TestOnDataDownloadFailed(t *testing.T) { for _, getErr := range []bool{true, false} { ctx := context.TODO() needErrs := []bool{getErr, false, false, false} - r, err := initDataDownloadReconciler(nil, needErrs...) + r, err := initDataDownloadReconciler(t, nil, needErrs...) require.NoError(t, err) dd := dataDownloadBuilder().Result() @@ -633,7 +635,7 @@ func TestOnDataDownloadCancelled(t *testing.T) { for _, getErr := range []bool{true, false} { ctx := context.TODO() needErrs := []bool{getErr, false, false, false} - r, err := initDataDownloadReconciler(nil, needErrs...) + r, err := initDataDownloadReconciler(t, nil, needErrs...) require.NoError(t, err) dd := dataDownloadBuilder().Result() @@ -675,7 +677,7 @@ func TestOnDataDownloadCompleted(t *testing.T) { t.Run(test.name, func(t *testing.T) { ctx := context.TODO() needErrs := []bool{test.isGetErr, false, false, false} - r, err := initDataDownloadReconciler(nil, needErrs...) + r, err := initDataDownloadReconciler(t, nil, needErrs...) 
r.restoreExposer = func() exposer.GenericRestoreExposer { ep := exposermockes.NewGenericRestoreExposer(t) if test.rebindVolumeErr { @@ -740,7 +742,7 @@ func TestOnDataDownloadProgress(t *testing.T) { t.Run(test.name, func(t *testing.T) { ctx := context.TODO() - r, err := initDataDownloadReconciler(nil, test.needErrs...) + r, err := initDataDownloadReconciler(t, nil, test.needErrs...) require.NoError(t, err) defer func() { r.client.Delete(ctx, test.dd, &kbclient.DeleteOptions{}) @@ -774,7 +776,7 @@ func TestOnDataDownloadProgress(t *testing.T) { func TestFindDataDownloadForPod(t *testing.T) { needErrs := []bool{false, false, false, false} - r, err := initDataDownloadReconciler(nil, needErrs...) + r, err := initDataDownloadReconciler(t, nil, needErrs...) require.NoError(t, err) tests := []struct { name string @@ -860,7 +862,7 @@ func TestAcceptDataDownload(t *testing.T) { } for _, test := range tests { ctx := context.Background() - r, err := initDataDownloadReconcilerWithError(nil, test.needErrs...) + r, err := initDataDownloadReconcilerWithError(t, nil, test.needErrs...) require.NoError(t, err) err = r.client.Create(ctx, test.dd) @@ -904,7 +906,7 @@ func TestOnDdPrepareTimeout(t *testing.T) { } for _, test := range tests { ctx := context.Background() - r, err := initDataDownloadReconcilerWithError(nil, test.needErrs...) + r, err := initDataDownloadReconcilerWithError(t, nil, test.needErrs...) require.NoError(t, err) err = r.client.Create(ctx, test.dd) @@ -949,7 +951,7 @@ func TestTryCancelDataDownload(t *testing.T) { } for _, test := range tests { ctx := context.Background() - r, err := initDataDownloadReconcilerWithError(nil, test.needErrs...) + r, err := initDataDownloadReconcilerWithError(t, nil, test.needErrs...) require.NoError(t, err) err = r.client.Create(ctx, test.dd) @@ -1007,7 +1009,7 @@ func TestUpdateDataDownloadWithRetry(t *testing.T) { t.Run(tc.Name, func(t *testing.T) { ctx, cancelFunc := context.WithTimeout(context.TODO(), time.Second*5) defer cancelFunc() - r, err := initDataDownloadReconciler(nil, tc.needErrs...) + r, err := initDataDownloadReconciler(t, nil, tc.needErrs...) require.NoError(t, err) err = r.client.Create(ctx, dataDownloadBuilder().Result()) require.NoError(t, err) @@ -1124,7 +1126,7 @@ func TestAttemptDataDownloadResume(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { ctx := context.TODO() - r, err := initDataDownloadReconciler(nil, test.needErrs...) + r, err := initDataDownloadReconciler(t, nil, test.needErrs...) 
r.nodeName = "node-1" require.NoError(t, err) defer func() { @@ -1242,7 +1244,7 @@ func TestResumeCancellableRestore(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { ctx := context.TODO() - r, err := initDataDownloadReconciler(nil, false) + r, err := initDataDownloadReconciler(t, nil, false) r.nodeName = "node-1" require.NoError(t, err) diff --git a/pkg/controller/data_upload_controller.go b/pkg/controller/data_upload_controller.go index a840ebc16..9f9563d6b 100644 --- a/pkg/controller/data_upload_controller.go +++ b/pkg/controller/data_upload_controller.go @@ -74,7 +74,7 @@ type DataUploadReconciler struct { logger logrus.FieldLogger snapshotExposerList map[velerov2alpha1api.SnapshotType]exposer.SnapshotExposer dataPathMgr *datapath.Manager - loadAffinity *kube.LoadAffinity + loadAffinity []*kube.LoadAffinity backupPVCConfig map[string]nodeagent.BackupPVC podResources corev1api.ResourceRequirements preparingTimeout time.Duration @@ -88,7 +88,7 @@ func NewDataUploadReconciler( kubeClient kubernetes.Interface, csiSnapshotClient snapshotter.SnapshotV1Interface, dataPathMgr *datapath.Manager, - loadAffinity *kube.LoadAffinity, + loadAffinity []*kube.LoadAffinity, backupPVCConfig map[string]nodeagent.BackupPVC, podResources corev1api.ResourceRequirements, clock clocks.WithTickerAndDelayedExecution, @@ -917,6 +917,8 @@ func (r *DataUploadReconciler) setupExposeParam(du *velerov2alpha1api.DataUpload } } + affinity := kube.GetLoadAffinityByStorageClass(r.loadAffinity, du.Spec.CSISnapshot.SnapshotClass, log) + return &exposer.CSISnapshotExposeParam{ SnapshotName: du.Spec.CSISnapshot.VolumeSnapshot, SourceNamespace: du.Spec.SourceNamespace, @@ -927,7 +929,7 @@ func (r *DataUploadReconciler) setupExposeParam(du *velerov2alpha1api.DataUpload OperationTimeout: du.Spec.OperationTimeout.Duration, ExposeTimeout: r.preparingTimeout, VolumeSize: pvc.Spec.Resources.Requests[corev1api.ResourceStorage], - Affinity: r.loadAffinity, + Affinity: affinity, BackupPVCConfig: r.backupPVCConfig, Resources: r.podResources, NodeOS: nodeOS, diff --git a/pkg/exposer/csi_snapshot.go b/pkg/exposer/csi_snapshot.go index b0257f340..a946c8244 100644 --- a/pkg/exposer/csi_snapshot.go +++ b/pkg/exposer/csi_snapshot.go @@ -583,11 +583,6 @@ func (e *csiSnapshotExposer) createBackupPod( args = append(args, podInfo.logFormatArgs...) args = append(args, podInfo.logLevelArgs...) 
- affinityList := make([]*kube.LoadAffinity, 0) - if affinity != nil { - affinityList = append(affinityList, affinity) - } - var securityCtx *corev1api.PodSecurityContext nodeSelector := map[string]string{} podOS := corev1api.PodOS{} @@ -625,6 +620,11 @@ func (e *csiSnapshotExposer) createBackupPod( podOS.Name = kube.NodeOSLinux } + var podAffinity *corev1api.Affinity + if affinity != nil { + podAffinity = kube.ToSystemAffinity([]*kube.LoadAffinity{affinity}) + } + pod := &corev1api.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: podName, @@ -656,7 +656,7 @@ func (e *csiSnapshotExposer) createBackupPod( }, NodeSelector: nodeSelector, OS: &podOS, - Affinity: kube.ToSystemAffinity(affinityList), + Affinity: podAffinity, Containers: []corev1api.Container{ { Name: containerName, diff --git a/pkg/exposer/csi_snapshot_test.go b/pkg/exposer/csi_snapshot_test.go index 3f80eaccd..28e4477fb 100644 --- a/pkg/exposer/csi_snapshot_test.go +++ b/pkg/exposer/csi_snapshot_test.go @@ -41,6 +41,7 @@ import ( "github.com/vmware-tanzu/velero/pkg/nodeagent" velerotest "github.com/vmware-tanzu/velero/pkg/test" "github.com/vmware-tanzu/velero/pkg/util/boolptr" + "github.com/vmware-tanzu/velero/pkg/util/kube" ) type reactor struct { @@ -163,6 +164,7 @@ func TestExpose(t *testing.T) { expectedVolumeSize *resource.Quantity expectedReadOnlyPVC bool expectedBackupPVCStorageClass string + expectedAffinity *corev1api.Affinity }{ { name: "wait vs ready fail", @@ -467,6 +469,135 @@ func TestExpose(t *testing.T) { }, expectedBackupPVCStorageClass: "fake-sc-read-only", }, + { + name: "Affinity per StorageClass", + ownerBackup: backup, + exposeParam: CSISnapshotExposeParam{ + SnapshotName: "fake-vs", + SourceNamespace: "fake-ns", + StorageClass: "fake-sc", + AccessMode: AccessModeFileSystem, + OperationTimeout: time.Millisecond, + ExposeTimeout: time.Millisecond, + Affinity: &kube.LoadAffinity{ + NodeSelector: metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "kubernetes.io/os", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"Linux"}, + }, + }, + }, + StorageClass: "fake-sc", + }, + }, + snapshotClientObj: []runtime.Object{ + vsObject, + vscObj, + }, + kubeClientObj: []runtime.Object{ + daemonSet, + }, + expectedAffinity: &corev1api.Affinity{ + NodeAffinity: &corev1api.NodeAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: &corev1api.NodeSelector{ + NodeSelectorTerms: []corev1api.NodeSelectorTerm{ + { + MatchExpressions: []corev1api.NodeSelectorRequirement{ + { + Key: "kubernetes.io/os", + Operator: corev1api.NodeSelectorOpIn, + Values: []string{"Linux"}, + }, + }, + }, + }, + }, + }, + }, + }, + { + name: "Affinity per StorageClass with expectedBackupPVCStorageClass", + ownerBackup: backup, + exposeParam: CSISnapshotExposeParam{ + SnapshotName: "fake-vs", + SourceNamespace: "fake-ns", + StorageClass: "fake-sc", + AccessMode: AccessModeFileSystem, + OperationTimeout: time.Millisecond, + ExposeTimeout: time.Millisecond, + BackupPVCConfig: map[string]nodeagent.BackupPVC{ + "fake-sc": { + StorageClass: "fake-sc-read-only", + }, + }, + Affinity: &kube.LoadAffinity{ + NodeSelector: metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "kubernetes.io/arch", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"amd64"}, + }, + }, + }, + StorageClass: "fake-sc-read-only", + }, + }, + snapshotClientObj: []runtime.Object{ + vsObject, + vscObj, + }, + kubeClientObj: []runtime.Object{ + daemonSet, + }, + expectedBackupPVCStorageClass: 
"fake-sc-read-only", + expectedAffinity: &corev1api.Affinity{ + NodeAffinity: &corev1api.NodeAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: &corev1api.NodeSelector{ + NodeSelectorTerms: []corev1api.NodeSelectorTerm{ + { + MatchExpressions: []corev1api.NodeSelectorRequirement{ + { + Key: "kubernetes.io/arch", + Operator: corev1api.NodeSelectorOpIn, + Values: []string{"amd64"}, + }, + }, + }, + }, + }, + }, + }, + }, + { + name: "Affinity in exposeParam is nil", + ownerBackup: backup, + exposeParam: CSISnapshotExposeParam{ + SnapshotName: "fake-vs", + SourceNamespace: "fake-ns", + StorageClass: "fake-sc", + AccessMode: AccessModeFileSystem, + OperationTimeout: time.Millisecond, + ExposeTimeout: time.Millisecond, + BackupPVCConfig: map[string]nodeagent.BackupPVC{ + "fake-sc": { + StorageClass: "fake-sc-read-only", + }, + }, + Affinity: nil, + }, + snapshotClientObj: []runtime.Object{ + vsObject, + vscObj, + }, + kubeClientObj: []runtime.Object{ + daemonSet, + }, + expectedBackupPVCStorageClass: "fake-sc-read-only", + expectedAffinity: nil, + }, } for _, test := range tests { @@ -503,7 +634,7 @@ func TestExpose(t *testing.T) { if err == nil { require.NoError(t, err) - _, err = exposer.kubeClient.CoreV1().Pods(ownerObject.Namespace).Get(context.Background(), ownerObject.Name, metav1.GetOptions{}) + backupPod, err := exposer.kubeClient.CoreV1().Pods(ownerObject.Namespace).Get(context.Background(), ownerObject.Name, metav1.GetOptions{}) require.NoError(t, err) backupPVC, err := exposer.kubeClient.CoreV1().PersistentVolumeClaims(ownerObject.Namespace).Get(context.Background(), ownerObject.Name, metav1.GetOptions{}) @@ -543,6 +674,10 @@ func TestExpose(t *testing.T) { if test.expectedBackupPVCStorageClass != "" { assert.Equal(t, test.expectedBackupPVCStorageClass, *backupPVC.Spec.StorageClassName) } + + if test.expectedAffinity != nil { + assert.Equal(t, test.expectedAffinity, backupPod.Spec.Affinity) + } } else { assert.EqualError(t, err, test.err) } diff --git a/pkg/exposer/generic_restore.go b/pkg/exposer/generic_restore.go index f02472648..53c8e09a7 100644 --- a/pkg/exposer/generic_restore.go +++ b/pkg/exposer/generic_restore.go @@ -63,6 +63,9 @@ type GenericRestoreExposeParam struct { // RestorePVCConfig is the config for restorePVC (intermediate PVC) of generic restore RestorePVCConfig nodeagent.RestorePVC + + // LoadAffinity specifies the node affinity of the backup pod + LoadAffinity *kube.LoadAffinity } // GenericRestoreExposer is the interfaces for a generic restore exposer @@ -111,7 +114,15 @@ func (e *genericRestoreExposer) Expose(ctx context.Context, ownerObject corev1ap "target namespace": param.TargetNamespace, }) - selectedNode, targetPVC, err := kube.WaitPVCConsumed(ctx, e.kubeClient.CoreV1(), param.TargetPVCName, param.TargetNamespace, e.kubeClient.StorageV1(), param.ExposeTimeout, param.RestorePVCConfig.IgnoreDelayBinding) + selectedNode, targetPVC, err := kube.WaitPVCConsumed( + ctx, + e.kubeClient.CoreV1(), + param.TargetPVCName, + param.TargetNamespace, + e.kubeClient.StorageV1(), + param.ExposeTimeout, + param.RestorePVCConfig.IgnoreDelayBinding, + ) if err != nil { return errors.Wrapf(err, "error to wait target PVC consumed, %s/%s", param.TargetNamespace, param.TargetPVCName) } @@ -122,7 +133,18 @@ func (e *genericRestoreExposer) Expose(ctx context.Context, ownerObject corev1ap return errors.Errorf("Target PVC %s/%s has already been bound, abort", param.TargetNamespace, param.TargetPVCName) } - restorePod, err := e.createRestorePod(ctx, ownerObject, targetPVC, 
param.OperationTimeout, param.HostingPodLabels, param.HostingPodAnnotations, selectedNode, param.Resources, param.NodeOS) + restorePod, err := e.createRestorePod( + ctx, + ownerObject, + targetPVC, + param.OperationTimeout, + param.HostingPodLabels, + param.HostingPodAnnotations, + selectedNode, + param.Resources, + param.NodeOS, + param.LoadAffinity, + ) if err != nil { return errors.Wrapf(err, "error to create restore pod") } @@ -376,14 +398,33 @@ func (e *genericRestoreExposer) RebindVolume(ctx context.Context, ownerObject co return nil } -func (e *genericRestoreExposer) createRestorePod(ctx context.Context, ownerObject corev1api.ObjectReference, targetPVC *corev1api.PersistentVolumeClaim, - operationTimeout time.Duration, label map[string]string, annotation map[string]string, selectedNode string, resources corev1api.ResourceRequirements, nodeOS string) (*corev1api.Pod, error) { +func (e *genericRestoreExposer) createRestorePod( + ctx context.Context, + ownerObject corev1api.ObjectReference, + targetPVC *corev1api.PersistentVolumeClaim, + operationTimeout time.Duration, + label map[string]string, + annotation map[string]string, + selectedNode string, + resources corev1api.ResourceRequirements, + nodeOS string, + affinity *kube.LoadAffinity, +) (*corev1api.Pod, error) { restorePodName := ownerObject.Name restorePVCName := ownerObject.Name containerName := string(ownerObject.UID) volumeName := string(ownerObject.UID) + var podAffinity *corev1api.Affinity + if selectedNode == "" { + e.log.Infof("No selected node for restore pod. Try to get affinity from the node-agent config.") + + if affinity != nil { + podAffinity = kube.ToSystemAffinity([]*kube.LoadAffinity{affinity}) + } + } + podInfo, err := getInheritedPodInfo(ctx, e.kubeClient, ownerObject.Namespace, nodeOS) if err != nil { return nil, errors.Wrap(err, "error to get inherited pod info from node-agent") @@ -512,6 +553,7 @@ func (e *genericRestoreExposer) createRestorePod(ctx context.Context, ownerObjec Tolerations: toleration, DNSPolicy: podInfo.dnsPolicy, DNSConfig: podInfo.dnsConfig, + Affinity: podAffinity, }, } diff --git a/pkg/exposer/generic_restore_test.go b/pkg/exposer/generic_restore_test.go index 2df512316..4270e9bc3 100644 --- a/pkg/exposer/generic_restore_test.go +++ b/pkg/exposer/generic_restore_test.go @@ -23,19 +23,22 @@ import ( "github.com/pkg/errors" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + appsv1api "k8s.io/api/apps/v1" + corev1api "k8s.io/api/core/v1" + storagev1api "k8s.io/api/storage/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/kubernetes/fake" + clientTesting "k8s.io/client-go/testing" velerov1 "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" velerotest "github.com/vmware-tanzu/velero/pkg/test" - - appsv1api "k8s.io/api/apps/v1" - corev1api "k8s.io/api/core/v1" - clientTesting "k8s.io/client-go/testing" + "github.com/vmware-tanzu/velero/pkg/util/kube" ) func TestRestoreExpose(t *testing.T) { + scName := "fake-sc" restore := &velerov1.Restore{ TypeMeta: metav1.TypeMeta{ APIVersion: velerov1.SchemeGroupVersion.String(), @@ -53,6 +56,15 @@ func TestRestoreExpose(t *testing.T) { Namespace: "fake-ns", Name: "fake-target-pvc", }, + Spec: corev1api.PersistentVolumeClaimSpec{ + StorageClassName: &scName, + }, + } + + storageClass := &storagev1api.StorageClass{ + ObjectMeta: metav1.ObjectMeta{ + Name: "fake-sc", + }, } targetPVCObjBound := &corev1api.PersistentVolumeClaim{ @@ -110,6 +122,7 @@ func TestRestoreExpose(t 
*testing.T) { ownerRestore: restore, kubeClientObj: []runtime.Object{ targetPVCObjBound, + storageClass, }, err: "Target PVC fake-ns/fake-target-pvc has already been bound, abort", }, @@ -121,6 +134,7 @@ func TestRestoreExpose(t *testing.T) { kubeClientObj: []runtime.Object{ targetPVCObj, daemonSet, + storageClass, }, kubeReactors: []reactor{ { @@ -141,6 +155,7 @@ func TestRestoreExpose(t *testing.T) { kubeClientObj: []runtime.Object{ targetPVCObj, daemonSet, + storageClass, }, kubeReactors: []reactor{ { @@ -179,13 +194,19 @@ func TestRestoreExpose(t *testing.T) { } } - err := exposer.Expose(context.Background(), ownerObject, GenericRestoreExposeParam{ - TargetPVCName: test.targetPVCName, - TargetNamespace: test.targetNamespace, - HostingPodLabels: map[string]string{}, - Resources: corev1api.ResourceRequirements{}, - ExposeTimeout: time.Millisecond}) - assert.EqualError(t, err, test.err) + err := exposer.Expose( + context.Background(), + ownerObject, + GenericRestoreExposeParam{ + TargetPVCName: test.targetPVCName, + TargetNamespace: test.targetNamespace, + HostingPodLabels: map[string]string{}, + Resources: corev1api.ResourceRequirements{}, + ExposeTimeout: time.Millisecond, + LoadAffinity: nil, + }, + ) + require.EqualError(t, err, test.err) }) } } @@ -766,3 +787,97 @@ end diagnose restore exposer`, }) } } + +func TestCreateRestorePod(t *testing.T) { + scName := "storage-class-01" + + daemonSet := &appsv1api.DaemonSet{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "velero", + Name: "node-agent", + }, + TypeMeta: metav1.TypeMeta{ + Kind: "DaemonSet", + APIVersion: appsv1api.SchemeGroupVersion.String(), + }, + Spec: appsv1api.DaemonSetSpec{ + Template: corev1api.PodTemplateSpec{ + Spec: corev1api.PodSpec{ + Containers: []corev1api.Container{ + { + Image: "fake-image", + }, + }, + }, + }, + }, + } + + targetPVCObj := &corev1api.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "fake-ns", + Name: "fake-target-pvc", + }, + Spec: corev1api.PersistentVolumeClaimSpec{ + StorageClassName: &scName, + }, + } + + tests := []struct { + name string + kubeClientObj []runtime.Object + selectedNode string + affinity *kube.LoadAffinity + expectedPod *corev1api.Pod + }{ + { + name: "", + kubeClientObj: []runtime.Object{daemonSet, targetPVCObj}, + selectedNode: "", + affinity: &kube.LoadAffinity{ + NodeSelector: metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "kubernetes.io/os", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"Linux"}, + }, + }, + }, + StorageClass: scName, + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + fakeKubeClient := fake.NewSimpleClientset(test.kubeClientObj...) 
+ + exposer := genericRestoreExposer{ + kubeClient: fakeKubeClient, + log: velerotest.NewLogger(), + } + + pod, err := exposer.createRestorePod( + context.Background(), + corev1api.ObjectReference{ + Namespace: velerov1.DefaultNamespace, + Name: "data-download", + }, + targetPVCObj, + time.Second*3, + map[string]string{}, + map[string]string{}, + test.selectedNode, + corev1api.ResourceRequirements{}, + "linux", + test.affinity, + ) + + require.NoError(t, err) + if test.expectedPod != nil { + assert.Equal(t, test.expectedPod, pod) + } + }) + } +} diff --git a/pkg/util/kube/pod.go b/pkg/util/kube/pod.go index ba506dae1..4e20c5f1c 100644 --- a/pkg/util/kube/pod.go +++ b/pkg/util/kube/pod.go @@ -34,6 +34,9 @@ import ( type LoadAffinity struct { // NodeSelector specifies the label selector to match nodes NodeSelector metav1.LabelSelector `json:"nodeSelector"` + + // StorageClass specifies the VGDPs the LoadAffinity applied to. If the StorageClass doesn't have value, it applies to all. If not, it applies to only the VGDPs that use this StorageClass. + StorageClass string `json:"storageClass"` } type PodResources struct { @@ -301,3 +304,33 @@ func ExitPodWithMessage(logger logrus.FieldLogger, succeed bool, message string, funcExit(exitCode) } + +// GetLoadAffinityByStorageClass retrieves the LoadAffinity from the parameter affinityList. +// The function first try to find by the scName. If there is no such LoadAffinity, +// it will try to get the LoadAffinity whose StorageClass has no value. +func GetLoadAffinityByStorageClass( + affinityList []*LoadAffinity, + scName string, + logger logrus.FieldLogger, +) *LoadAffinity { + var globalAffinity *LoadAffinity + + for _, affinity := range affinityList { + if affinity.StorageClass == scName { + logger.WithField("StorageClass", scName).Info("Found backup pod's affinity setting per StorageClass.") + return affinity + } + + if affinity.StorageClass == "" && globalAffinity == nil { + globalAffinity = affinity + } + } + + if globalAffinity != nil { + logger.Info("Use the Global affinity for backup pod.") + } else { + logger.Info("No Affinity is found for backup pod.") + } + + return globalAffinity +} diff --git a/pkg/util/kube/pod_test.go b/pkg/util/kube/pod_test.go index 5e64752d6..54630df79 100644 --- a/pkg/util/kube/pod_test.go +++ b/pkg/util/kube/pod_test.go @@ -1039,3 +1039,183 @@ func TestExitPodWithMessage(t *testing.T) { }) } } + +func TestGetLoadAffinityByStorageClass(t *testing.T) { + tests := []struct { + name string + affinityList []*LoadAffinity + scName string + expectedAffinity *LoadAffinity + }{ + { + name: "get global affinity", + affinityList: []*LoadAffinity{ + { + NodeSelector: metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "kubernetes.io/arch", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"amd64"}, + }, + }, + }, + StorageClass: "storage-class-01", + }, + { + NodeSelector: metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "kubernetes.io/os", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"Linux"}, + }, + }, + }, + }, + }, + scName: "", + expectedAffinity: &LoadAffinity{ + NodeSelector: metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "kubernetes.io/os", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"Linux"}, + }, + }, + }, + }, + }, + { + name: "get affinity for StorageClass but only global affinity exists", + affinityList: []*LoadAffinity{ + { + NodeSelector: metav1.LabelSelector{ + 
MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "kubernetes.io/os", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"Linux"}, + }, + }, + }, + }, + { + NodeSelector: metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "kubernetes.io/arch", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"amd64"}, + }, + }, + }, + }, + { + NodeSelector: metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "kubernetes.io/os", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"Windows"}, + }, + }, + }, + }, + }, + scName: "storage-class-01", + expectedAffinity: &LoadAffinity{ + NodeSelector: metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "kubernetes.io/os", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"Linux"}, + }, + }, + }, + }, + }, + { + name: "get affinity for StorageClass", + affinityList: []*LoadAffinity{ + { + NodeSelector: metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "kubernetes.io/control-plane=", + Operator: metav1.LabelSelectorOpIn, + Values: []string{""}, + }, + }, + }, + }, + { + NodeSelector: metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "kubernetes.io/os", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"Linux"}, + }, + }, + }, + StorageClass: "storage-class-01", + }, + { + NodeSelector: metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "kubernetes.io/arch", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"amd64"}, + }, + }, + }, + StorageClass: "invalid-storage-class", + }, + }, + scName: "storage-class-01", + expectedAffinity: &LoadAffinity{ + NodeSelector: metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "kubernetes.io/os", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"Linux"}, + }, + }, + }, + StorageClass: "storage-class-01", + }, + }, + { + name: "Cannot find a match Affinity", + affinityList: []*LoadAffinity{ + { + NodeSelector: metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "kubernetes.io/arch", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"amd64"}, + }, + }, + }, + StorageClass: "invalid-storage-class", + }, + }, + scName: "storage-class-01", + expectedAffinity: nil, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + result := GetLoadAffinityByStorageClass(test.affinityList, test.scName, velerotest.NewLogger()) + + assert.Equal(t, test.expectedAffinity, result) + }) + } +} diff --git a/pkg/util/kube/pvc_pv.go b/pkg/util/kube/pvc_pv.go index 3ccba32ae..634d79127 100644 --- a/pkg/util/kube/pvc_pv.go +++ b/pkg/util/kube/pvc_pv.go @@ -305,8 +305,14 @@ func SetPVReclaimPolicy(ctx context.Context, pvGetter corev1client.CoreV1Interfa // WaitPVCConsumed waits for a PVC to be consumed by a pod so that the selected node is set by the pod scheduling; or does // nothing if the consuming doesn't affect the PV provision. // The latest PVC and the selected node will be returned. 
-func WaitPVCConsumed(ctx context.Context, pvcGetter corev1client.CoreV1Interface, pvc string, namespace string, - storageClient storagev1.StorageV1Interface, timeout time.Duration, ignoreConsume bool) (string, *corev1api.PersistentVolumeClaim, error) { +func WaitPVCConsumed( + ctx context.Context, + pvcGetter corev1client.CoreV1Interface, + pvc string, namespace string, + storageClient storagev1.StorageV1Interface, + timeout time.Duration, + ignoreConsume bool, +) (string, *corev1api.PersistentVolumeClaim, error) { selectedNode := "" var updated *corev1api.PersistentVolumeClaim var storageClass *storagev1api.StorageClass diff --git a/site/content/docs/main/csi-snapshot-data-movement.md b/site/content/docs/main/csi-snapshot-data-movement.md index 5da028865..544542e18 100644 --- a/site/content/docs/main/csi-snapshot-data-movement.md +++ b/site/content/docs/main/csi-snapshot-data-movement.md @@ -400,7 +400,7 @@ Sometimes, `RestorePVC` needs to be configured to increase the performance of re [12]: performance-guidance.md [13]: https://kubernetes.io/docs/concepts/workloads/pods/pod-qos/ [14]: node-agent-concurrency.md -[15]: data-movement-backup-node-selection.md +[15]: data-movement-node-selection.md [16]: data-movement-backup-pvc-configuration.md [17]: backup-repository-configuration.md [18]: https://github.com/vmware-tanzu/velero/pull/7576 diff --git a/site/content/docs/main/data-movement-backup-node-selection.md b/site/content/docs/main/data-movement-backup-node-selection.md deleted file mode 100644 index 3dfc6826b..000000000 --- a/site/content/docs/main/data-movement-backup-node-selection.md +++ /dev/null @@ -1,78 +0,0 @@ ---- -title: "Node Selection for Data Movement Backup" -layout: docs ---- - -Velero node-agent is a daemonset hosting the data movement modules to complete the concrete work of backups/restores. -Varying from the data size, data complexity, resource availability, the data movement may take a long time and remarkable resources (CPU, memory, network bandwidth, etc.) during the backup and restore. - -Velero data movement backup supports to constrain the nodes where it runs. This is helpful in below scenarios: -- Prevent the data movement backup from running in specific nodes because users have more critical workloads in the nodes -- Constrain the data movement backup to run in specific nodes because these nodes have more resources than others -- Constrain the data movement backup to run in specific nodes because the storage allows volume/snapshot provisions in these nodes only - -Velero introduces a new section in the node-agent ConfigMap, called ```loadAffinity```, through which you can specify the nodes to/not to run data movement backups, in the affinity and anti-affinity flavors. -If it is not there, a ConfigMap should be created manually. The ConfigMap should be in the same namespace where Velero is installed. If multiple Velero instances are installed in different namespaces, there should be one ConfigMap in each namespace which applies to node-agent in that namespace only. The name of the ConfigMap should be specified in the node-agent server parameter ```--node-agent-configmap```. -Node-agent server checks these configurations at startup time. Therefore, you could edit this ConfigMap any time, but in order to make the changes effective, node-agent server needs to be restarted. 
- -The users can specify the ConfigMap name during velero installation by CLI: -`velero install --node-agent-configmap=` - -### Sample -Here is a sample of the ConfigMap with ```loadAffinity```: -```json -{ - "loadAffinity": [ - { - "nodeSelector": { - "matchLabels": { - "beta.kubernetes.io/instance-type": "Standard_B4ms" - }, - "matchExpressions": [ - { - "key": "kubernetes.io/hostname", - "values": [ - "node-1", - "node-2", - "node-3" - ], - "operator": "In" - }, - { - "key": "xxx/critial-workload", - "operator": "DoesNotExist" - } - ] - } - } - ] -} -``` -To create the ConfigMap, save something like the above sample to a json file and then run below command: -``` -kubectl create cm -n velero --from-file= -``` - -To provide the ConfigMap to node-agent, edit the node-agent daemonset and add the ```- --node-agent-configmap``` argument to the spec: -1. Open the node-agent daemonset spec -``` -kubectl edit ds node-agent -n velero -``` -2. Add ```- --node-agent-configmap``` to ```spec.template.spec.containers``` -``` -spec: - template: - spec: - containers: - - args: - - --node-agent-configmap= -``` - -### Affinity -Affinity configuration means allowing the data movement backup to run in the nodes specified. There are two ways to define it: -- It could be defined by `MatchLabels`. The labels defined in `MatchLabels` means a `LabelSelectorOpIn` operation by default, so in the current context, they will be treated as affinity rules. In the above sample, it defines to run data movement backups in nodes with label `beta.kubernetes.io/instance-type` of value `Standard_B4ms` (Run data movement backups in `Standard_B4ms` nodes only). -- It could be defined by `MatchExpressions`. The labels are defined in `Key` and `Values` of `MatchExpressions` and the `Operator` should be defined as `LabelSelectorOpIn` or `LabelSelectorOpExists`. In the above sample, it defines to run data movement backups in nodes with label `kubernetes.io/hostname` of values `node-1`, `node-2` and `node-3` (Run data movement backups in `node-1`, `node-2` and `node-3` only). - -### Anti-affinity -Anti-affinity configuration means preventing the data movement backup from running in the nodes specified. Below is the way to define it: -- It could be defined by `MatchExpressions`. The labels are defined in `Key` and `Values` of `MatchExpressions` and the `Operator` should be defined as `LabelSelectorOpNotIn` or `LabelSelectorOpDoesNotExist`. In the above sample, it disallows data movement backups to run in nodes with label `xxx/critial-workload`. \ No newline at end of file diff --git a/site/content/docs/main/data-movement-node-selection.md b/site/content/docs/main/data-movement-node-selection.md new file mode 100644 index 000000000..0eb17704f --- /dev/null +++ b/site/content/docs/main/data-movement-node-selection.md @@ -0,0 +1,212 @@ +--- +title: "Node Selection for Data Movement" +layout: docs +--- + +Velero node-agent is a DaemonSet hosting the data movement modules to complete the concrete work of backups/restores. +Varying from the data size, data complexity, resource availability, the data movement may take a long time and remarkable resources (CPU, memory, network bandwidth, etc.) during the backup and restore. + +Velero data movement backup and restore support to constrain the nodes where it runs. 
This is helpful in the following scenarios:
+- Prevent the data movement from running on specific nodes because users have more critical workloads on those nodes
+- Constrain the data movement to run on specific nodes because these nodes have more resources than others
+- Constrain the data movement to run on specific nodes because the storage allows volume/snapshot provisioning on these nodes only
+
+Velero introduces a new section in the node-agent ConfigMap, called ```loadAffinity```, through which users can specify the nodes where data movement is or is not allowed to run, in affinity and anti-affinity flavors.
+
+If the ConfigMap doesn't exist, it should be created manually. The ConfigMap should be in the same namespace where Velero is installed. If multiple Velero instances are installed in different namespaces, there should be one ConfigMap in each namespace, and it applies only to the node-agent in that namespace.  The name of the ConfigMap should be specified in the node-agent server parameter ```--node-agent-configmap```.
+The node-agent server checks these configurations at startup time. Therefore, users can edit this ConfigMap at any time, but the node-agent server needs to be restarted for the changes to take effect.
+
+Users can specify the ConfigMap name during Velero installation through the CLI:
+`velero install --node-agent-configmap=<ConfigMap name>`
+
+## Node Selection Rules
+
+### Affinity
+Affinity configuration allows the data movement to run only on the specified nodes. There are two ways to define it:
+- It could be defined by `MatchLabels`. Labels defined in `MatchLabels` imply a `LabelSelectorOpIn` operation by default, so in this context they are treated as affinity rules. In the LoadAffinity example below, data movement runs only on nodes with label `beta.kubernetes.io/instance-type` of value `Standard_B4ms` (that is, only on `Standard_B4ms` nodes).
+- It could be defined by `MatchExpressions`. The labels are defined in the `Key` and `Values` of `MatchExpressions`, and the `Operator` should be `LabelSelectorOpIn` or `LabelSelectorOpExists`. In the LoadAffinity example below, data movement runs only on nodes whose label `kubernetes.io/hostname` has one of the values `node-1`, `node-2` and `node-3` (that is, only on `node-1`, `node-2` and `node-3`).
+
+### Anti-affinity
+Anti-affinity configuration prevents the data movement from running on the specified nodes. It is defined as follows:
+- It could be defined by `MatchExpressions`. The labels are defined in the `Key` and `Values` of `MatchExpressions`, and the `Operator` should be `LabelSelectorOpNotIn` or `LabelSelectorOpDoesNotExist`. In the LoadAffinity example below, data movement is not allowed to run on nodes with the label `xxx/critial-workload`.
+
+## How to create the LoadAffinity ConfigMap and apply it to the node-agent
+
+To create the ConfigMap, save something like the samples in the Examples section below to a JSON file and then run the following command:
+```
+kubectl create cm <ConfigMap name> -n velero --from-file=<json file name>
+```
+
+To provide the ConfigMap to the node-agent, edit the node-agent DaemonSet and add the ```- --node-agent-configmap``` argument to the spec:
+1. Open the node-agent DaemonSet spec
+```
+kubectl edit ds node-agent -n velero
+```
+2. 
Add ```- --node-agent-configmap``` to ```spec.template.spec.containers```
+```
+spec:
+  template:
+    spec:
+      containers:
+      - args:
+        - --node-agent-configmap=<ConfigMap name>
+```
+
+## Examples
+
+### LoadAffinity
+Here is a sample of the ConfigMap with ```loadAffinity```:
+```json
+{
+    "loadAffinity": [
+        {
+            "nodeSelector": {
+                "matchLabels": {
+                    "beta.kubernetes.io/instance-type": "Standard_B4ms"
+                },
+                "matchExpressions": [
+                    {
+                        "key": "kubernetes.io/hostname",
+                        "values": [
+                            "node-1",
+                            "node-2",
+                            "node-3"
+                        ],
+                        "operator": "In"
+                    },
+                    {
+                        "key": "xxx/critial-workload",
+                        "operator": "DoesNotExist"
+                    }
+                ]
+            }
+        }
+    ]
+}
+```
+
+This example demonstrates how to use both `matchLabels` and `matchExpressions` in a single `loadAffinity` element.
+
+### LoadAffinity interacts with LoadAffinityPerStorageClass
+
+``` json
+{
+    "loadAffinity": [
+        {
+            "nodeSelector": {
+                "matchLabels": {
+                    "beta.kubernetes.io/instance-type": "Standard_B4ms"
+                }
+            }
+        },
+        {
+            "nodeSelector": {
+                "matchExpressions": [
+                    {
+                        "key": "kubernetes.io/os",
+                        "values": [
+                            "linux"
+                        ],
+                        "operator": "In"
+                    }
+                ]
+            },
+            "storageClass": "kibishii-storage-class"
+        }
+    ]
+}
+```
+
+This sample demonstrates how `loadAffinity` elements with and without the `storageClass` field work together. If the volume mounted by a VGDP instance is created from StorageClass `kibishii-storage-class`, its pod will run on Linux nodes.
+
+All other VGDP instances will run on nodes whose instance type is `Standard_B4ms`.
+
+### LoadAffinity interacts with BackupPVC
+
+``` json
+{
+    "loadAffinity": [
+        {
+            "nodeSelector": {
+                "matchLabels": {
+                    "beta.kubernetes.io/instance-type": "Standard_B4ms"
+                }
+            },
+            "storageClass": "kibishii-storage-class"
+        },
+        {
+            "nodeSelector": {
+                "matchLabels": {
+                    "beta.kubernetes.io/instance-type": "Standard_B2ms"
+                }
+            },
+            "storageClass": "worker-storagepolicy"
+        }
+    ],
+    "backupPVC": {
+        "kibishii-storage-class": {
+            "storageClass": "worker-storagepolicy"
+        }
+    }
+}
+```
+
+The Velero data mover supports using a different StorageClass to create the backupPVC, per this [design](https://github.com/vmware-tanzu/velero/pull/7982).
+
+In this example, if the backup target PVC's StorageClass is `kibishii-storage-class`, its backupPVC uses StorageClass `worker-storagepolicy`. Because the final StorageClass is `worker-storagepolicy`, the backupPod uses the affinity specified by the `loadAffinity` element whose `storageClass` field is set to `worker-storagepolicy`. The backupPod will be assigned to nodes whose instance type is `Standard_B2ms`.
+
+### LoadAffinity interacts with RestorePVC
+
+``` json
+{
+    "loadAffinity": [
+        {
+            "nodeSelector": {
+                "matchLabels": {
+                    "beta.kubernetes.io/instance-type": "Standard_B4ms"
+                }
+            },
+            "storageClass": "kibishii-storage-class"
+        }
+    ],
+    "restorePVC": {
+        "ignoreDelayBinding": false
+    }
+}
+```
+
+#### StorageClass's bind mode is WaitForFirstConsumer
+
+``` yaml
+apiVersion: storage.k8s.io/v1
+kind: StorageClass
+metadata:
+  name: kibishii-storage-class
+parameters:
+  svStorageClass: worker-storagepolicy
+provisioner: csi.vsphere.vmware.com
+reclaimPolicy: Delete
+volumeBindingMode: WaitForFirstConsumer
+```
+
+Suppose the restorePVC is created from StorageClass `kibishii-storage-class`, whose volumeBindingMode is `WaitForFirstConsumer`. 
+Although `loadAffinityPerStorageClass` has a section that matches this StorageClass, `ignoreDelayBinding` is set to `false`, so the Velero exposer waits until the target Pod is scheduled to a node and returns that node as the SelectedNode for the restorePVC.
+As a result, the `loadAffinityPerStorageClass` setting will not take effect.
+
+#### StorageClass's bind mode is Immediate
+
+``` yaml
+apiVersion: storage.k8s.io/v1
+kind: StorageClass
+metadata:
+  name: kibishii-storage-class
+parameters:
+  svStorageClass: worker-storagepolicy
+provisioner: csi.vsphere.vmware.com
+reclaimPolicy: Delete
+volumeBindingMode: Immediate
+```
+
+Because the StorageClass's volumeBindingMode is `Immediate`, the restorePVC is not created according to the target Pod's scheduling, even though `ignoreDelayBinding` is set to `false`.
+
+The restorePod will be assigned to nodes whose instance type is `Standard_B4ms`.
diff --git a/site/data/docs/main-toc.yml b/site/data/docs/main-toc.yml
index 731befd5e..22b273b38 100644
--- a/site/data/docs/main-toc.yml
+++ b/site/data/docs/main-toc.yml
@@ -49,16 +49,6 @@ toc:
         url: /namespace
       - page: CSI Support
         url: /csi
-      - page: CSI Snapshot Data Movement
-        url: /csi-snapshot-data-movement
-      - page: Node-agent Concurrency
-        url: /node-agent-concurrency
-      - page: Data Movement Backup PVC Configuration
-        url: /data-movement-backup-pvc-configuration
-      - page: Data Movement Restore PVC Configuration
-        url: /data-movement-restore-pvc-configuration
-      - page: Data Movement Pod Resource Configuration
-        url: /data-movement-pod-resource-configuration
       - page: Backup Repository Configuration
         url: /backup-repository-configuration
       - page: Verifying Self-signed Certificates
@@ -71,6 +61,20 @@ toc:
         url: /repository-maintenance
       - page: Backup Restore Windows Workloads
         url: /backup-restore-windows
+  - title: Data Mover
+    subfolderitems:
+      - page: CSI Snapshot Data Mover
+        url: /csi-snapshot-data-movement
+      - page: Data Movement Backup PVC Configuration
+        url: /data-movement-backup-pvc-configuration
+      - page: Data Movement Restore PVC Configuration
+        url: /data-movement-restore-pvc-configuration
+      - page: Data Movement Pod Resource Configuration
+        url: /data-movement-pod-resource-configuration
+      - page: Data Movement Node Selection Configuration
+        url: /data-movement-node-selection
+      - page: Node-agent Concurrency
+        url: /node-agent-concurrency
   - title: Plugins
     subfolderitems:
       - page: Overview
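
Below is a minimal sketch, not part of the patch, illustrating how the new `kube.GetLoadAffinityByStorageClass` helper added in `pkg/util/kube/pod.go` resolves a per-StorageClass `LoadAffinity` entry and falls back to the global entry; the StorageClass names and node selectors are illustrative assumptions.

```go
package main

import (
	"fmt"

	"github.com/sirupsen/logrus"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	"github.com/vmware-tanzu/velero/pkg/util/kube"
)

func main() {
	// Affinity list as it would be parsed from the node-agent ConfigMap's "loadAffinity" section.
	affinityList := []*kube.LoadAffinity{
		{
			// Global entry: no StorageClass, used when no per-StorageClass entry matches.
			NodeSelector: metav1.LabelSelector{
				MatchLabels: map[string]string{"beta.kubernetes.io/instance-type": "Standard_B4ms"},
			},
		},
		{
			// Per-StorageClass entry: applies only to VGDP pods whose volume uses this StorageClass.
			NodeSelector: metav1.LabelSelector{
				MatchExpressions: []metav1.LabelSelectorRequirement{
					{
						Key:      "kubernetes.io/os",
						Operator: metav1.LabelSelectorOpIn,
						Values:   []string{"linux"},
					},
				},
			},
			StorageClass: "kibishii-storage-class",
		},
	}

	logger := logrus.New()

	// A per-StorageClass match wins: returns the "kibishii-storage-class" entry.
	perSC := kube.GetLoadAffinityByStorageClass(affinityList, "kibishii-storage-class", logger)
	fmt.Println(perSC.StorageClass)

	// No per-StorageClass match: the global entry (empty StorageClass) is returned.
	global := kube.GetLoadAffinityByStorageClass(affinityList, "some-other-sc", logger)
	fmt.Println(global.StorageClass == "")
}
```

Given an affinity resolved this way, the exposers in this patch convert it with `kube.ToSystemAffinity([]*kube.LoadAffinity{affinity})` before setting it on the backup or restore pod spec.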