diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index 8e1bc1219..a9f19b09f 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -12,7 +12,7 @@ jobs: get-go-version: uses: ./.github/workflows/get-go-version.yaml with: - ref: ${ github.ref } + ref: ${{ github.ref }} build: name: Build diff --git a/.github/workflows/stale-issues.yml b/.github/workflows/stale-issues.yml index 8f94ea65b..34f9dd11a 100644 --- a/.github/workflows/stale-issues.yml +++ b/.github/workflows/stale-issues.yml @@ -7,7 +7,7 @@ jobs: stale: runs-on: ubuntu-latest steps: - - uses: actions/stale@v10.0.0 + - uses: actions/stale@v10.1.0 with: repo-token: ${{ secrets.GITHUB_TOKEN }} stale-issue-message: "This issue is stale because it has been open 60 days with no activity. Remove stale label or comment or this will be closed in 14 days. If a Velero team member has requested log or more information, please provide the output of the shared commands." diff --git a/changelogs/unreleased/9269-Lyndon-Li b/changelogs/unreleased/9269-Lyndon-Li new file mode 100644 index 000000000..2ed7cdd4f --- /dev/null +++ b/changelogs/unreleased/9269-Lyndon-Li @@ -0,0 +1 @@ +Fix issue #7904, remove the code and doc for PVC node selection \ No newline at end of file diff --git a/changelogs/unreleased/9295-sseago b/changelogs/unreleased/9295-sseago new file mode 100644 index 000000000..92f44c7ba --- /dev/null +++ b/changelogs/unreleased/9295-sseago @@ -0,0 +1 @@ +Add option for privileged fs-backup pod diff --git a/changelogs/unreleased/9296-Lyndon-Li b/changelogs/unreleased/9296-Lyndon-Li new file mode 100644 index 000000000..696943ede --- /dev/null +++ b/changelogs/unreleased/9296-Lyndon-Li @@ -0,0 +1 @@ +Fix issue #9267, add events to data mover prepare diagnostic \ No newline at end of file diff --git a/changelogs/unreleased/9302-blackpiglet b/changelogs/unreleased/9302-blackpiglet new file mode 100644 index 000000000..63576a535 --- /dev/null +++ b/changelogs/unreleased/9302-blackpiglet @@ -0,0 +1 @@ +VerifyJSONConfigs verifies every element in Data. diff --git a/changelogs/unreleased/9329-T4iFooN-IX b/changelogs/unreleased/9329-T4iFooN-IX new file mode 100644 index 000000000..2209ecb73 --- /dev/null +++ b/changelogs/unreleased/9329-T4iFooN-IX @@ -0,0 +1 @@ +Fix typos in documentation diff --git a/changelogs/unreleased/9333-Lyndon-Li b/changelogs/unreleased/9333-Lyndon-Li new file mode 100644 index 000000000..91d551881 --- /dev/null +++ b/changelogs/unreleased/9333-Lyndon-Li @@ -0,0 +1 @@ +Fix issue #9332, add bytesDone for cache files \ No newline at end of file diff --git a/pkg/cmd/cli/install/install.go b/pkg/cmd/cli/install/install.go index c7a7dfe7a..6698010bd 100644 --- a/pkg/cmd/cli/install/install.go +++ b/pkg/cmd/cli/install/install.go @@ -545,24 +545,22 @@ func (o *Options) Validate(c *cobra.Command, args []string, f client.Factory) er return fmt.Errorf("fail to create go-client %w", err) } - // If either Linux or Windows node-agent is installed, and the node-agent-configmap - // is specified, need to validate the ConfigMap. 
- if (o.UseNodeAgent || o.UseNodeAgentWindows) && len(o.NodeAgentConfigMap) > 0 { + if len(o.NodeAgentConfigMap) > 0 { if err := kubeutil.VerifyJSONConfigs(c.Context(), o.Namespace, crClient, o.NodeAgentConfigMap, &velerotypes.NodeAgentConfigs{}); err != nil { - return fmt.Errorf("--node-agent-configmap specified ConfigMap %s is invalid", o.NodeAgentConfigMap) + return fmt.Errorf("--node-agent-configmap specified ConfigMap %s is invalid: %w", o.NodeAgentConfigMap, err) } } if len(o.RepoMaintenanceJobConfigMap) > 0 { if err := kubeutil.VerifyJSONConfigs(c.Context(), o.Namespace, crClient, o.RepoMaintenanceJobConfigMap, &velerotypes.JobConfigs{}); err != nil { - return fmt.Errorf("--repo-maintenance-job-configmap specified ConfigMap %s is invalid", o.RepoMaintenanceJobConfigMap) + return fmt.Errorf("--repo-maintenance-job-configmap specified ConfigMap %s is invalid: %w", o.RepoMaintenanceJobConfigMap, err) } } if len(o.BackupRepoConfigMap) > 0 { config := make(map[string]any) if err := kubeutil.VerifyJSONConfigs(c.Context(), o.Namespace, crClient, o.BackupRepoConfigMap, &config); err != nil { - return fmt.Errorf("--backup-repository-configmap specified ConfigMap %s is invalid", o.BackupRepoConfigMap) + return fmt.Errorf("--backup-repository-configmap specified ConfigMap %s is invalid: %w", o.BackupRepoConfigMap, err) } } diff --git a/pkg/cmd/cli/nodeagent/server.go b/pkg/cmd/cli/nodeagent/server.go index 873e03beb..d3563c0f5 100644 --- a/pkg/cmd/cli/nodeagent/server.go +++ b/pkg/cmd/cli/nodeagent/server.go @@ -308,6 +308,8 @@ func (s *nodeAgentServer) run() { s.logger.Infof("Using customized backupPVC config %v", backupPVCConfig) } + privilegedFsBackup := s.dataPathConfigs != nil && s.dataPathConfigs.PrivilegedFsBackup + podResources := corev1api.ResourceRequirements{} if s.dataPathConfigs != nil && s.dataPathConfigs.PodResources != nil { if res, err := kube.ParseResourceRequirements(s.dataPathConfigs.PodResources.CPURequest, s.dataPathConfigs.PodResources.MemoryRequest, s.dataPathConfigs.PodResources.CPULimit, s.dataPathConfigs.PodResources.MemoryLimit); err != nil { @@ -327,12 +329,12 @@ func (s *nodeAgentServer) run() { } } - pvbReconciler := controller.NewPodVolumeBackupReconciler(s.mgr.GetClient(), s.mgr, s.kubeClient, s.dataPathMgr, s.vgdpCounter, s.nodeName, s.config.dataMoverPrepareTimeout, s.config.resourceTimeout, podResources, s.metrics, s.logger, dataMovePriorityClass) + pvbReconciler := controller.NewPodVolumeBackupReconciler(s.mgr.GetClient(), s.mgr, s.kubeClient, s.dataPathMgr, s.vgdpCounter, s.nodeName, s.config.dataMoverPrepareTimeout, s.config.resourceTimeout, podResources, s.metrics, s.logger, dataMovePriorityClass, privilegedFsBackup) if err := pvbReconciler.SetupWithManager(s.mgr); err != nil { s.logger.Fatal(err, "unable to create controller", "controller", constant.ControllerPodVolumeBackup) } - pvrReconciler := controller.NewPodVolumeRestoreReconciler(s.mgr.GetClient(), s.mgr, s.kubeClient, s.dataPathMgr, s.vgdpCounter, s.nodeName, s.config.dataMoverPrepareTimeout, s.config.resourceTimeout, podResources, s.logger, dataMovePriorityClass) + pvrReconciler := controller.NewPodVolumeRestoreReconciler(s.mgr.GetClient(), s.mgr, s.kubeClient, s.dataPathMgr, s.vgdpCounter, s.nodeName, s.config.dataMoverPrepareTimeout, s.config.resourceTimeout, podResources, s.logger, dataMovePriorityClass, privilegedFsBackup) if err := pvrReconciler.SetupWithManager(s.mgr); err != nil { s.logger.WithError(err).Fatal("Unable to create the pod volume restore controller") } diff --git 
a/pkg/controller/pod_volume_backup_controller.go b/pkg/controller/pod_volume_backup_controller.go index 3a446379f..625ec8337 100644 --- a/pkg/controller/pod_volume_backup_controller.go +++ b/pkg/controller/pod_volume_backup_controller.go @@ -60,7 +60,7 @@ const ( // NewPodVolumeBackupReconciler creates the PodVolumeBackupReconciler instance func NewPodVolumeBackupReconciler(client client.Client, mgr manager.Manager, kubeClient kubernetes.Interface, dataPathMgr *datapath.Manager, counter *exposer.VgdpCounter, nodeName string, preparingTimeout time.Duration, resourceTimeout time.Duration, podResources corev1api.ResourceRequirements, - metrics *metrics.ServerMetrics, logger logrus.FieldLogger, dataMovePriorityClass string) *PodVolumeBackupReconciler { + metrics *metrics.ServerMetrics, logger logrus.FieldLogger, dataMovePriorityClass string, privileged bool) *PodVolumeBackupReconciler { return &PodVolumeBackupReconciler{ client: client, mgr: mgr, @@ -77,6 +77,7 @@ func NewPodVolumeBackupReconciler(client client.Client, mgr manager.Manager, kub exposer: exposer.NewPodVolumeExposer(kubeClient, logger), cancelledPVB: make(map[string]time.Time), dataMovePriorityClass: dataMovePriorityClass, + privileged: privileged, } } @@ -97,6 +98,7 @@ type PodVolumeBackupReconciler struct { resourceTimeout time.Duration cancelledPVB map[string]time.Time dataMovePriorityClass string + privileged bool } // +kubebuilder:rbac:groups=velero.io,resources=podvolumebackups,verbs=get;list;watch;create;update;patch;delete @@ -837,6 +839,7 @@ func (r *PodVolumeBackupReconciler) setupExposeParam(pvb *velerov1api.PodVolumeB Resources: r.podResources, // Priority class name for the data mover pod, retrieved from node-agent-configmap PriorityClassName: r.dataMovePriorityClass, + Privileged: r.privileged, } } diff --git a/pkg/controller/pod_volume_backup_controller_test.go b/pkg/controller/pod_volume_backup_controller_test.go index 51e75edb2..a76b32b58 100644 --- a/pkg/controller/pod_volume_backup_controller_test.go +++ b/pkg/controller/pod_volume_backup_controller_test.go @@ -151,7 +151,8 @@ func initPVBReconcilerWithError(needError ...error) (*PodVolumeBackupReconciler, corev1api.ResourceRequirements{}, metrics.NewServerMetrics(), velerotest.NewLogger(), - "", // dataMovePriorityClass + "", // dataMovePriorityClass + false, // privileged ), nil } diff --git a/pkg/controller/pod_volume_restore_controller.go b/pkg/controller/pod_volume_restore_controller.go index ce0d312a0..0ed06b980 100644 --- a/pkg/controller/pod_volume_restore_controller.go +++ b/pkg/controller/pod_volume_restore_controller.go @@ -56,7 +56,7 @@ import ( func NewPodVolumeRestoreReconciler(client client.Client, mgr manager.Manager, kubeClient kubernetes.Interface, dataPathMgr *datapath.Manager, counter *exposer.VgdpCounter, nodeName string, preparingTimeout time.Duration, resourceTimeout time.Duration, podResources corev1api.ResourceRequirements, - logger logrus.FieldLogger, dataMovePriorityClass string) *PodVolumeRestoreReconciler { + logger logrus.FieldLogger, dataMovePriorityClass string, privileged bool) *PodVolumeRestoreReconciler { return &PodVolumeRestoreReconciler{ client: client, mgr: mgr, @@ -72,6 +72,7 @@ func NewPodVolumeRestoreReconciler(client client.Client, mgr manager.Manager, ku exposer: exposer.NewPodVolumeExposer(kubeClient, logger), cancelledPVR: make(map[string]time.Time), dataMovePriorityClass: dataMovePriorityClass, + privileged: privileged, } } @@ -90,6 +91,7 @@ type PodVolumeRestoreReconciler struct { resourceTimeout time.Duration 
cancelledPVR map[string]time.Time dataMovePriorityClass string + privileged bool } // +kubebuilder:rbac:groups=velero.io,resources=podvolumerestores,verbs=get;list;watch;create;update;patch;delete @@ -896,6 +898,7 @@ func (r *PodVolumeRestoreReconciler) setupExposeParam(pvr *velerov1api.PodVolume Resources: r.podResources, // Priority class name for the data mover pod, retrieved from node-agent-configmap PriorityClassName: r.dataMovePriorityClass, + Privileged: r.privileged, } } diff --git a/pkg/controller/pod_volume_restore_controller_test.go b/pkg/controller/pod_volume_restore_controller_test.go index 409672c32..e993815b5 100644 --- a/pkg/controller/pod_volume_restore_controller_test.go +++ b/pkg/controller/pod_volume_restore_controller_test.go @@ -617,7 +617,7 @@ func initPodVolumeRestoreReconcilerWithError(objects []runtime.Object, cliObj [] dataPathMgr := datapath.NewManager(1) - return NewPodVolumeRestoreReconciler(fakeClient, nil, fakeKubeClient, dataPathMgr, nil, "test-node", time.Minute*5, time.Minute, corev1api.ResourceRequirements{}, velerotest.NewLogger(), ""), nil + return NewPodVolumeRestoreReconciler(fakeClient, nil, fakeKubeClient, dataPathMgr, nil, "test-node", time.Minute*5, time.Minute, corev1api.ResourceRequirements{}, velerotest.NewLogger(), "", false), nil } func TestPodVolumeRestoreReconcile(t *testing.T) { diff --git a/pkg/exposer/csi_snapshot.go b/pkg/exposer/csi_snapshot.go index 781739ff5..d20638d7a 100644 --- a/pkg/exposer/csi_snapshot.go +++ b/pkg/exposer/csi_snapshot.go @@ -381,8 +381,13 @@ func (e *csiSnapshotExposer) DiagnoseExpose(ctx context.Context, ownerObject cor diag += fmt.Sprintf("error getting backup vs %s, err: %v\n", backupVSName, err) } + events, err := e.kubeClient.CoreV1().Events(ownerObject.Namespace).List(ctx, metav1.ListOptions{}) + if err != nil { + diag += fmt.Sprintf("error listing events, err: %v\n", err) + } + if pod != nil { - diag += kube.DiagnosePod(pod) + diag += kube.DiagnosePod(pod, events) if pod.Spec.NodeName != "" { if err := nodeagent.KbClientIsRunningInNode(ctx, ownerObject.Namespace, pod.Spec.NodeName, e.kubeClient); err != nil { @@ -392,7 +397,7 @@ func (e *csiSnapshotExposer) DiagnoseExpose(ctx context.Context, ownerObject cor } if pvc != nil { - diag += kube.DiagnosePVC(pvc) + diag += kube.DiagnosePVC(pvc, events) if pvc.Spec.VolumeName != "" { if pv, err := e.kubeClient.CoreV1().PersistentVolumes().Get(ctx, pvc.Spec.VolumeName, metav1.GetOptions{}); err != nil { @@ -404,7 +409,7 @@ func (e *csiSnapshotExposer) DiagnoseExpose(ctx context.Context, ownerObject cor } if vs != nil { - diag += csi.DiagnoseVS(vs) + diag += csi.DiagnoseVS(vs, events) if vs.Status != nil && vs.Status.BoundVolumeSnapshotContentName != nil && *vs.Status.BoundVolumeSnapshotContentName != "" { if vsc, err := e.csiSnapshotClient.VolumeSnapshotContents().Get(ctx, *vs.Status.BoundVolumeSnapshotContentName, metav1.GetOptions{}); err != nil { diff --git a/pkg/exposer/csi_snapshot_test.go b/pkg/exposer/csi_snapshot_test.go index 7e8e6d883..d419b6126 100644 --- a/pkg/exposer/csi_snapshot_test.go +++ b/pkg/exposer/csi_snapshot_test.go @@ -1288,6 +1288,7 @@ func Test_csiSnapshotExposer_DiagnoseExpose(t *testing.T) { ObjectMeta: metav1.ObjectMeta{ Namespace: velerov1.DefaultNamespace, Name: "fake-backup", + UID: "fake-pod-uid", OwnerReferences: []metav1.OwnerReference{ { APIVersion: backup.APIVersion, @@ -1313,6 +1314,7 @@ func Test_csiSnapshotExposer_DiagnoseExpose(t *testing.T) { ObjectMeta: metav1.ObjectMeta{ Namespace: velerov1.DefaultNamespace, Name: 
"fake-backup", + UID: "fake-pod-uid", OwnerReferences: []metav1.OwnerReference{ { APIVersion: backup.APIVersion, @@ -1341,6 +1343,7 @@ func Test_csiSnapshotExposer_DiagnoseExpose(t *testing.T) { ObjectMeta: metav1.ObjectMeta{ Namespace: velerov1.DefaultNamespace, Name: "fake-backup", + UID: "fake-pvc-uid", OwnerReferences: []metav1.OwnerReference{ { APIVersion: backup.APIVersion, @@ -1359,6 +1362,7 @@ func Test_csiSnapshotExposer_DiagnoseExpose(t *testing.T) { ObjectMeta: metav1.ObjectMeta{ Namespace: velerov1.DefaultNamespace, Name: "fake-backup", + UID: "fake-pvc-uid", OwnerReferences: []metav1.OwnerReference{ { APIVersion: backup.APIVersion, @@ -1404,6 +1408,7 @@ func Test_csiSnapshotExposer_DiagnoseExpose(t *testing.T) { ObjectMeta: metav1.ObjectMeta{ Namespace: velerov1.DefaultNamespace, Name: "fake-backup", + UID: "fake-vs-uid", OwnerReferences: []metav1.OwnerReference{ { APIVersion: backup.APIVersion, @@ -1419,6 +1424,7 @@ func Test_csiSnapshotExposer_DiagnoseExpose(t *testing.T) { ObjectMeta: metav1.ObjectMeta{ Namespace: velerov1.DefaultNamespace, Name: "fake-backup", + UID: "fake-vs-uid", OwnerReferences: []metav1.OwnerReference{ { APIVersion: backup.APIVersion, @@ -1436,6 +1442,7 @@ func Test_csiSnapshotExposer_DiagnoseExpose(t *testing.T) { ObjectMeta: metav1.ObjectMeta{ Namespace: velerov1.DefaultNamespace, Name: "fake-backup", + UID: "fake-vs-uid", OwnerReferences: []metav1.OwnerReference{ { APIVersion: backup.APIVersion, @@ -1633,6 +1640,74 @@ PVC velero/fake-backup, phase Pending, binding to fake-pv PV fake-pv, phase Pending, reason , message fake-pv-message VS velero/fake-backup, bind to fake-vsc, readyToUse false, errMessage fake-vs-message VSC fake-vsc, readyToUse false, errMessage fake-vsc-message, handle +end diagnose CSI exposer`, + }, + { + name: "with events", + ownerBackup: backup, + kubeClientObj: []runtime.Object{ + &backupPodWithNodeName, + &backupPVCWithVolumeName, + &backupPV, + &nodeAgentPod, + &corev1api.Event{ + ObjectMeta: metav1.ObjectMeta{Namespace: velerov1.DefaultNamespace, Name: "event-1"}, + Type: corev1api.EventTypeWarning, + InvolvedObject: corev1api.ObjectReference{UID: "fake-uid-1"}, + Reason: "reason-1", + Message: "message-1", + }, + &corev1api.Event{ + ObjectMeta: metav1.ObjectMeta{Namespace: velerov1.DefaultNamespace, Name: "event-2"}, + Type: corev1api.EventTypeWarning, + InvolvedObject: corev1api.ObjectReference{UID: "fake-pod-uid"}, + Reason: "reason-2", + Message: "message-2", + }, + &corev1api.Event{ + ObjectMeta: metav1.ObjectMeta{Namespace: velerov1.DefaultNamespace, Name: "event-3"}, + Type: corev1api.EventTypeWarning, + InvolvedObject: corev1api.ObjectReference{UID: "fake-pvc-uid"}, + Reason: "reason-3", + Message: "message-3", + }, + &corev1api.Event{ + ObjectMeta: metav1.ObjectMeta{Namespace: velerov1.DefaultNamespace, Name: "event-4"}, + Type: corev1api.EventTypeWarning, + InvolvedObject: corev1api.ObjectReference{UID: "fake-vs-uid"}, + Reason: "reason-4", + Message: "message-4", + }, + &corev1api.Event{ + ObjectMeta: metav1.ObjectMeta{Namespace: "other-namespace", Name: "event-5"}, + Type: corev1api.EventTypeWarning, + InvolvedObject: corev1api.ObjectReference{UID: "fake-pod-uid"}, + Reason: "reason-5", + Message: "message-5", + }, + &corev1api.Event{ + ObjectMeta: metav1.ObjectMeta{Namespace: velerov1.DefaultNamespace, Name: "event-6"}, + Type: corev1api.EventTypeWarning, + InvolvedObject: corev1api.ObjectReference{UID: "fake-pod-uid"}, + Reason: "reason-6", + Message: "message-6", + }, + }, + snapshotClientObj: 
[]runtime.Object{ + &backupVSWithVSC, + &backupVSC, + }, + expected: `begin diagnose CSI exposer +Pod velero/fake-backup, phase Pending, node name fake-node +Pod condition Initialized, status True, reason , message fake-pod-message +Pod event reason reason-2, message message-2 +Pod event reason reason-6, message message-6 +PVC velero/fake-backup, phase Pending, binding to fake-pv +PVC event reason reason-3, message message-3 +PV fake-pv, phase Pending, reason , message fake-pv-message +VS velero/fake-backup, bind to fake-vsc, readyToUse false, errMessage fake-vs-message +VS event reason reason-4, message message-4 +VSC fake-vsc, readyToUse false, errMessage fake-vsc-message, handle end diagnose CSI exposer`, }, } diff --git a/pkg/exposer/generic_restore.go b/pkg/exposer/generic_restore.go index 26019d5d4..8691eedfc 100644 --- a/pkg/exposer/generic_restore.go +++ b/pkg/exposer/generic_restore.go @@ -287,8 +287,13 @@ func (e *genericRestoreExposer) DiagnoseExpose(ctx context.Context, ownerObject diag += fmt.Sprintf("error getting restore pvc %s, err: %v\n", restorePVCName, err) } + events, err := e.kubeClient.CoreV1().Events(ownerObject.Namespace).List(ctx, metav1.ListOptions{}) + if err != nil { + diag += fmt.Sprintf("error listing events, err: %v\n", err) + } + if pod != nil { - diag += kube.DiagnosePod(pod) + diag += kube.DiagnosePod(pod, events) if pod.Spec.NodeName != "" { if err := nodeagent.KbClientIsRunningInNode(ctx, ownerObject.Namespace, pod.Spec.NodeName, e.kubeClient); err != nil { @@ -298,7 +303,7 @@ func (e *genericRestoreExposer) DiagnoseExpose(ctx context.Context, ownerObject } if pvc != nil { - diag += kube.DiagnosePVC(pvc) + diag += kube.DiagnosePVC(pvc, events) if pvc.Spec.VolumeName != "" { if pv, err := e.kubeClient.CoreV1().PersistentVolumes().Get(ctx, pvc.Spec.VolumeName, metav1.GetOptions{}); err != nil { diff --git a/pkg/exposer/generic_restore_test.go b/pkg/exposer/generic_restore_test.go index b5679889b..2e528d6a2 100644 --- a/pkg/exposer/generic_restore_test.go +++ b/pkg/exposer/generic_restore_test.go @@ -549,6 +549,7 @@ func Test_ReastoreDiagnoseExpose(t *testing.T) { ObjectMeta: metav1.ObjectMeta{ Namespace: velerov1.DefaultNamespace, Name: "fake-restore", + UID: "fake-pod-uid", OwnerReferences: []metav1.OwnerReference{ { APIVersion: restore.APIVersion, @@ -574,6 +575,7 @@ func Test_ReastoreDiagnoseExpose(t *testing.T) { ObjectMeta: metav1.ObjectMeta{ Namespace: velerov1.DefaultNamespace, Name: "fake-restore", + UID: "fake-pod-uid", OwnerReferences: []metav1.OwnerReference{ { APIVersion: restore.APIVersion, @@ -602,6 +604,7 @@ func Test_ReastoreDiagnoseExpose(t *testing.T) { ObjectMeta: metav1.ObjectMeta{ Namespace: velerov1.DefaultNamespace, Name: "fake-restore", + UID: "fake-pvc-uid", OwnerReferences: []metav1.OwnerReference{ { APIVersion: restore.APIVersion, @@ -620,6 +623,7 @@ func Test_ReastoreDiagnoseExpose(t *testing.T) { ObjectMeta: metav1.ObjectMeta{ Namespace: velerov1.DefaultNamespace, Name: "fake-restore", + UID: "fake-pvc-uid", OwnerReferences: []metav1.OwnerReference{ { APIVersion: restore.APIVersion, @@ -758,6 +762,60 @@ Pod velero/fake-restore, phase Pending, node name fake-node Pod condition Initialized, status True, reason , message fake-pod-message PVC velero/fake-restore, phase Pending, binding to fake-pv PV fake-pv, phase Pending, reason , message fake-pv-message +end diagnose restore exposer`, + }, + { + name: "with events", + ownerRestore: restore, + kubeClientObj: []runtime.Object{ + &restorePodWithNodeName, + &restorePVCWithVolumeName, 
+ &restorePV, + &nodeAgentPod, + &corev1api.Event{ + ObjectMeta: metav1.ObjectMeta{Namespace: velerov1.DefaultNamespace, Name: "event-1"}, + Type: corev1api.EventTypeWarning, + InvolvedObject: corev1api.ObjectReference{UID: "fake-uid-1"}, + Reason: "reason-1", + Message: "message-1", + }, + &corev1api.Event{ + ObjectMeta: metav1.ObjectMeta{Namespace: velerov1.DefaultNamespace, Name: "event-2"}, + Type: corev1api.EventTypeWarning, + InvolvedObject: corev1api.ObjectReference{UID: "fake-pod-uid"}, + Reason: "reason-2", + Message: "message-2", + }, + &corev1api.Event{ + ObjectMeta: metav1.ObjectMeta{Namespace: velerov1.DefaultNamespace, Name: "event-3"}, + Type: corev1api.EventTypeWarning, + InvolvedObject: corev1api.ObjectReference{UID: "fake-pvc-uid"}, + Reason: "reason-3", + Message: "message-3", + }, + &corev1api.Event{ + ObjectMeta: metav1.ObjectMeta{Namespace: "other-namespace", Name: "event-4"}, + Type: corev1api.EventTypeWarning, + InvolvedObject: corev1api.ObjectReference{UID: "fake-pod-uid"}, + Reason: "reason-4", + Message: "message-4", + }, + &corev1api.Event{ + ObjectMeta: metav1.ObjectMeta{Namespace: velerov1.DefaultNamespace, Name: "event-5"}, + Type: corev1api.EventTypeWarning, + InvolvedObject: corev1api.ObjectReference{UID: "fake-pod-uid"}, + Reason: "reason-5", + Message: "message-5", + }, + }, + expected: `begin diagnose restore exposer +Pod velero/fake-restore, phase Pending, node name fake-node +Pod condition Initialized, status True, reason , message fake-pod-message +Pod event reason reason-2, message message-2 +Pod event reason reason-5, message message-5 +PVC velero/fake-restore, phase Pending, binding to fake-pv +PVC event reason reason-3, message message-3 +PV fake-pv, phase Pending, reason , message fake-pv-message end diagnose restore exposer`, }, } diff --git a/pkg/exposer/pod_volume.go b/pkg/exposer/pod_volume.go index ea1fb2d1f..591600eb3 100644 --- a/pkg/exposer/pod_volume.go +++ b/pkg/exposer/pod_volume.go @@ -73,6 +73,9 @@ type PodVolumeExposeParam struct { // PriorityClassName is the priority class name for the data mover pod PriorityClassName string + + // Privileged indicates whether to create the pod with a privileged container + Privileged bool } // PodVolumeExposer is the interfaces for a pod volume exposer @@ -153,7 +156,7 @@ func (e *podVolumeExposer) Expose(ctx context.Context, ownerObject corev1api.Obj curLog.WithField("path", path).Infof("Host path is retrieved for pod %s, volume %s", param.ClientPodName, param.ClientPodVolume) - hostingPod, err := e.createHostingPod(ctx, ownerObject, param.Type, path.ByPath, param.OperationTimeout, param.HostingPodLabels, param.HostingPodAnnotations, param.HostingPodTolerations, pod.Spec.NodeName, param.Resources, nodeOS, param.PriorityClassName) + hostingPod, err := e.createHostingPod(ctx, ownerObject, param.Type, path.ByPath, param.OperationTimeout, param.HostingPodLabels, param.HostingPodAnnotations, param.HostingPodTolerations, pod.Spec.NodeName, param.Resources, nodeOS, param.PriorityClassName, param.Privileged) if err != nil { return errors.Wrapf(err, "error to create hosting pod") } @@ -248,8 +251,13 @@ func (e *podVolumeExposer) DiagnoseExpose(ctx context.Context, ownerObject corev diag += fmt.Sprintf("error getting hosting pod %s, err: %v\n", hostingPodName, err) } + events, err := e.kubeClient.CoreV1().Events(ownerObject.Namespace).List(ctx, metav1.ListOptions{}) + if err != nil { + diag += fmt.Sprintf("error listing events, err: %v\n", err) + } + if pod != nil { - diag += kube.DiagnosePod(pod) + diag 
+= kube.DiagnosePod(pod, events) if pod.Spec.NodeName != "" { if err := nodeagent.KbClientIsRunningInNode(ctx, ownerObject.Namespace, pod.Spec.NodeName, e.kubeClient); err != nil { @@ -269,7 +277,7 @@ func (e *podVolumeExposer) CleanUp(ctx context.Context, ownerObject corev1api.Ob } func (e *podVolumeExposer) createHostingPod(ctx context.Context, ownerObject corev1api.ObjectReference, exposeType string, hostPath string, - operationTimeout time.Duration, label map[string]string, annotation map[string]string, toleration []corev1api.Toleration, selectedNode string, resources corev1api.ResourceRequirements, nodeOS string, priorityClassName string) (*corev1api.Pod, error) { + operationTimeout time.Duration, label map[string]string, annotation map[string]string, toleration []corev1api.Toleration, selectedNode string, resources corev1api.ResourceRequirements, nodeOS string, priorityClassName string, privileged bool) (*corev1api.Pod, error) { hostingPodName := ownerObject.Name containerName := string(ownerObject.UID) @@ -327,6 +335,7 @@ func (e *podVolumeExposer) createHostingPod(ctx context.Context, ownerObject cor args = append(args, podInfo.logLevelArgs...) var securityCtx *corev1api.PodSecurityContext + var containerSecurityCtx *corev1api.SecurityContext nodeSelector := map[string]string{} podOS := corev1api.PodOS{} if nodeOS == kube.NodeOSWindows { @@ -359,6 +368,9 @@ func (e *podVolumeExposer) createHostingPod(ctx context.Context, ownerObject cor securityCtx = &corev1api.PodSecurityContext{ RunAsUser: &userID, } + containerSecurityCtx = &corev1api.SecurityContext{ + Privileged: &privileged, + } nodeSelector[kube.NodeOSLabel] = kube.NodeOSLinux podOS.Name = kube.NodeOSLinux @@ -394,6 +406,7 @@ func (e *podVolumeExposer) createHostingPod(ctx context.Context, ownerObject cor Env: podInfo.env, EnvFrom: podInfo.envFrom, Resources: resources, + SecurityContext: containerSecurityCtx, }, }, PriorityClassName: priorityClassName, diff --git a/pkg/exposer/pod_volume_test.go b/pkg/exposer/pod_volume_test.go index f36fda4f4..f48e9376b 100644 --- a/pkg/exposer/pod_volume_test.go +++ b/pkg/exposer/pod_volume_test.go @@ -190,6 +190,29 @@ func TestPodVolumeExpose(t *testing.T) { return "/var/lib/kubelet/pods/pod-id-xxx/volumes/kubernetes.io~csi/pvc-id-xxx/mount", nil }, }, + { + name: "succeed with privileged pod", + ownerBackup: backup, + exposeParam: PodVolumeExposeParam{ + ClientNamespace: "fake-ns", + ClientPodName: "fake-client-pod", + ClientPodVolume: "fake-client-volume", + Privileged: true, + }, + kubeClientObj: []runtime.Object{ + podWithNode, + node, + daemonSet, + }, + funcGetPodVolumeHostPath: func(context.Context, *corev1api.Pod, string, kubernetes.Interface, filesystem.Interface, logrus.FieldLogger) (datapath.AccessPoint, error) { + return datapath.AccessPoint{ + ByPath: "/host_pods/pod-id-xxx/volumes/kubernetes.io~csi/pvc-id-xxx/mount", + }, nil + }, + funcExtractPodVolumeHostPath: func(context.Context, string, kubernetes.Interface, string, string) (string, error) { + return "/var/lib/kubelet/pods/pod-id-xxx/volumes/kubernetes.io~csi/pvc-id-xxx/mount", nil + }, + }, } for _, test := range tests { @@ -443,6 +466,7 @@ func TestPodVolumeDiagnoseExpose(t *testing.T) { ObjectMeta: metav1.ObjectMeta{ Namespace: velerov1.DefaultNamespace, Name: "fake-backup", + UID: "fake-pod-uid", OwnerReferences: []metav1.OwnerReference{ { APIVersion: backup.APIVersion, @@ -468,6 +492,7 @@ func TestPodVolumeDiagnoseExpose(t *testing.T) { ObjectMeta: metav1.ObjectMeta{ Namespace: velerov1.DefaultNamespace, Name: 
"fake-backup", + UID: "fake-pod-uid", OwnerReferences: []metav1.OwnerReference{ { APIVersion: backup.APIVersion, @@ -564,6 +589,48 @@ end diagnose pod volume exposer`, expected: `begin diagnose pod volume exposer Pod velero/fake-backup, phase Pending, node name fake-node Pod condition Initialized, status True, reason , message fake-pod-message +end diagnose pod volume exposer`, + }, + { + name: "with events", + ownerBackup: backup, + kubeClientObj: []runtime.Object{ + &backupPodWithNodeName, + &nodeAgentPod, + &corev1api.Event{ + ObjectMeta: metav1.ObjectMeta{Namespace: velerov1.DefaultNamespace, Name: "event-1"}, + Type: corev1api.EventTypeWarning, + InvolvedObject: corev1api.ObjectReference{UID: "fake-uid-1"}, + Reason: "reason-1", + Message: "message-1", + }, + &corev1api.Event{ + ObjectMeta: metav1.ObjectMeta{Namespace: velerov1.DefaultNamespace, Name: "event-2"}, + Type: corev1api.EventTypeWarning, + InvolvedObject: corev1api.ObjectReference{UID: "fake-pod-uid"}, + Reason: "reason-2", + Message: "message-2", + }, + &corev1api.Event{ + ObjectMeta: metav1.ObjectMeta{Namespace: "other-namespace", Name: "event-3"}, + Type: corev1api.EventTypeWarning, + InvolvedObject: corev1api.ObjectReference{UID: "fake-pod-uid"}, + Reason: "reason-3", + Message: "message-3", + }, + &corev1api.Event{ + ObjectMeta: metav1.ObjectMeta{Namespace: velerov1.DefaultNamespace, Name: "event-4"}, + Type: corev1api.EventTypeWarning, + InvolvedObject: corev1api.ObjectReference{UID: "fake-pod-uid"}, + Reason: "reason-4", + Message: "message-4", + }, + }, + expected: `begin diagnose pod volume exposer +Pod velero/fake-backup, phase Pending, node name fake-node +Pod condition Initialized, status True, reason , message fake-pod-message +Pod event reason reason-2, message message-2 +Pod event reason reason-4, message message-4 end diagnose pod volume exposer`, }, } diff --git a/pkg/nodeagent/node_agent.go b/pkg/nodeagent/node_agent.go index 7268b589a..a5de2465c 100644 --- a/pkg/nodeagent/node_agent.go +++ b/pkg/nodeagent/node_agent.go @@ -143,6 +143,10 @@ func GetConfigs(ctx context.Context, namespace string, kubeClient kubernetes.Int return nil, errors.Errorf("data is not available in config map %s", configName) } + if len(cm.Data) > 1 { + return nil, errors.Errorf("more than one keys are found in ConfigMap %s's data. 
only expect one", configName) + } + jsonString := "" for _, v := range cm.Data { jsonString = v diff --git a/pkg/nodeagent/node_agent_test.go b/pkg/nodeagent/node_agent_test.go index bdc1085b4..cb46ee569 100644 --- a/pkg/nodeagent/node_agent_test.go +++ b/pkg/nodeagent/node_agent_test.go @@ -249,6 +249,7 @@ func TestGetConfigs(t *testing.T) { cmWithValidData := builder.ForConfigMap("fake-ns", "node-agent-config").Data("fake-key", "{\"loadConcurrency\":{\"globalConfig\": 5}}").Result() cmWithPriorityClass := builder.ForConfigMap("fake-ns", "node-agent-config").Data("fake-key", "{\"priorityClassName\": \"high-priority\"}").Result() cmWithPriorityClassAndOther := builder.ForConfigMap("fake-ns", "node-agent-config").Data("fake-key", "{\"priorityClassName\": \"low-priority\", \"loadConcurrency\":{\"globalConfig\": 3}}").Result() + cmWithMultipleKeysInData := builder.ForConfigMap("fake-ns", "node-agent-config").Data("fake-key-1", "{}", "fake-key-2", "{}").Result() tests := []struct { name string @@ -331,6 +332,14 @@ func TestGetConfigs(t *testing.T) { }, }, }, + { + name: "ConfigMap's Data has more than one key", + namespace: "fake-ns", + kubeClientObj: []runtime.Object{ + cmWithMultipleKeysInData, + }, + expectErr: "more than one keys are found in ConfigMap node-agent-config's data. only expect one", + }, } for _, test := range tests { diff --git a/pkg/restore/actions/pvc_action.go b/pkg/restore/actions/pvc_action.go index fdc0c26ec..a4a63374d 100644 --- a/pkg/restore/actions/pvc_action.go +++ b/pkg/restore/actions/pvc_action.go @@ -17,20 +17,15 @@ limitations under the License. package actions import ( - "context" - "github.com/pkg/errors" "github.com/sirupsen/logrus" corev1api "k8s.io/api/core/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime" corev1client "k8s.io/client-go/kubernetes/typed/core/v1" velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" "github.com/vmware-tanzu/velero/pkg/kuberesource" - "github.com/vmware-tanzu/velero/pkg/plugin/framework/common" "github.com/vmware-tanzu/velero/pkg/plugin/velero" "github.com/vmware-tanzu/velero/pkg/util" ) @@ -91,46 +86,13 @@ func (p *PVCAction) Execute(input *velero.RestoreItemActionExecuteInput) (*veler return nil, errors.WithStack(err) } - if pvc.Annotations == nil { - pvc.Annotations = make(map[string]string) - } - log := p.logger.WithFields(map[string]any{ "kind": pvc.Kind, "namespace": pvc.Namespace, "name": pvc.Name, }) - // Handle selected node annotation - node, ok := pvc.Annotations[AnnSelectedNode] - if ok { - // fetch node mapping from configMap - newNode, err := getNewNodeFromConfigMap(p.configMapClient, node) - if err != nil { - return nil, err - } - - if len(newNode) != 0 { - // Check whether the mapped node exists first. - exists, err := isNodeExist(p.nodeClient, newNode) - if err != nil { - return nil, errors.Wrapf(err, "error checking %s's mapped node %s existence", node, newNode) - } - if !exists { - log.Warnf("Selected-node's mapped node doesn't exist: source: %s, dest: %s. 
Please check the ConfigMap with label velero.io/change-pvc-node-selector.", node, newNode) - } - - // set node selector - // We assume that node exist for node-mapping - pvc.Annotations[AnnSelectedNode] = newNode - log.Infof("Updating selected-node to %s from %s", newNode, node) - } else { - log.Info("Clearing PVC selected-node annotation") - delete(pvc.Annotations, AnnSelectedNode) - } - } - - // Remove other annotations + // Remove PVC annotations removePVCAnnotations( &pvc, []string{ @@ -138,6 +100,7 @@ func (p *PVCAction) Execute(input *velero.RestoreItemActionExecuteInput) (*veler AnnBoundByController, AnnStorageProvisioner, AnnBetaStorageProvisioner, + AnnSelectedNode, velerov1api.VolumeSnapshotLabel, velerov1api.DataUploadNameAnnotation, }, @@ -167,34 +130,6 @@ func (p *PVCAction) Execute(input *velero.RestoreItemActionExecuteInput) (*veler return output, nil } -func getNewNodeFromConfigMap(client corev1client.ConfigMapInterface, node string) (string, error) { - // fetch node mapping from configMap - config, err := common.GetPluginConfig(common.PluginKindRestoreItemAction, "velero.io/change-pvc-node-selector", client) - if err != nil { - return "", err - } - - if config == nil { - // there is no node mapping defined for change-pvc-node - // so we will return empty new node - return "", nil - } - - return config.Data[node], nil -} - -// isNodeExist check if node resource exist or not -func isNodeExist(nodeClient corev1client.NodeInterface, name string) (bool, error) { - _, err := nodeClient.Get(context.TODO(), name, metav1.GetOptions{}) - if err != nil { - if apierrors.IsNotFound(err) { - return false, nil - } - return false, err - } - return true, nil -} - func removePVCAnnotations(pvc *corev1api.PersistentVolumeClaim, remove []string) { for k := range pvc.Annotations { if util.Contains(remove, k) { diff --git a/pkg/restore/actions/pvc_action_test.go b/pkg/restore/actions/pvc_action_test.go index f00ec9264..aa2ddeeef 100644 --- a/pkg/restore/actions/pvc_action_test.go +++ b/pkg/restore/actions/pvc_action_test.go @@ -17,11 +17,9 @@ limitations under the License. package actions import ( - "bytes" "fmt" "testing" - "github.com/sirupsen/logrus" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" corev1api "k8s.io/api/core/v1" @@ -42,105 +40,57 @@ import ( // desired result. func TestPVCActionExecute(t *testing.T) { tests := []struct { - name string - pvc *corev1api.PersistentVolumeClaim - configMap *corev1api.ConfigMap - node *corev1api.Node - newNode *corev1api.Node - want *corev1api.PersistentVolumeClaim - wantErr error + name string + pvc *corev1api.PersistentVolumeClaim + want *corev1api.PersistentVolumeClaim + wantErr error }{ { - name: "a valid mapping for a persistent volume claim is applied correctly", - pvc: builder.ForPersistentVolumeClaim("source-ns", "pvc-1"). - ObjectMeta( - builder.WithAnnotations("volume.kubernetes.io/selected-node", "source-node"), - ).Result(), - configMap: builder.ForConfigMap("velero", "change-pvc-node"). - ObjectMeta(builder.WithLabels("velero.io/plugin-config", "", "velero.io/change-pvc-node-selector", "RestoreItemAction")). - Data("source-node", "dest-node"). - Result(), - newNode: builder.ForNode("dest-node").Result(), - want: builder.ForPersistentVolumeClaim("source-ns", "pvc-1"). 
- ObjectMeta( - builder.WithAnnotations("volume.kubernetes.io/selected-node", "dest-node"), - ).Result(), - }, - { - name: "when no config map exists for the plugin, the item is returned without node selector", - pvc: builder.ForPersistentVolumeClaim("source-ns", "pvc-1"). - ObjectMeta( - builder.WithAnnotations("volume.kubernetes.io/selected-node", "source-node"), - ).Result(), - configMap: builder.ForConfigMap("velero", "change-pvc-node"). - ObjectMeta(builder.WithLabels("velero.io/plugin-config", "", "velero.io/some-other-plugin", "RestoreItemAction")). - Data("source-node", "dest-node"). - Result(), - node: builder.ForNode("source-node").Result(), - want: builder.ForPersistentVolumeClaim("source-ns", "pvc-1").Result(), - }, - { - name: "when no node-mappings exist in the plugin config map, the item is returned without node selector", - pvc: builder.ForPersistentVolumeClaim("source-ns", "pvc-1"). - ObjectMeta( - builder.WithAnnotations("volume.kubernetes.io/selected-node", "source-node"), - ).Result(), - configMap: builder.ForConfigMap("velero", "change-pvc-node"). - ObjectMeta(builder.WithLabels("velero.io/plugin-config", "", "velero.io/change-pvc-node-selector", "RestoreItemAction")). - Result(), - node: builder.ForNode("source-node").Result(), - want: builder.ForPersistentVolumeClaim("source-ns", "pvc-1").Result(), - }, - { - name: "when persistent volume claim has no node selector, the item is returned as-is", + name: "a persistent volume claim with no annotation", pvc: builder.ForPersistentVolumeClaim("source-ns", "pvc-1").Result(), - configMap: builder.ForConfigMap("velero", "change-pvc-node"). - ObjectMeta(builder.WithLabels("velero.io/plugin-config", "", "velero.io/change-pvc-node-selector", "RestoreItemAction")). - Data("source-node", "dest-node"). - Result(), want: builder.ForPersistentVolumeClaim("source-ns", "pvc-1").Result(), }, { - name: "when persistent volume claim's node-selector has no mapping in the config map, the item is returned without node selector", + name: "a persistent volume claim with selected-node annotation", pvc: builder.ForPersistentVolumeClaim("source-ns", "pvc-1"). ObjectMeta( builder.WithAnnotations("volume.kubernetes.io/selected-node", "source-node"), ).Result(), - configMap: builder.ForConfigMap("velero", "change-pvc-node"). - ObjectMeta(builder.WithLabels("velero.io/plugin-config", "", "velero.io/change-pvc-node-selector", "RestoreItemAction")). - Data("source-node-1", "dest-node"). - Result(), - node: builder.ForNode("source-node").Result(), - want: builder.ForPersistentVolumeClaim("source-ns", "pvc-1").Result(), + want: builder.ForPersistentVolumeClaim("source-ns", "pvc-1").ObjectMeta(builder.WithAnnotationsMap(map[string]string{})).Result(), + }, + { + name: "a persistent volume claim with other annotation", + pvc: builder.ForPersistentVolumeClaim("source-ns", "pvc-1"). + ObjectMeta( + builder.WithAnnotations("other-anno-1", "other-value-1", "other-anno-2", "other-value-2"), + ).Result(), + want: builder.ForPersistentVolumeClaim("source-ns", "pvc-1").ObjectMeta( + builder.WithAnnotations("other-anno-1", "other-value-1", "other-anno-2", "other-value-2"), + ).Result(), + }, + { + name: "a persistent volume claim with other annotation and selected-node annotation", + pvc: builder.ForPersistentVolumeClaim("source-ns", "pvc-1"). 
+ ObjectMeta( + builder.WithAnnotations("other-anno", "other-value", "volume.kubernetes.io/selected-node", "source-node"), + ).Result(), + want: builder.ForPersistentVolumeClaim("source-ns", "pvc-1").ObjectMeta( + builder.WithAnnotations("other-anno", "other-value"), + ).Result(), }, } for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { clientset := fake.NewSimpleClientset() - logger := logrus.StandardLogger() - buf := bytes.Buffer{} - logrus.SetOutput(&buf) + a := NewPVCAction( - logger, + velerotest.NewLogger(), clientset.CoreV1().ConfigMaps("velero"), clientset.CoreV1().Nodes(), ) // set up test data - if tc.configMap != nil { - _, err := clientset.CoreV1().ConfigMaps(tc.configMap.Namespace).Create(t.Context(), tc.configMap, metav1.CreateOptions{}) - require.NoError(t, err) - } - - if tc.node != nil { - _, err := clientset.CoreV1().Nodes().Create(t.Context(), tc.node, metav1.CreateOptions{}) - require.NoError(t, err) - } - if tc.newNode != nil { - _, err := clientset.CoreV1().Nodes().Create(t.Context(), tc.newNode, metav1.CreateOptions{}) - require.NoError(t, err) - } unstructuredMap, err := runtime.DefaultUnstructuredConverter.ToUnstructured(tc.pvc) require.NoError(t, err) @@ -156,10 +106,6 @@ func TestPVCActionExecute(t *testing.T) { // execute method under test res, err := a.Execute(input) - // Make sure mapped selected-node exists. - logOutput := buf.String() - assert.NotContains(t, logOutput, "Selected-node's mapped node doesn't exist") - // validate for both error and non-error cases switch { case tc.wantErr != nil: diff --git a/pkg/types/node_agent.go b/pkg/types/node_agent.go index 778aefcf1..b335df275 100644 --- a/pkg/types/node_agent.go +++ b/pkg/types/node_agent.go @@ -84,4 +84,7 @@ type NodeAgentConfigs struct { // PriorityClassName is the priority class name for data mover pods created by the node agent PriorityClassName string `json:"priorityClassName,omitempty"` + + // PrivilegedFsBackup determines whether to create fs-backup pods as privileged pods + PrivilegedFsBackup bool `json:"privilegedFsBackup,omitempty"` } diff --git a/pkg/uploader/kopia/progress.go b/pkg/uploader/kopia/progress.go index 1fe0c41fe..b4e9ce1f2 100644 --- a/pkg/uploader/kopia/progress.go +++ b/pkg/uploader/kopia/progress.go @@ -121,6 +121,7 @@ func (p *Progress) UploadStarted() {} // CachedFile statistic the total bytes been cached currently func (p *Progress) CachedFile(fname string, numBytes int64) { atomic.AddInt64(&p.cachedBytes, numBytes) + atomic.AddInt64(&p.processedBytes, numBytes) p.UpdateProgress() } diff --git a/pkg/util/csi/volume_snapshot.go b/pkg/util/csi/volume_snapshot.go index 8e59dd69f..57e6f2e1d 100644 --- a/pkg/util/csi/volume_snapshot.go +++ b/pkg/util/csi/volume_snapshot.go @@ -689,7 +689,7 @@ func WaitUntilVSCHandleIsReady( return vsc, nil } -func DiagnoseVS(vs *snapshotv1api.VolumeSnapshot) string { +func DiagnoseVS(vs *snapshotv1api.VolumeSnapshot, events *corev1api.EventList) string { vscName := "" readyToUse := false errMessage := "" @@ -710,6 +710,14 @@ func DiagnoseVS(vs *snapshotv1api.VolumeSnapshot) string { diag := fmt.Sprintf("VS %s/%s, bind to %s, readyToUse %v, errMessage %s\n", vs.Namespace, vs.Name, vscName, readyToUse, errMessage) + if events != nil { + for _, e := range events.Items { + if e.InvolvedObject.UID == vs.UID && e.Type == corev1api.EventTypeWarning { + diag += fmt.Sprintf("VS event reason %s, message %s\n", e.Reason, e.Message) + } + } + } + return diag } diff --git a/pkg/util/csi/volume_snapshot_test.go 
b/pkg/util/csi/volume_snapshot_test.go index 91c9a1ea3..2f735559c 100644 --- a/pkg/util/csi/volume_snapshot_test.go +++ b/pkg/util/csi/volume_snapshot_test.go @@ -1699,6 +1699,7 @@ func TestDiagnoseVS(t *testing.T) { testCases := []struct { name string vs *snapshotv1api.VolumeSnapshot + events *corev1api.EventList expected string }{ { @@ -1781,11 +1782,81 @@ func TestDiagnoseVS(t *testing.T) { }, expected: "VS fake-ns/fake-vs, bind to fake-vsc, readyToUse true, errMessage fake-message\n", }, + { + name: "VS with VSC and empty event", + vs: &snapshotv1api.VolumeSnapshot{ + ObjectMeta: metav1.ObjectMeta{ + Name: "fake-vs", + Namespace: "fake-ns", + }, + Status: &snapshotv1api.VolumeSnapshotStatus{ + BoundVolumeSnapshotContentName: &vscName, + ReadyToUse: &readyToUse, + Error: &snapshotv1api.VolumeSnapshotError{}, + }, + }, + events: &corev1api.EventList{}, + expected: "VS fake-ns/fake-vs, bind to fake-vsc, readyToUse true, errMessage \n", + }, + { + name: "VS with VSC and events", + vs: &snapshotv1api.VolumeSnapshot{ + ObjectMeta: metav1.ObjectMeta{ + Name: "fake-vs", + Namespace: "fake-ns", + UID: "fake-vs-uid", + }, + Status: &snapshotv1api.VolumeSnapshotStatus{ + BoundVolumeSnapshotContentName: &vscName, + ReadyToUse: &readyToUse, + Error: &snapshotv1api.VolumeSnapshotError{}, + }, + }, + events: &corev1api.EventList{Items: []corev1api.Event{ + { + InvolvedObject: corev1api.ObjectReference{UID: "fake-uid-1"}, + Type: corev1api.EventTypeWarning, + Reason: "reason-1", + Message: "message-1", + }, + { + InvolvedObject: corev1api.ObjectReference{UID: "fake-uid-2"}, + Type: corev1api.EventTypeWarning, + Reason: "reason-2", + Message: "message-2", + }, + { + InvolvedObject: corev1api.ObjectReference{UID: "fake-vs-uid"}, + Type: corev1api.EventTypeWarning, + Reason: "reason-3", + Message: "message-3", + }, + { + InvolvedObject: corev1api.ObjectReference{UID: "fake-vs-uid"}, + Type: corev1api.EventTypeNormal, + Reason: "reason-4", + Message: "message-4", + }, + { + InvolvedObject: corev1api.ObjectReference{UID: "fake-vs-uid"}, + Type: corev1api.EventTypeNormal, + Reason: "reason-5", + Message: "message-5", + }, + { + InvolvedObject: corev1api.ObjectReference{UID: "fake-vs-uid"}, + Type: corev1api.EventTypeWarning, + Reason: "reason-6", + Message: "message-6", + }, + }}, + expected: "VS fake-ns/fake-vs, bind to fake-vsc, readyToUse true, errMessage \nVS event reason reason-3, message message-3\nVS event reason reason-6, message message-6\n", + }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - diag := DiagnoseVS(tc.vs) + diag := DiagnoseVS(tc.vs, tc.events) assert.Equal(t, tc.expected, diag) }) } diff --git a/pkg/util/kube/pod.go b/pkg/util/kube/pod.go index 6b9cf7d58..86aa2e47b 100644 --- a/pkg/util/kube/pod.go +++ b/pkg/util/kube/pod.go @@ -268,13 +268,21 @@ func ToSystemAffinity(loadAffinities []*LoadAffinity) *corev1api.Affinity { return nil } -func DiagnosePod(pod *corev1api.Pod) string { +func DiagnosePod(pod *corev1api.Pod, events *corev1api.EventList) string { diag := fmt.Sprintf("Pod %s/%s, phase %s, node name %s\n", pod.Namespace, pod.Name, pod.Status.Phase, pod.Spec.NodeName) for _, condition := range pod.Status.Conditions { diag += fmt.Sprintf("Pod condition %s, status %s, reason %s, message %s\n", condition.Type, condition.Status, condition.Reason, condition.Message) } + if events != nil { + for _, e := range events.Items { + if e.InvolvedObject.UID == pod.UID && e.Type == corev1api.EventTypeWarning { + diag += fmt.Sprintf("Pod event reason %s, message %s\n", 
e.Reason, e.Message) + } + } + } + return diag } diff --git a/pkg/util/kube/pod_test.go b/pkg/util/kube/pod_test.go index f01d5ab35..ba930019e 100644 --- a/pkg/util/kube/pod_test.go +++ b/pkg/util/kube/pod_test.go @@ -896,10 +896,11 @@ func TestDiagnosePod(t *testing.T) { testCases := []struct { name string pod *corev1api.Pod + events *corev1api.EventList expected string }{ { - name: "pod with all info", + name: "pod with all info but event", pod: &corev1api.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: "fake-pod", @@ -928,11 +929,111 @@ func TestDiagnosePod(t *testing.T) { }, expected: "Pod fake-ns/fake-pod, phase Pending, node name fake-node\nPod condition Initialized, status True, reason fake-reason-1, message fake-message-1\nPod condition PodScheduled, status False, reason fake-reason-2, message fake-message-2\n", }, + { + name: "pod with all info and empty event list", + pod: &corev1api.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "fake-pod", + Namespace: "fake-ns", + }, + Spec: corev1api.PodSpec{ + NodeName: "fake-node", + }, + Status: corev1api.PodStatus{ + Phase: corev1api.PodPending, + Conditions: []corev1api.PodCondition{ + { + Type: corev1api.PodInitialized, + Status: corev1api.ConditionTrue, + Reason: "fake-reason-1", + Message: "fake-message-1", + }, + { + Type: corev1api.PodScheduled, + Status: corev1api.ConditionFalse, + Reason: "fake-reason-2", + Message: "fake-message-2", + }, + }, + }, + }, + events: &corev1api.EventList{}, + expected: "Pod fake-ns/fake-pod, phase Pending, node name fake-node\nPod condition Initialized, status True, reason fake-reason-1, message fake-message-1\nPod condition PodScheduled, status False, reason fake-reason-2, message fake-message-2\n", + }, + { + name: "pod with all info and events", + pod: &corev1api.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "fake-pod", + Namespace: "fake-ns", + UID: "fake-pod-uid", + }, + Spec: corev1api.PodSpec{ + NodeName: "fake-node", + }, + Status: corev1api.PodStatus{ + Phase: corev1api.PodPending, + Conditions: []corev1api.PodCondition{ + { + Type: corev1api.PodInitialized, + Status: corev1api.ConditionTrue, + Reason: "fake-reason-1", + Message: "fake-message-1", + }, + { + Type: corev1api.PodScheduled, + Status: corev1api.ConditionFalse, + Reason: "fake-reason-2", + Message: "fake-message-2", + }, + }, + }, + }, + events: &corev1api.EventList{Items: []corev1api.Event{ + { + InvolvedObject: corev1api.ObjectReference{UID: "fake-uid-1"}, + Type: corev1api.EventTypeWarning, + Reason: "reason-1", + Message: "message-1", + }, + { + InvolvedObject: corev1api.ObjectReference{UID: "fake-uid-2"}, + Type: corev1api.EventTypeWarning, + Reason: "reason-2", + Message: "message-2", + }, + { + InvolvedObject: corev1api.ObjectReference{UID: "fake-pod-uid"}, + Type: corev1api.EventTypeWarning, + Reason: "reason-3", + Message: "message-3", + }, + { + InvolvedObject: corev1api.ObjectReference{UID: "fake-pod-uid"}, + Type: corev1api.EventTypeNormal, + Reason: "reason-4", + Message: "message-4", + }, + { + InvolvedObject: corev1api.ObjectReference{UID: "fake-pod-uid"}, + Type: corev1api.EventTypeNormal, + Reason: "reason-5", + Message: "message-5", + }, + { + InvolvedObject: corev1api.ObjectReference{UID: "fake-pod-uid"}, + Type: corev1api.EventTypeWarning, + Reason: "reason-6", + Message: "message-6", + }, + }}, + expected: "Pod fake-ns/fake-pod, phase Pending, node name fake-node\nPod condition Initialized, status True, reason fake-reason-1, message fake-message-1\nPod condition PodScheduled, status False, reason fake-reason-2, 
message fake-message-2\nPod event reason reason-3, message message-3\nPod event reason reason-6, message message-6\n", + }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - diag := DiagnosePod(tc.pod) + diag := DiagnosePod(tc.pod, tc.events) assert.Equal(t, tc.expected, diag) }) } diff --git a/pkg/util/kube/pvc_pv.go b/pkg/util/kube/pvc_pv.go index e18d33c77..786cef2a5 100644 --- a/pkg/util/kube/pvc_pv.go +++ b/pkg/util/kube/pvc_pv.go @@ -463,8 +463,18 @@ func GetPVCForPodVolume(vol *corev1api.Volume, pod *corev1api.Pod, crClient crcl return pvc, nil } -func DiagnosePVC(pvc *corev1api.PersistentVolumeClaim) string { - return fmt.Sprintf("PVC %s/%s, phase %s, binding to %s\n", pvc.Namespace, pvc.Name, pvc.Status.Phase, pvc.Spec.VolumeName) +func DiagnosePVC(pvc *corev1api.PersistentVolumeClaim, events *corev1api.EventList) string { + diag := fmt.Sprintf("PVC %s/%s, phase %s, binding to %s\n", pvc.Namespace, pvc.Name, pvc.Status.Phase, pvc.Spec.VolumeName) + + if events != nil { + for _, e := range events.Items { + if e.InvolvedObject.UID == pvc.UID && e.Type == corev1api.EventTypeWarning { + diag += fmt.Sprintf("PVC event reason %s, message %s\n", e.Reason, e.Message) + } + } + } + + return diag } func DiagnosePV(pv *corev1api.PersistentVolume) string { diff --git a/pkg/util/kube/pvc_pv_test.go b/pkg/util/kube/pvc_pv_test.go index f52cdeb98..d94efa62e 100644 --- a/pkg/util/kube/pvc_pv_test.go +++ b/pkg/util/kube/pvc_pv_test.go @@ -1593,10 +1593,11 @@ func TestDiagnosePVC(t *testing.T) { testCases := []struct { name string pvc *corev1api.PersistentVolumeClaim + events *corev1api.EventList expected string }{ { - name: "pvc with all info", + name: "pvc with all info but events", pvc: &corev1api.PersistentVolumeClaim{ ObjectMeta: metav1.ObjectMeta{ Name: "fake-pvc", @@ -1611,11 +1612,83 @@ func TestDiagnosePVC(t *testing.T) { }, expected: "PVC fake-ns/fake-pvc, phase Pending, binding to fake-pv\n", }, + { + name: "pvc with all info and empty events", + pvc: &corev1api.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "fake-pvc", + Namespace: "fake-ns", + }, + Spec: corev1api.PersistentVolumeClaimSpec{ + VolumeName: "fake-pv", + }, + Status: corev1api.PersistentVolumeClaimStatus{ + Phase: corev1api.ClaimPending, + }, + }, + events: &corev1api.EventList{}, + expected: "PVC fake-ns/fake-pvc, phase Pending, binding to fake-pv\n", + }, + { + name: "pvc with all info and events", + pvc: &corev1api.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "fake-pvc", + Namespace: "fake-ns", + UID: "fake-pvc-uid", + }, + Spec: corev1api.PersistentVolumeClaimSpec{ + VolumeName: "fake-pv", + }, + Status: corev1api.PersistentVolumeClaimStatus{ + Phase: corev1api.ClaimPending, + }, + }, + events: &corev1api.EventList{Items: []corev1api.Event{ + { + InvolvedObject: corev1api.ObjectReference{UID: "fake-uid-1"}, + Type: corev1api.EventTypeWarning, + Reason: "reason-1", + Message: "message-1", + }, + { + InvolvedObject: corev1api.ObjectReference{UID: "fake-uid-2"}, + Type: corev1api.EventTypeWarning, + Reason: "reason-2", + Message: "message-2", + }, + { + InvolvedObject: corev1api.ObjectReference{UID: "fake-pvc-uid"}, + Type: corev1api.EventTypeWarning, + Reason: "reason-3", + Message: "message-3", + }, + { + InvolvedObject: corev1api.ObjectReference{UID: "fake-pvc-uid"}, + Type: corev1api.EventTypeNormal, + Reason: "reason-4", + Message: "message-4", + }, + { + InvolvedObject: corev1api.ObjectReference{UID: "fake-pvc-uid"}, + Type: corev1api.EventTypeNormal, + 
Reason: "reason-5", + Message: "message-5", + }, + { + InvolvedObject: corev1api.ObjectReference{UID: "fake-pvc-uid"}, + Type: corev1api.EventTypeWarning, + Reason: "reason-6", + Message: "message-6", + }, + }}, + expected: "PVC fake-ns/fake-pvc, phase Pending, binding to fake-pv\nPVC event reason reason-3, message message-3\nPVC event reason reason-6, message message-6\n", + }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - diag := DiagnosePVC(tc.pvc) + diag := DiagnosePVC(tc.pvc, tc.events) assert.Equal(t, tc.expected, diag) }) } diff --git a/pkg/util/kube/utils.go b/pkg/util/kube/utils.go index 002070376..5e5e97603 100644 --- a/pkg/util/kube/utils.go +++ b/pkg/util/kube/utils.go @@ -371,15 +371,16 @@ func VerifyJSONConfigs(ctx context.Context, namespace string, crClient client.Cl return errors.Errorf("data is not available in ConfigMap %s", configName) } + // Verify all the keys in ConfigMap's data. jsonString := "" for _, v := range cm.Data { jsonString = v - } - configs := configType - err = json.Unmarshal([]byte(jsonString), configs) - if err != nil { - return errors.Wrapf(err, "error to unmarshall data from ConfigMap %s", configName) + configs := configType + err = json.Unmarshal([]byte(jsonString), configs) + if err != nil { + return errors.Wrapf(err, "error to unmarshall data from ConfigMap %s", configName) + } } return nil diff --git a/site/content/docs/main/customize-installation.md b/site/content/docs/main/customize-installation.md index 2ac23e5cd..d62945a35 100644 --- a/site/content/docs/main/customize-installation.md +++ b/site/content/docs/main/customize-installation.md @@ -23,6 +23,8 @@ By default, `velero install` does not install Velero's [File System Backup][3]. If you've already run `velero install` without the `--use-node-agent` flag, you can run the same command again, including the `--use-node-agent` flag, to add the file system backup to your existing install. +Note that for some use cases (including installation on OpenShift clusters) the fs-backup pods must run in a Privileged security context. This is configured through the node-agent configmap (see below) by setting `privilegedFsBackup` to `true` in the configmap. + ## CSI Snapshot Data Movement Velero node-agent is required by [CSI Snapshot Data Movement][12] when Velero built-in data mover is used. By default, `velero install` does not install Velero's node-agent. To enable it, specify the `--use-node-agent` flag. diff --git a/site/content/docs/main/data-movement-pod-resource-configuration.md b/site/content/docs/main/data-movement-pod-resource-configuration.md index 9dfab4a32..53e56d4af 100644 --- a/site/content/docs/main/data-movement-pod-resource-configuration.md +++ b/site/content/docs/main/data-movement-pod-resource-configuration.md @@ -15,7 +15,7 @@ Note: If less resources are assigned to data mover pods, the data movement activ Refer to [Performance Guidance][3] for a guidance of performance vs. resource usage, and it is highly recommended that you perform your own testing to find the best resource limits for your data. Velero introduces a new section in the node-agent configMap, called ```podResources```, through which you can set customized resources configurations for data mover pods. -If it is not there, a configMap should be created manually. The configMap should be in the same namespace where Velero is installed. 
If multiple Velero instances are installed in different namespaces, there should be one configMap in each namespace which applies to node-agent in that namespace only. The name of the configMap should be specified in the node-agent server parameter ```--node-agent-config```. +If it is not there, a configMap should be created manually. The configMap should be in the same namespace where Velero is installed. If multiple Velero instances are installed in different namespaces, there should be one configMap in each namespace which applies to node-agent in that namespace only. The name of the configMap should be specified in the node-agent server parameter ```--node-agent-configmap```. Node-agent server checks these configurations at startup time. Therefore, you could edit this configMap any time, but in order to make the changes effective, node-agent server needs to be restarted. ### Sample @@ -39,19 +39,19 @@ To create the configMap, save something like the above sample to a json file and kubectl create cm node-agent-config -n velero --from-file= ``` -To provide the configMap to node-agent, edit the node-agent daemonset and add the ```- --node-agent-config``` argument to the spec: +To provide the configMap to node-agent, edit the node-agent daemonset and add the ```- --node-agent-configmap``` argument to the spec: 1. Open the node-agent daemonset spec ``` kubectl edit ds node-agent -n velero ``` -2. Add ```- --node-agent-config``` to ```spec.template.spec.containers``` +2. Add ```- --node-agent-configmap``` to ```spec.template.spec.containers``` ``` spec: template: spec: containers: - args: - - --node-agent-config= + - --node-agent-configmap= ``` ### Priority Class @@ -126,4 +126,4 @@ kubectl create cm node-agent-config -n velero --from-file=node-agent-config.json [1]: csi-snapshot-data-movement.md [2]: file-system-backup.md [3]: https://kubernetes.io/docs/concepts/workloads/pods/pod-qos/ -[4]: performance-guidance.md \ No newline at end of file +[4]: performance-guidance.md diff --git a/site/content/docs/main/node-agent-prepare-queue-length.md b/site/content/docs/main/node-agent-prepare-queue-length.md index aa2770175..58f81ecf0 100644 --- a/site/content/docs/main/node-agent-prepare-queue-length.md +++ b/site/content/docs/main/node-agent-prepare-queue-length.md @@ -27,22 +27,22 @@ To create the configMap, save something like the above sample to a json file and kubectl create cm node-agent-config -n velero --from-file= ``` -To provide the configMap to node-agent, edit the node-agent daemonset and add the ```- --node-agent-config``` argument to the spec: +To provide the configMap to node-agent, edit the node-agent daemonset and add the ```- --node-agent-configmap``` argument to the spec: 1. Open the node-agent daemonset spec ``` kubectl edit ds node-agent -n velero ``` -2. Add ```- --node-agent-config``` to ```spec.template.spec.containers``` +2. 
Add ```- --node-agent-configmap``` to ```spec.template.spec.containers``` ``` spec: template: spec: containers: - args: - - --node-agent-config= + - --node-agent-configmap= ``` [1]: csi-snapshot-data-movement.md [2]: file-system-backup.md [3]: node-agent-concurrency.md -[4]: data-movement-node-selection.md \ No newline at end of file +[4]: data-movement-node-selection.md diff --git a/site/content/docs/main/restore-reference.md b/site/content/docs/main/restore-reference.md index cf5418f7b..6c5394cf0 100644 --- a/site/content/docs/main/restore-reference.md +++ b/site/content/docs/main/restore-reference.md @@ -215,37 +215,9 @@ data: ### PVC selected-node -Velero by default removes PVC's `volume.kubernetes.io/selected-node` annotation during restore, so that the restored PVC could be provisioned appropriately according to ```WaitForFirstConsumer``` rules, storage topologies and the restored pod's schedule result, etc. +Velero removes PVC's `volume.kubernetes.io/selected-node` annotation during restore, so that the restored PVC could be provisioned appropriately according to ```WaitForFirstConsumer``` rules, storage topologies and the restored pod's schedule result, etc. -For more information of how this selected-node annotation matters to PVC restore, see issue https://github.com/vmware-tanzu/velero/issues/9053. - -As an expectation, when you provide the selected-node configuration, Velero sets the annotation to the node in the configuration, if the node doesn't exist in the cluster then the annotation will also be removed. -Note: This feature is under deprecation as of Velero 1.15, following Velero deprecation policy. This feature is primarily used to remedy some problems in old Kubernetes versions as described [here](https://github.com/vmware-tanzu/velero/pull/2377). It may not work with the new features of Kubernetes and Velero. For more information, see issue https://github.com/vmware-tanzu/velero/issues/9053 for more information. -To configure a selected-node, create a config map in the Velero namespace like the following: - -```yaml -apiVersion: v1 -kind: ConfigMap -metadata: - # any name can be used; Velero uses the labels (below) - # to identify it rather than the name - name: change-pvc-node-selector-config - # must be in the velero namespace - namespace: velero - # the below labels should be used verbatim in your - # ConfigMap. - labels: - # this value-less label identifies the ConfigMap as - # config for a plugin (i.e. the built-in restore item action plugin) - velero.io/plugin-config: "" - # this label identifies the name and kind of plugin - # that this ConfigMap is for. - velero.io/change-pvc-node-selector: RestoreItemAction -data: - # add 1+ key-value pairs here, where the key is the old - # node name and the value is the new node name. - : -``` +For more information of how this selected-node annotation matters to PVC restore, see issue https://github.com/vmware-tanzu/velero/issues/9053. ## Restoring into a different namespace
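For context on the `privilegedFsBackup` option introduced above (the new field in `pkg/types/node_agent.go` that the node-agent server passes through the PodVolumeBackup/PodVolumeRestore reconcilers down to the pod volume exposer), a node-agent ConfigMap that enables it might look like the sketch below. This is only an illustration, not part of the diff: the ConfigMap name, namespace, and data key are assumptions, and the `priorityClassName` value is a placeholder. The `privilegedFsBackup` and `priorityClassName` field names come from the JSON tags shown in this change, and per the updated `GetConfigs`, the ConfigMap's `data` section must contain exactly one key holding the whole JSON document.

```yaml
apiVersion: v1
kind: ConfigMap
metadata:
  # example name; pass the same name via the node-agent --node-agent-configmap argument
  name: node-agent-config
  namespace: velero
data:
  # GetConfigs rejects ConfigMaps with more than one key under data,
  # so all node-agent settings live in this single JSON document
  node-agent-config.json: |
    {
      "privilegedFsBackup": true,
      "priorityClassName": "velero-data-mover"
    }
```

With such a ConfigMap in place, the fs-backup hosting pods built by `createHostingPod` receive a container `SecurityContext` with `Privileged` set to `true` on Linux nodes, which is the behavior the OpenShift note added to `customize-installation.md` describes.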