mirror of
https://github.com/vmware-tanzu/velero.git
synced 2026-01-05 04:55:22 +00:00
Issue 8344: constrain data path expose (#9064)
Some checks failed
Run the E2E test on kind / build (push) Failing after 7m38s
Run the E2E test on kind / setup-test-matrix (push) Successful in 4s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Main CI / Build (push) Failing after 39s
Close stale issues and PRs / stale (push) Successful in 22s
Trivy Nightly Scan / Trivy nightly scan (velero, main) (push) Failing after 1m32s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-aws, main) (push) Failing after 1m41s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-gcp, main) (push) Failing after 1m30s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-microsoft-azure, main) (push) Failing after 1m18s
Some checks failed
Run the E2E test on kind / build (push) Failing after 7m38s
Run the E2E test on kind / setup-test-matrix (push) Successful in 4s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Main CI / Build (push) Failing after 39s
Close stale issues and PRs / stale (push) Successful in 22s
Trivy Nightly Scan / Trivy nightly scan (velero, main) (push) Failing after 1m32s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-aws, main) (push) Failing after 1m41s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-gcp, main) (push) Failing after 1m30s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-microsoft-azure, main) (push) Failing after 1m18s
* issue 8344: constrain data path exposure.

Signed-off-by: Lyndon-Li <lyonghui@vmware.com>
This commit is contained in:
@@ -74,6 +74,7 @@ type DataUploadReconciler struct {
|
||||
logger logrus.FieldLogger
|
||||
snapshotExposerList map[velerov2alpha1api.SnapshotType]exposer.SnapshotExposer
|
||||
dataPathMgr *datapath.Manager
|
||||
vgdpCounter *exposer.VgdpCounter
|
||||
loadAffinity []*kube.LoadAffinity
|
||||
backupPVCConfig map[string]nodeagent.BackupPVC
|
||||
podResources corev1api.ResourceRequirements
|
||||
@@ -88,6 +89,7 @@ func NewDataUploadReconciler(
|
||||
kubeClient kubernetes.Interface,
|
||||
csiSnapshotClient snapshotter.SnapshotV1Interface,
|
||||
dataPathMgr *datapath.Manager,
|
||||
counter *exposer.VgdpCounter,
|
||||
loadAffinity []*kube.LoadAffinity,
|
||||
backupPVCConfig map[string]nodeagent.BackupPVC,
|
||||
podResources corev1api.ResourceRequirements,
|
||||
@@ -113,6 +115,7 @@ func NewDataUploadReconciler(
|
||||
),
|
||||
},
|
||||
dataPathMgr: dataPathMgr,
|
||||
vgdpCounter: counter,
|
||||
loadAffinity: loadAffinity,
|
||||
backupPVCConfig: backupPVCConfig,
|
||||
podResources: podResources,
|
||||
@@ -241,6 +244,19 @@ func (r *DataUploadReconciler) Reconcile(ctx context.Context, req ctrl.Request)
|
||||
}
|
||||
|
||||
if du.Status.Phase == "" || du.Status.Phase == velerov2alpha1api.DataUploadPhaseNew {
|
||||
if du.Spec.Cancel {
|
||||
log.Debugf("Data upload is canceled in Phase %s", du.Status.Phase)
|
||||
|
||||
r.tryCancelDataUpload(ctx, du, "")
|
||||
|
||||
return ctrl.Result{}, nil
|
||||
}
|
||||
|
||||
if r.vgdpCounter != nil && r.vgdpCounter.IsConstrained(ctx, r.logger) {
|
||||
log.Debug("Data path initiation is constrained, requeue later")
|
||||
return ctrl.Result{Requeue: true, RequeueAfter: time.Second * 5}, nil
|
||||
}
|
||||
|
||||
log.Info("Data upload starting")
|
||||
|
||||
accepted, err := r.acceptDataUpload(ctx, du)
|
||||
@@ -255,11 +271,6 @@ func (r *DataUploadReconciler) Reconcile(ctx context.Context, req ctrl.Request)
|
||||
|
||||
log.Info("Data upload is accepted")
|
||||
|
||||
if du.Spec.Cancel {
|
||||
r.OnDataUploadCancelled(ctx, du.GetNamespace(), du.GetName())
|
||||
return ctrl.Result{}, nil
|
||||
}
|
||||
|
||||
exposeParam, err := r.setupExposeParam(du)
|
||||
if err != nil {
|
||||
return r.errorOut(ctx, du, err, "failed to set exposer parameters", log)
|
||||
@@ -330,7 +341,7 @@ func (r *DataUploadReconciler) Reconcile(ctx context.Context, req ctrl.Request)
|
||||
du.Name, du.Namespace, res.ByPod.HostingPod.Name, res.ByPod.HostingContainer, du.Name, callbacks, false, log)
|
||||
if err != nil {
|
||||
if err == datapath.ConcurrentLimitExceed {
|
||||
log.Info("Data path instance is concurrent limited requeue later")
|
||||
log.Debug("Data path instance is concurrent limited requeue later")
|
||||
return ctrl.Result{Requeue: true, RequeueAfter: time.Second * 5}, nil
|
||||
} else {
|
||||
return r.errorOut(ctx, du, err, "error to create data path", log)
|
||||
@@ -356,6 +367,8 @@ func (r *DataUploadReconciler) Reconcile(ctx context.Context, req ctrl.Request)
|
||||
du.Status.StartTimestamp = &metav1.Time{Time: r.Clock.Now()}
|
||||
du.Status.NodeOS = velerov2alpha1api.NodeOS(*res.ByPod.NodeOS)
|
||||
|
||||
delete(du.Labels, exposer.ExposeOnGoingLabel)
|
||||
|
||||
return true
|
||||
}); err != nil {
|
||||
log.WithError(err).Warnf("Failed to update dataupload %s to InProgress, will data path close and retry", du.Name)
|
||||
@@ -481,6 +494,8 @@ func (r *DataUploadReconciler) OnDataUploadCompleted(ctx context.Context, namesp
|
||||
du.Status.Message = "volume was empty so no data was upload"
|
||||
}
|
||||
|
||||
delete(du.Labels, exposer.ExposeOnGoingLabel)
|
||||
|
||||
return true
|
||||
}); err != nil {
|
||||
log.WithError(err).Error("error updating DataUpload status")
|
||||
@@ -531,6 +546,8 @@ func (r *DataUploadReconciler) OnDataUploadCancelled(ctx context.Context, namesp
|
||||
}
|
||||
du.Status.CompletionTimestamp = &metav1.Time{Time: r.Clock.Now()}
|
||||
|
||||
delete(du.Labels, exposer.ExposeOnGoingLabel)
|
||||
|
||||
return true
|
||||
}); err != nil {
|
||||
log.WithError(err).Error("error updating DataUpload status")
|
||||
@@ -552,6 +569,8 @@ func (r *DataUploadReconciler) tryCancelDataUpload(ctx context.Context, du *vele
|
||||
if message != "" {
|
||||
dataUpload.Status.Message = message
|
||||
}
|
||||
|
||||
delete(dataUpload.Labels, exposer.ExposeOnGoingLabel)
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
@@ -760,6 +779,8 @@ func (r *DataUploadReconciler) updateStatusToFailed(ctx context.Context, du *vel
|
||||
}
|
||||
du.Status.CompletionTimestamp = &metav1.Time{Time: r.Clock.Now()}
|
||||
|
||||
delete(du.Labels, exposer.ExposeOnGoingLabel)
|
||||
|
||||
return true
|
||||
}); patchErr != nil {
|
||||
log.WithError(patchErr).Error("error updating DataUpload status")
|
||||
@@ -781,6 +802,11 @@ func (r *DataUploadReconciler) acceptDataUpload(ctx context.Context, du *velerov
|
||||
dataUpload.Status.Phase = velerov2alpha1api.DataUploadPhaseAccepted
|
||||
dataUpload.Status.AcceptedByNode = r.nodeName
|
||||
dataUpload.Status.AcceptedTimestamp = &metav1.Time{Time: r.Clock.Now()}
|
||||
|
||||
if dataUpload.Labels == nil {
|
||||
dataUpload.Labels = make(map[string]string)
|
||||
}
|
||||
dataUpload.Labels[exposer.ExposeOnGoingLabel] = "true"
|
||||
}
|
||||
|
||||
succeeded, err := funcExclusiveUpdateDataUpload(ctx, r.client, updated, updateFunc)
|
||||
@@ -807,6 +833,8 @@ func (r *DataUploadReconciler) onPrepareTimeout(ctx context.Context, du *velerov
|
||||
succeeded, err := funcExclusiveUpdateDataUpload(ctx, r.client, du, func(du *velerov2alpha1api.DataUpload) {
|
||||
du.Status.Phase = velerov2alpha1api.DataUploadPhaseFailed
|
||||
du.Status.Message = "timeout on preparing data upload"
|
||||
|
||||
delete(du.Labels, exposer.ExposeOnGoingLabel)
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
|
||||
Reference in New Issue
Block a user