mirror of
https://github.com/vmware-tanzu/velero.git
synced 2026-01-05 04:55:22 +00:00
Issue 8344: constrain data path expose (#9064)
Some checks failed
Run the E2E test on kind / build (push) Failing after 7m38s
Run the E2E test on kind / setup-test-matrix (push) Successful in 4s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Main CI / Build (push) Failing after 39s
Close stale issues and PRs / stale (push) Successful in 22s
Trivy Nightly Scan / Trivy nightly scan (velero, main) (push) Failing after 1m32s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-aws, main) (push) Failing after 1m41s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-gcp, main) (push) Failing after 1m30s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-microsoft-azure, main) (push) Failing after 1m18s
Some checks failed
Run the E2E test on kind / build (push) Failing after 7m38s
Run the E2E test on kind / setup-test-matrix (push) Successful in 4s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Main CI / Build (push) Failing after 39s
Close stale issues and PRs / stale (push) Successful in 22s
Trivy Nightly Scan / Trivy nightly scan (velero, main) (push) Failing after 1m32s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-aws, main) (push) Failing after 1m41s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-gcp, main) (push) Failing after 1m30s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-microsoft-azure, main) (push) Failing after 1m18s
* issue 8344: constrain data path exposure. Signed-off-by: Lyndon-Li <lyonghui@vmware.com>
This commit is contained in:
@@ -64,6 +64,7 @@ type DataDownloadReconciler struct {
|
||||
restoreExposer exposer.GenericRestoreExposer
|
||||
nodeName string
|
||||
dataPathMgr *datapath.Manager
|
||||
vgdpCounter *exposer.VgdpCounter
|
||||
loadAffinity []*kube.LoadAffinity
|
||||
restorePVCConfig nodeagent.RestorePVC
|
||||
podResources corev1api.ResourceRequirements
|
||||
@@ -77,6 +78,7 @@ func NewDataDownloadReconciler(
|
||||
mgr manager.Manager,
|
||||
kubeClient kubernetes.Interface,
|
||||
dataPathMgr *datapath.Manager,
|
||||
counter *exposer.VgdpCounter,
|
||||
loadAffinity []*kube.LoadAffinity,
|
||||
restorePVCConfig nodeagent.RestorePVC,
|
||||
podResources corev1api.ResourceRequirements,
|
||||
@@ -95,6 +97,7 @@ func NewDataDownloadReconciler(
|
||||
restoreExposer: exposer.NewGenericRestoreExposer(kubeClient, logger),
|
||||
restorePVCConfig: restorePVCConfig,
|
||||
dataPathMgr: dataPathMgr,
|
||||
vgdpCounter: counter,
|
||||
loadAffinity: loadAffinity,
|
||||
podResources: podResources,
|
||||
preparingTimeout: preparingTimeout,
|
||||
@@ -220,13 +223,26 @@ func (r *DataDownloadReconciler) Reconcile(ctx context.Context, req ctrl.Request
|
||||
}
|
||||
|
||||
if dd.Status.Phase == "" || dd.Status.Phase == velerov2alpha1api.DataDownloadPhaseNew {
|
||||
log.Info("Data download starting")
|
||||
if dd.Spec.Cancel {
|
||||
log.Debugf("Data download is canceled in Phase %s", dd.Status.Phase)
|
||||
|
||||
r.tryCancelDataDownload(ctx, dd, "")
|
||||
|
||||
return ctrl.Result{}, nil
|
||||
}
|
||||
|
||||
if r.vgdpCounter != nil && r.vgdpCounter.IsConstrained(ctx, r.logger) {
|
||||
log.Debug("Data path initiation is constrained, requeue later")
|
||||
return ctrl.Result{Requeue: true, RequeueAfter: time.Second * 5}, nil
|
||||
}
|
||||
|
||||
if _, err := r.getTargetPVC(ctx, dd); err != nil {
|
||||
log.WithField("error", err).Debugf("Cannot find target PVC for DataDownload yet. Retry later.")
|
||||
return ctrl.Result{Requeue: true}, nil
|
||||
}
|
||||
|
||||
log.Info("Data download starting")
|
||||
|
||||
accepted, err := r.acceptDataDownload(ctx, dd)
|
||||
if err != nil {
|
||||
return ctrl.Result{}, errors.Wrapf(err, "error accepting the data download %s", dd.Name)
|
||||
@@ -239,12 +255,6 @@ func (r *DataDownloadReconciler) Reconcile(ctx context.Context, req ctrl.Request
|
||||
|
||||
log.Info("Data download is accepted")
|
||||
|
||||
if dd.Spec.Cancel {
|
||||
log.Debugf("Data download is been canceled %s in Phase %s", dd.GetName(), dd.Status.Phase)
|
||||
r.OnDataDownloadCancelled(ctx, dd.GetNamespace(), dd.GetName())
|
||||
return ctrl.Result{}, nil
|
||||
}
|
||||
|
||||
exposeParam, err := r.setupExposeParam(dd)
|
||||
if err != nil {
|
||||
return r.errorOut(ctx, dd, err, "failed to set exposer parameters", log)
|
||||
@@ -312,7 +322,7 @@ func (r *DataDownloadReconciler) Reconcile(ctx context.Context, req ctrl.Request
|
||||
dd.Name, dd.Namespace, result.ByPod.HostingPod.Name, result.ByPod.HostingContainer, dd.Name, callbacks, false, log)
|
||||
if err != nil {
|
||||
if err == datapath.ConcurrentLimitExceed {
|
||||
log.Info("Data path instance is concurrent limited requeue later")
|
||||
log.Debug("Data path instance is concurrent limited requeue later")
|
||||
return ctrl.Result{Requeue: true, RequeueAfter: time.Second * 5}, nil
|
||||
} else {
|
||||
return r.errorOut(ctx, dd, err, "error to create data path", log)
|
||||
@@ -337,6 +347,8 @@ func (r *DataDownloadReconciler) Reconcile(ctx context.Context, req ctrl.Request
|
||||
dd.Status.Phase = velerov2alpha1api.DataDownloadPhaseInProgress
|
||||
dd.Status.StartTimestamp = &metav1.Time{Time: r.Clock.Now()}
|
||||
|
||||
delete(dd.Labels, exposer.ExposeOnGoingLabel)
|
||||
|
||||
return true
|
||||
}); err != nil {
|
||||
log.WithError(err).Warnf("Failed to update datadownload %s to InProgress, will data path close and retry", dd.Name)
|
||||
@@ -454,6 +466,8 @@ func (r *DataDownloadReconciler) OnDataDownloadCompleted(ctx context.Context, na
|
||||
dd.Status.Phase = velerov2alpha1api.DataDownloadPhaseCompleted
|
||||
dd.Status.CompletionTimestamp = &metav1.Time{Time: r.Clock.Now()}
|
||||
|
||||
delete(dd.Labels, exposer.ExposeOnGoingLabel)
|
||||
|
||||
return true
|
||||
}); err != nil {
|
||||
log.WithError(err).Error("error updating data download status")
|
||||
@@ -504,6 +518,8 @@ func (r *DataDownloadReconciler) OnDataDownloadCancelled(ctx context.Context, na
|
||||
}
|
||||
dd.Status.CompletionTimestamp = &metav1.Time{Time: r.Clock.Now()}
|
||||
|
||||
delete(dd.Labels, exposer.ExposeOnGoingLabel)
|
||||
|
||||
return true
|
||||
}); err != nil {
|
||||
log.WithError(err).Error("error updating data download status")
|
||||
@@ -525,6 +541,8 @@ func (r *DataDownloadReconciler) tryCancelDataDownload(ctx context.Context, dd *
|
||||
if message != "" {
|
||||
dataDownload.Status.Message = message
|
||||
}
|
||||
|
||||
delete(dataDownload.Labels, exposer.ExposeOnGoingLabel)
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
@@ -702,6 +720,8 @@ func (r *DataDownloadReconciler) updateStatusToFailed(ctx context.Context, dd *v
|
||||
dd.Status.Message = errors.WithMessage(err, msg).Error()
|
||||
dd.Status.CompletionTimestamp = &metav1.Time{Time: r.Clock.Now()}
|
||||
|
||||
delete(dd.Labels, exposer.ExposeOnGoingLabel)
|
||||
|
||||
return true
|
||||
}); patchErr != nil {
|
||||
log.WithError(patchErr).Error("error updating DataDownload status")
|
||||
@@ -724,6 +744,11 @@ func (r *DataDownloadReconciler) acceptDataDownload(ctx context.Context, dd *vel
|
||||
datadownload.Status.Phase = velerov2alpha1api.DataDownloadPhaseAccepted
|
||||
datadownload.Status.AcceptedByNode = r.nodeName
|
||||
datadownload.Status.AcceptedTimestamp = &metav1.Time{Time: r.Clock.Now()}
|
||||
|
||||
if datadownload.Labels == nil {
|
||||
datadownload.Labels = make(map[string]string)
|
||||
}
|
||||
datadownload.Labels[exposer.ExposeOnGoingLabel] = "true"
|
||||
}
|
||||
|
||||
succeeded, err := funcExclusiveUpdateDataDownload(ctx, r.client, updated, updateFunc)
|
||||
@@ -749,6 +774,8 @@ func (r *DataDownloadReconciler) onPrepareTimeout(ctx context.Context, dd *veler
|
||||
succeeded, err := funcExclusiveUpdateDataDownload(ctx, r.client, dd, func(dd *velerov2alpha1api.DataDownload) {
|
||||
dd.Status.Phase = velerov2alpha1api.DataDownloadPhaseFailed
|
||||
dd.Status.Message = "timeout on preparing data download"
|
||||
|
||||
delete(dd.Labels, exposer.ExposeOnGoingLabel)
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
|
||||
Reference in New Issue
Block a user