mirror of
https://github.com/vmware-tanzu/velero.git
synced 2026-06-06 22:32:40 +00:00
Merge pull request #7437 from Lyndon-Li/issue-fix-7036
Issue 7036: node selection for data mover backup
This commit is contained in:
@@ -214,7 +214,10 @@ func (r *DataDownloadReconciler) Reconcile(ctx context.Context, req ctrl.Request
|
||||
} else if dd.Status.Phase == velerov2alpha1api.DataDownloadPhaseAccepted {
|
||||
if dd.Spec.Cancel {
|
||||
log.Debugf("Data download is been canceled %s in Phase %s", dd.GetName(), dd.Status.Phase)
|
||||
r.TryCancelDataDownload(ctx, dd)
|
||||
r.TryCancelDataDownload(ctx, dd, "")
|
||||
} else if peekErr := r.restoreExposer.PeekExposed(ctx, getDataDownloadOwnerObject(dd)); peekErr != nil {
|
||||
r.TryCancelDataDownload(ctx, dd, fmt.Sprintf("found a dataupload %s/%s with expose error: %s. mark it as cancel", dd.Namespace, dd.Name, peekErr))
|
||||
log.Errorf("Cancel dd %s/%s because of expose error %s", dd.Namespace, dd.Name, peekErr)
|
||||
} else if dd.Status.StartTimestamp != nil {
|
||||
if time.Since(dd.Status.StartTimestamp.Time) >= r.preparingTimeout {
|
||||
r.onPrepareTimeout(ctx, dd)
|
||||
@@ -418,7 +421,7 @@ func (r *DataDownloadReconciler) OnDataDownloadCancelled(ctx context.Context, na
|
||||
}
|
||||
}
|
||||
|
||||
func (r *DataDownloadReconciler) TryCancelDataDownload(ctx context.Context, dd *velerov2alpha1api.DataDownload) {
|
||||
func (r *DataDownloadReconciler) TryCancelDataDownload(ctx context.Context, dd *velerov2alpha1api.DataDownload, message string) {
|
||||
log := r.logger.WithField("datadownload", dd.Name)
|
||||
log.Warn("Async fs backup data path canceled")
|
||||
|
||||
@@ -428,6 +431,7 @@ func (r *DataDownloadReconciler) TryCancelDataDownload(ctx context.Context, dd *
|
||||
dataDownload.Status.StartTimestamp = &metav1.Time{Time: r.Clock.Now()}
|
||||
}
|
||||
dataDownload.Status.CompletionTimestamp = &metav1.Time{Time: r.Clock.Now()}
|
||||
dataDownload.Status.Message = message
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
|
||||
@@ -174,6 +174,7 @@ func TestDataDownloadReconcile(t *testing.T) {
|
||||
needCreateFSBR bool
|
||||
isExposeErr bool
|
||||
isGetExposeErr bool
|
||||
isPeekExposeErr bool
|
||||
isNilExposer bool
|
||||
isFSBRInitErr bool
|
||||
isFSBRRestoreErr bool
|
||||
@@ -302,6 +303,12 @@ func TestDataDownloadReconcile(t *testing.T) {
|
||||
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseAccepted).StartTimestamp(&metav1.Time{Time: time.Now().Add(-time.Minute * 5)}).Result(),
|
||||
expected: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseFailed).Result(),
|
||||
},
|
||||
{
|
||||
name: "peek error",
|
||||
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseAccepted).Result(),
|
||||
isPeekExposeErr: true,
|
||||
expected: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseCanceled).Result(),
|
||||
},
|
||||
{
|
||||
name: "dataDownload with enabled cancel",
|
||||
dd: func() *velerov2alpha1api.DataDownload {
|
||||
@@ -369,7 +376,7 @@ func TestDataDownloadReconcile(t *testing.T) {
|
||||
return fsBR
|
||||
}
|
||||
|
||||
if test.isExposeErr || test.isGetExposeErr || test.isNilExposer || test.notNilExpose {
|
||||
if test.isExposeErr || test.isGetExposeErr || test.isPeekExposeErr || test.isNilExposer || test.notNilExpose {
|
||||
if test.isNilExposer {
|
||||
r.restoreExposer = nil
|
||||
} else {
|
||||
@@ -383,6 +390,8 @@ func TestDataDownloadReconcile(t *testing.T) {
|
||||
ep.On("GetExposed", mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(&exposer.ExposeResult{ByPod: exposer.ExposeByPod{HostingPod: hostingPod, VolumeName: "test-pvc"}}, nil)
|
||||
} else if test.isGetExposeErr {
|
||||
ep.On("GetExposed", mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(nil, errors.New("Error to get restore exposer"))
|
||||
} else if test.isPeekExposeErr {
|
||||
ep.On("PeekExposed", mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(errors.New("fake-peek-error"))
|
||||
}
|
||||
|
||||
if !test.notMockCleanUp {
|
||||
@@ -801,7 +810,7 @@ func TestTryCancelDataDownload(t *testing.T) {
|
||||
err = r.client.Create(ctx, test.dd)
|
||||
require.NoError(t, err)
|
||||
|
||||
r.TryCancelDataDownload(ctx, test.dd)
|
||||
r.TryCancelDataDownload(ctx, test.dd, "")
|
||||
|
||||
if test.expectedErr == "" {
|
||||
assert.NoError(t, err)
|
||||
|
||||
@@ -48,6 +48,7 @@ import (
|
||||
"github.com/vmware-tanzu/velero/pkg/datapath"
|
||||
"github.com/vmware-tanzu/velero/pkg/exposer"
|
||||
"github.com/vmware-tanzu/velero/pkg/metrics"
|
||||
"github.com/vmware-tanzu/velero/pkg/nodeagent"
|
||||
"github.com/vmware-tanzu/velero/pkg/repository"
|
||||
"github.com/vmware-tanzu/velero/pkg/uploader"
|
||||
"github.com/vmware-tanzu/velero/pkg/util/filesystem"
|
||||
@@ -74,12 +75,13 @@ type DataUploadReconciler struct {
|
||||
logger logrus.FieldLogger
|
||||
snapshotExposerList map[velerov2alpha1api.SnapshotType]exposer.SnapshotExposer
|
||||
dataPathMgr *datapath.Manager
|
||||
loadAffinity *nodeagent.LoadAffinity
|
||||
preparingTimeout time.Duration
|
||||
metrics *metrics.ServerMetrics
|
||||
}
|
||||
|
||||
func NewDataUploadReconciler(client client.Client, kubeClient kubernetes.Interface, csiSnapshotClient snapshotter.SnapshotV1Interface,
|
||||
dataPathMgr *datapath.Manager, repoEnsurer *repository.Ensurer, clock clocks.WithTickerAndDelayedExecution,
|
||||
dataPathMgr *datapath.Manager, loadAffinity *nodeagent.LoadAffinity, repoEnsurer *repository.Ensurer, clock clocks.WithTickerAndDelayedExecution,
|
||||
cred *credentials.CredentialGetter, nodeName string, fs filesystem.Interface, preparingTimeout time.Duration, log logrus.FieldLogger, metrics *metrics.ServerMetrics) *DataUploadReconciler {
|
||||
return &DataUploadReconciler{
|
||||
client: client,
|
||||
@@ -93,6 +95,7 @@ func NewDataUploadReconciler(client client.Client, kubeClient kubernetes.Interfa
|
||||
repoEnsurer: repoEnsurer,
|
||||
snapshotExposerList: map[velerov2alpha1api.SnapshotType]exposer.SnapshotExposer{velerov2alpha1api.SnapshotTypeCSI: exposer.NewCSISnapshotExposer(kubeClient, csiSnapshotClient, log)},
|
||||
dataPathMgr: dataPathMgr,
|
||||
loadAffinity: loadAffinity,
|
||||
preparingTimeout: preparingTimeout,
|
||||
metrics: metrics,
|
||||
}
|
||||
@@ -224,7 +227,10 @@ func (r *DataUploadReconciler) Reconcile(ctx context.Context, req ctrl.Request)
|
||||
// we don't want to forcibly update the CR into cancel status, as it may conflict with the CR update in the Expose action
|
||||
// we can retry when the CR is requeued periodically
|
||||
log.Debugf("Data upload is been canceled %s in Phase %s", du.GetName(), du.Status.Phase)
|
||||
r.TryCancelDataUpload(ctx, du)
|
||||
r.TryCancelDataUpload(ctx, du, "")
|
||||
} else if peekErr := ep.PeekExposed(ctx, getOwnerObject(du)); peekErr != nil {
|
||||
r.TryCancelDataUpload(ctx, du, fmt.Sprintf("found a dataupload %s/%s with expose error: %s. mark it as cancel", du.Namespace, du.Name, peekErr))
|
||||
log.Errorf("Cancel du %s/%s because of expose error %s", du.Namespace, du.Name, peekErr)
|
||||
} else if du.Status.StartTimestamp != nil {
|
||||
if time.Since(du.Status.StartTimestamp.Time) >= r.preparingTimeout {
|
||||
r.onPrepareTimeout(ctx, du)
|
||||
@@ -440,7 +446,7 @@ func (r *DataUploadReconciler) OnDataUploadCancelled(ctx context.Context, namesp
|
||||
}
|
||||
|
||||
// TryCancelDataUpload cleans up resources only when the update succeeds
|
||||
func (r *DataUploadReconciler) TryCancelDataUpload(ctx context.Context, du *velerov2alpha1api.DataUpload) {
|
||||
func (r *DataUploadReconciler) TryCancelDataUpload(ctx context.Context, du *velerov2alpha1api.DataUpload, message string) {
|
||||
log := r.logger.WithField("dataupload", du.Name)
|
||||
log.Warn("Async fs backup data path canceled")
|
||||
succeeded, err := r.exclusiveUpdateDataUpload(ctx, du, func(dataUpload *velerov2alpha1api.DataUpload) {
|
||||
@@ -449,6 +455,7 @@ func (r *DataUploadReconciler) TryCancelDataUpload(ctx context.Context, du *vele
|
||||
dataUpload.Status.StartTimestamp = &metav1.Time{Time: r.Clock.Now()}
|
||||
}
|
||||
dataUpload.Status.CompletionTimestamp = &metav1.Time{Time: r.Clock.Now()}
|
||||
dataUpload.Status.Message = message
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
@@ -825,6 +832,7 @@ func (r *DataUploadReconciler) setupExposeParam(du *velerov2alpha1api.DataUpload
|
||||
OperationTimeout: du.Spec.OperationTimeout.Duration,
|
||||
ExposeTimeout: r.preparingTimeout,
|
||||
VolumeSize: pvc.Spec.Resources.Requests[corev1.ResourceStorage],
|
||||
Affinity: r.loadAffinity,
|
||||
}, nil
|
||||
}
|
||||
return nil, nil
|
||||
|
||||
@@ -232,7 +232,7 @@ func initDataUploaderReconcilerWithError(needError ...error) (*DataUploadReconci
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return NewDataUploadReconciler(fakeClient, fakeKubeClient, fakeSnapshotClient.SnapshotV1(), dataPathMgr, nil,
|
||||
return NewDataUploadReconciler(fakeClient, fakeKubeClient, fakeSnapshotClient.SnapshotV1(), dataPathMgr, nil, nil,
|
||||
testclocks.NewFakeClock(now), &credentials.CredentialGetter{FromFile: credentialFileStore}, "test_node", fakeFS, time.Minute*5, velerotest.NewLogger(), metrics.NewServerMetrics()), nil
|
||||
}
|
||||
|
||||
@@ -252,6 +252,7 @@ func dataUploadBuilder() *builder.DataUploadBuilder {
|
||||
// fakeSnapshotExposer is a test double that the tests register in the
// reconciler's snapshotExposerList in place of a real snapshot exposer.
type fakeSnapshotExposer struct {
|
||||
kubeClient kbclient.Client
|
||||
clock clock.WithTickerAndDelayedExecution
|
||||
// peekErr is returned as-is by PeekExposed; a nil value means PeekExposed reports no error.
peekErr error
|
||||
}
|
||||
|
||||
func (f *fakeSnapshotExposer) Expose(ctx context.Context, ownerObject corev1.ObjectReference, param interface{}) error {
|
||||
@@ -283,6 +284,10 @@ func (f *fakeSnapshotExposer) GetExposed(ctx context.Context, du corev1.ObjectRe
|
||||
return &exposer.ExposeResult{ByPod: exposer.ExposeByPod{HostingPod: pod, VolumeName: dataUploadName}}, nil
|
||||
}
|
||||
|
||||
// PeekExposed returns the fake's preconfigured peekErr. It does not inspect
// ctx or ownerObject, so each test case fully controls the outcome.
func (f *fakeSnapshotExposer) PeekExposed(ctx context.Context, ownerObject corev1.ObjectReference) error {
|
||||
return f.peekErr
|
||||
}
|
||||
|
||||
// CleanUp is a no-op in this fake; all arguments are ignored.
func (f *fakeSnapshotExposer) CleanUp(context.Context, corev1.ObjectReference, string, string) {
|
||||
}
|
||||
|
||||
@@ -330,6 +335,7 @@ func TestReconcile(t *testing.T) {
|
||||
expectedRequeue ctrl.Result
|
||||
expectedErrMsg string
|
||||
needErrs []bool
|
||||
peekErr error
|
||||
}{
|
||||
{
|
||||
name: "Dataupload is not initialized",
|
||||
@@ -420,6 +426,13 @@ func TestReconcile(t *testing.T) {
|
||||
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseAccepted).SnapshotType(fakeSnapshotType).StartTimestamp(&metav1.Time{Time: time.Now().Add(-time.Minute * 5)}).Result(),
|
||||
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseFailed).Result(),
|
||||
},
|
||||
{
|
||||
name: "peek error",
|
||||
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseAccepted).SnapshotType(fakeSnapshotType).Result(),
|
||||
peekErr: errors.New("fake-peek-error"),
|
||||
expectedProcessed: true,
|
||||
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseCanceled).Result(),
|
||||
},
|
||||
{
|
||||
name: "Dataupload with enabled cancel",
|
||||
pod: builder.ForPod(velerov1api.DefaultNamespace, dataUploadName).Volumes(&corev1.Volume{Name: "dataupload-1"}).Result(),
|
||||
@@ -494,7 +507,7 @@ func TestReconcile(t *testing.T) {
|
||||
}
|
||||
|
||||
if test.du.Spec.SnapshotType == fakeSnapshotType {
|
||||
r.snapshotExposerList = map[velerov2alpha1api.SnapshotType]exposer.SnapshotExposer{fakeSnapshotType: &fakeSnapshotExposer{r.client, r.Clock}}
|
||||
r.snapshotExposerList = map[velerov2alpha1api.SnapshotType]exposer.SnapshotExposer{fakeSnapshotType: &fakeSnapshotExposer{r.client, r.Clock, test.peekErr}}
|
||||
} else if test.du.Spec.SnapshotType == velerov2alpha1api.SnapshotTypeCSI {
|
||||
r.snapshotExposerList = map[velerov2alpha1api.SnapshotType]exposer.SnapshotExposer{velerov2alpha1api.SnapshotTypeCSI: exposer.NewCSISnapshotExposer(r.kubeClient, r.csiSnapshotClient, velerotest.NewLogger())}
|
||||
}
|
||||
@@ -874,7 +887,7 @@ func TestTryCancelDataUpload(t *testing.T) {
|
||||
err = r.client.Create(ctx, test.dd)
|
||||
require.NoError(t, err)
|
||||
|
||||
r.TryCancelDataUpload(ctx, test.dd)
|
||||
r.TryCancelDataUpload(ctx, test.dd, "")
|
||||
|
||||
if test.expectedErr == "" {
|
||||
assert.NoError(t, err)
|
||||
|
||||
Reference in New Issue
Block a user