data mover micro service (ms) smoke test

Signed-off-by: Lyndon-Li <lyonghui@vmware.com>
This commit is contained in:
Lyndon-Li
2024-08-09 14:34:48 +08:00
parent dd3d05bbac
commit 4dea3a48e8
14 changed files with 330 additions and 216 deletions

View File

@@ -18,6 +18,7 @@ package datapath
import (
"context"
"sync"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
@@ -66,6 +67,8 @@ type fileSystemBR struct {
callbacks Callbacks
jobName string
requestorType string
wgDataPath sync.WaitGroup
dataPathLock sync.Mutex
}
func newFileSystemBR(jobName string, requestorType string, client client.Client, namespace string, callbacks Callbacks, log logrus.FieldLogger) AsyncBR {
@@ -75,6 +78,7 @@ func newFileSystemBR(jobName string, requestorType string, client client.Client,
client: client,
namespace: namespace,
callbacks: callbacks,
wgDataPath: sync.WaitGroup{},
log: log,
}
@@ -134,6 +138,23 @@ func (fs *fileSystemBR) Init(ctx context.Context, param interface{}) error {
}
// Close shuts down the file system data path.
//
// It invokes fs.cancel when one is set, waits until every goroutine
// registered on wgDataPath has finished, and then calls close to release the
// uploader provider. A log line is written on entry and again once the
// shutdown has finished.
func (fs *fileSystemBR) Close(ctx context.Context) {
	if cancel := fs.cancel; cancel != nil {
		cancel()
	}

	logger := fs.log.WithField("user", fs.jobName)
	logger.Info("Closing FileSystemBR")

	// The backup/restore goroutines call Done on wgDataPath when they exit,
	// so block here before doing the final cleanup.
	fs.wgDataPath.Wait()
	fs.close(ctx)

	logger.Info("FileSystemBR is closed")
}
func (fs *fileSystemBR) close(ctx context.Context) {
fs.dataPathLock.Lock()
defer fs.dataPathLock.Unlock()
if fs.uploaderProv != nil {
if err := fs.uploaderProv.Close(ctx); err != nil {
fs.log.Errorf("failed to close uploader provider with error %v", err)
@@ -141,13 +162,6 @@ func (fs *fileSystemBR) Close(ctx context.Context) {
fs.uploaderProv = nil
}
if fs.cancel != nil {
fs.cancel()
fs.cancel = nil
}
fs.log.WithField("user", fs.jobName).Info("FileSystemBR is closed")
}
func (fs *fileSystemBR) StartBackup(source AccessPoint, uploaderConfig map[string]string, param interface{}) error {
@@ -155,9 +169,18 @@ func (fs *fileSystemBR) StartBackup(source AccessPoint, uploaderConfig map[strin
return errors.New("file system data path is not initialized")
}
fs.wgDataPath.Add(1)
backupParam := param.(*FSBRStartParam)
go func() {
fs.log.Info("Start data path backup")
defer func() {
fs.close(context.Background())
fs.wgDataPath.Done()
}()
snapshotID, emptySnapshot, err := fs.uploaderProv.RunBackup(fs.ctx, source.ByPath, backupParam.RealSource, backupParam.Tags, backupParam.ForceFull,
backupParam.ParentSnapshot, source.VolMode, uploaderConfig, fs)
@@ -182,7 +205,16 @@ func (fs *fileSystemBR) StartRestore(snapshotID string, target AccessPoint, uplo
return errors.New("file system data path is not initialized")
}
fs.wgDataPath.Add(1)
go func() {
fs.log.Info("Start data path restore")
defer func() {
fs.close(context.Background())
fs.wgDataPath.Done()
}()
err := fs.uploaderProv.RunRestore(fs.ctx, snapshotID, target.ByPath, target.VolMode, uploaderConfigs, fs)
if err == provider.ErrorCanceled {

View File

@@ -96,6 +96,7 @@ func TestAsyncBackup(t *testing.T) {
fs := newFileSystemBR("job-1", "test", nil, "velero", Callbacks{}, velerotest.NewLogger()).(*fileSystemBR)
mockProvider := providerMock.NewProvider(t)
mockProvider.On("RunBackup", mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(test.result.Backup.SnapshotID, test.result.Backup.EmptySnapshot, test.err)
mockProvider.On("Close", mock.Anything).Return(nil)
fs.uploaderProv = mockProvider
fs.initialized = true
fs.callbacks = test.callbacks
@@ -179,6 +180,7 @@ func TestAsyncRestore(t *testing.T) {
fs := newFileSystemBR("job-1", "test", nil, "velero", Callbacks{}, velerotest.NewLogger()).(*fileSystemBR)
mockProvider := providerMock.NewProvider(t)
mockProvider.On("RunRestore", mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(test.err)
mockProvider.On("Close", mock.Anything).Return(nil)
fs.uploaderProv = mockProvider
fs.initialized = true
fs.callbacks = test.callbacks

View File

@@ -46,11 +46,12 @@ const (
ErrCancelled = "data path is canceled"
EventReasonStarted = "Data-Path-Started"
EventReasonCompleted = "Data-Path-Completed"
EventReasonFailed = "Data-Path-Failed"
EventReasonCancelled = "Data-Path-Canceled"
EventReasonProgress = "Data-Path-Progress"
EventReasonStarted = "Data-Path-Started"
EventReasonCompleted = "Data-Path-Completed"
EventReasonFailed = "Data-Path-Failed"
EventReasonCancelled = "Data-Path-Canceled"
EventReasonProgress = "Data-Path-Progress"
EventReasonCancelling = "Data-Path-Canceling"
)
type microServiceBRWatcher struct {
@@ -76,6 +77,7 @@ type microServiceBRWatcher struct {
podInformer ctrlcache.Informer
eventHandler cache.ResourceEventHandlerRegistration
podHandler cache.ResourceEventHandlerRegistration
watcherLock sync.Mutex
}
func newMicroServiceBRWatcher(client client.Client, kubeClient kubernetes.Interface, mgr manager.Manager, taskType string, taskName string, namespace string,
@@ -121,8 +123,6 @@ func (ms *microServiceBRWatcher) Init(ctx context.Context, param interface{}) er
return
}
ms.log.Infof("Pushed adding event %s/%s, message %s for object %v", evt.Namespace, evt.Name, evt.Message, evt.InvolvedObject)
ms.eventCh <- evt
},
UpdateFunc: func(_, obj interface{}) {
@@ -131,8 +131,6 @@ func (ms *microServiceBRWatcher) Init(ctx context.Context, param interface{}) er
return
}
ms.log.Infof("Pushed updating event %s/%s, message %s for object %v", evt.Namespace, evt.Name, evt.Message, evt.InvolvedObject)
ms.eventCh <- evt
},
},
@@ -177,12 +175,9 @@ func (ms *microServiceBRWatcher) Init(ctx context.Context, param interface{}) er
}
}()
ms.log.WithFields(
logrus.Fields{
"taskType": ms.taskType,
"taskName": ms.taskName,
"thisPod": ms.thisPod,
}).Info("MicroServiceBR is initialized")
if err := ms.reEnsureThisPod(ctx); err != nil {
return err
}
ms.eventInformer = eventInformer
ms.podInformer = podInformer
@@ -191,63 +186,73 @@ func (ms *microServiceBRWatcher) Init(ctx context.Context, param interface{}) er
ms.ctx, ms.cancel = context.WithCancel(ctx)
ms.log.WithFields(
logrus.Fields{
"taskType": ms.taskType,
"taskName": ms.taskName,
"thisPod": ms.thisPod,
}).Info("MicroServiceBR is initialized")
succeeded = true
return nil
}
func (ms *microServiceBRWatcher) Close(ctx context.Context) {
if ms.cancel != nil {
ms.cancel()
ms.cancel = nil
}
ms.log.WithField("taskType", ms.taskType).WithField("taskName", ms.taskName).Info("Closing MicroServiceBR")
ms.wgWatcher.Wait()
if ms.eventInformer != nil && ms.eventHandler != nil {
if err := ms.eventInformer.RemoveEventHandler(ms.eventHandler); err != nil {
ms.log.WithError(err).Warn("Failed to remove event handler")
}
}
if ms.podInformer != nil && ms.podHandler != nil {
if err := ms.podInformer.RemoveEventHandler(ms.podHandler); err != nil {
ms.log.WithError(err).Warn("Failed to remove pod handler")
}
}
ms.close()
ms.log.WithField("taskType", ms.taskType).WithField("taskName", ms.taskName).Info("MicroServiceBR is closed")
}
func (ms *microServiceBRWatcher) StartBackup(source AccessPoint, uploaderConfig map[string]string, param interface{}) error {
ms.log.Infof("Start watching backup ms for source %v", source)
func (ms *microServiceBRWatcher) close() {
ms.watcherLock.Lock()
defer ms.watcherLock.Unlock()
if err := ms.reEnsureThisPod(); err != nil {
return err
if ms.eventHandler != nil {
if err := ms.eventInformer.RemoveEventHandler(ms.eventHandler); err != nil {
ms.log.WithError(err).Warn("Failed to remove event handler")
}
ms.eventHandler = nil
}
if ms.podHandler != nil {
if err := ms.podInformer.RemoveEventHandler(ms.podHandler); err != nil {
ms.log.WithError(err).Warn("Failed to remove pod handler")
}
ms.podHandler = nil
}
}
// StartBackup logs the source path being backed up and starts the watcher via
// startWatch. The uploaderConfig and param arguments are not used by this
// implementation, and the returned error is always nil.
func (ms *microServiceBRWatcher) StartBackup(source AccessPoint, uploaderConfig map[string]string, param interface{}) error {
	sourcePath := source.ByPath
	ms.log.Infof("Start watching backup ms for source %v", sourcePath)

	ms.startWatch()

	return nil
}
func (ms *microServiceBRWatcher) StartRestore(snapshotID string, target AccessPoint, uploaderConfigs map[string]string) error {
ms.log.Infof("Start watching restore ms to target %v, from snapshot %s", target, snapshotID)
if err := ms.reEnsureThisPod(); err != nil {
return err
}
ms.log.Infof("Start watching restore ms to target %s, from snapshot %s", target.ByPath, snapshotID)
ms.startWatch()
return nil
}
func (ms *microServiceBRWatcher) reEnsureThisPod() error {
func (ms *microServiceBRWatcher) reEnsureThisPod(ctx context.Context) error {
thisPod := &v1.Pod{}
if err := ms.client.Get(ms.ctx, types.NamespacedName{
if err := ms.client.Get(ctx, types.NamespacedName{
Namespace: ms.namespace,
Name: ms.thisPod,
}, thisPod); err != nil {
@@ -275,6 +280,11 @@ func (ms *microServiceBRWatcher) startWatch() {
go func() {
ms.log.Info("Start watching data path pod")
defer func() {
ms.close()
ms.wgWatcher.Done()
}()
var lastPod *v1.Pod
watchLoop:
@@ -291,14 +301,16 @@ func (ms *microServiceBRWatcher) startWatch() {
}
if lastPod == nil {
ms.log.Warn("Data path pod watch loop is canceled")
ms.wgWatcher.Done()
ms.log.Warn("Watch loop is cancelled on waiting data path pod")
return
}
epilogLoop:
for !ms.startedFromEvent || !ms.terminatedFromEvent {
select {
case <-ms.ctx.Done():
ms.log.Warn("Watch loop is cancelled on waiting final event")
return
case <-time.After(eventWaitTimeout):
break epilogLoop
case evt := <-ms.eventCh:
@@ -339,8 +351,6 @@ func (ms *microServiceBRWatcher) startWatch() {
}
logger.Info("Complete callback on data path pod termination")
ms.wgWatcher.Done()
}()
}
@@ -348,20 +358,22 @@ func (ms *microServiceBRWatcher) onEvent(evt *v1.Event) {
switch evt.Reason {
case EventReasonStarted:
ms.startedFromEvent = true
ms.log.Infof("Received data path start message %s", evt.Message)
ms.log.Infof("Received data path start message: %s", evt.Message)
case EventReasonProgress:
ms.callbacks.OnProgress(ms.ctx, ms.namespace, ms.taskName, funcGetProgressFromMessage(evt.Message, ms.log))
case EventReasonCompleted:
ms.log.Infof("Received data path completed message %v", funcGetResultFromMessage(ms.taskType, evt.Message, ms.log))
ms.log.Infof("Received data path completed message: %v", funcGetResultFromMessage(ms.taskType, evt.Message, ms.log))
ms.terminatedFromEvent = true
case EventReasonCancelled:
ms.log.Infof("Received data path canceled message %s", evt.Message)
ms.log.Infof("Received data path canceled message: %s", evt.Message)
ms.terminatedFromEvent = true
case EventReasonFailed:
ms.log.Infof("Received data path failed message %s", evt.Message)
ms.log.Infof("Received data path failed message: %s", evt.Message)
ms.terminatedFromEvent = true
case EventReasonCancelling:
ms.log.Infof("Received data path canceling message: %s", evt.Message)
default:
ms.log.Debugf("Received event for data mover %s.[reason %s, message %s]", ms.taskName, evt.Reason, evt.Message)
ms.log.Infof("Received event for data path %s,reason: %s, message: %s", ms.taskName, evt.Reason, evt.Message)
}
}

View File

@@ -102,7 +102,7 @@ func TestReEnsureThisPod(t *testing.T) {
log: velerotest.NewLogger(),
}
err := ms.reEnsureThisPod()
err := ms.reEnsureThisPod(context.Background())
if test.expectErr != "" {
assert.EqualError(t, err, test.expectErr)
} else {