Implement parallel ItemBlock processing via backup_controller goroutines

Signed-off-by: Scott Seago <sseago@redhat.com>
Author: Scott Seago
Date:   2025-01-16 18:07:59 -05:00
Parent: 79707aaa60
Commit: fcfb2fd9ee

12 changed files with 371 additions and 70 deletions


@@ -26,6 +26,7 @@ import (
 	"io"
 	"os"
 	"path/filepath"
+	"sync"
 	"time"

 	"github.com/pkg/errors"
@@ -238,7 +239,7 @@ func (kb *kubernetesBackupper) BackupWithResolvers(
 	gzippedData := gzip.NewWriter(backupFile)
 	defer gzippedData.Close()

-	tw := tar.NewWriter(gzippedData)
+	tw := NewTarWriter(tar.NewWriter(gzippedData))
 	defer tw.Close()

 	log.Info("Writing backup version file")
@@ -380,6 +381,7 @@ func (kb *kubernetesBackupper) BackupWithResolvers(
 			boolptr.IsSetToTrue(backupRequest.Spec.DefaultVolumesToFsBackup),
 			!backupRequest.ResourceIncludesExcludes.ShouldInclude(kuberesource.PersistentVolumeClaims.String()),
 		),
+		kubernetesBackupper: kb,
 	}

 	// helper struct to send current progress between the main
@@ -431,6 +433,8 @@ func (kb *kubernetesBackupper) BackupWithResolvers(
 		}
 	}()

+	responseCtx, responseCancel := context.WithCancel(context.Background())
+
 	backedUpGroupResources := map[schema.GroupResource]bool{}
 	// Maps items in the item list from GR+NamespacedName to a slice of pointers to kubernetesResources
 	// We need the slice value since if the EnableAPIGroupVersions feature flag is set, there may
@@ -443,9 +447,60 @@ func (kb *kubernetesBackupper) BackupWithResolvers(
 			Name:      items[i].name,
 		}
 		itemsMap[key] = append(itemsMap[key], items[i])
+		// add to total items for progress reporting
+		if items[i].kind != "" {
+			backupRequest.BackedUpItems.AddItemToTotal(itemKey{
+				resource:  fmt.Sprintf("%s/%s", items[i].preferredGVR.GroupVersion().String(), items[i].kind),
+				namespace: items[i].namespace,
+				name:      items[i].name,
+			})
+		}
 	}

 	var itemBlock *BackupItemBlock
+	itemBlockReturn := make(chan ItemBlockReturn, 100)
+	wg := &sync.WaitGroup{}
+	// Handle returns from worker pool processing ItemBlocks
+	go func() {
+		for {
+			select {
+			case response := <-itemBlockReturn: // process each BackupItemBlock response
+				func() {
+					defer wg.Done()
+					if response.err != nil {
+						log.WithError(errors.WithStack(response.err)).Error("Got error in BackupItemBlock.")
+					}
+					for _, backedUpGR := range response.resources {
+						backedUpGroupResources[backedUpGR] = true
+					}
+					// We could eventually track which itemBlocks have finished
+					// using response.itemBlock
+
+					// updated total is computed as "how many items we've backed up so far,
+					// plus how many items are processed but not yet backed up plus how many
+					// we know of that are remaining to be processed"
+					backedUpItems, totalItems := backupRequest.BackedUpItems.BackedUpAndTotalLen()
+
+					// send a progress update
+					update <- progressUpdate{
+						totalItems:    totalItems,
+						itemsBackedUp: backedUpItems,
+					}
+
+					if len(response.itemBlock.Items) > 0 {
+						log.WithFields(map[string]any{
+							"progress":  "",
+							"kind":      response.itemBlock.Items[0].Item.GroupVersionKind().GroupKind().String(),
+							"namespace": response.itemBlock.Items[0].Item.GetNamespace(),
+							"name":      response.itemBlock.Items[0].Item.GetName(),
+						}).Infof("Backed up %d items out of an estimated total of %d (estimate will change throughout the backup)", backedUpItems, totalItems)
+					}
+				}()
+			case <-responseCtx.Done():
+				return
+			}
+		}
+	}()

 	for i := range items {
 		log.WithFields(map[string]any{
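Aside: the goroutine added above is the fan-in half of the new pipeline: a single collector drains itemBlockReturn, merges results into the shared backedUpGroupResources map, and emits progress updates, so no locking is needed around that state. Below is a minimal, self-contained sketch of the same pattern; blockResult and every other name in it are simplified stand-ins for illustration, not Velero's actual types.

```go
package main

import (
	"context"
	"fmt"
	"sync"
)

// blockResult is a simplified stand-in for ItemBlockReturn (which carries
// the itemBlock, the backed-up GroupResources, and an error).
type blockResult struct {
	id  int
	err error
}

func main() {
	results := make(chan blockResult, 100) // buffered, like itemBlockReturn
	wg := &sync.WaitGroup{}
	ctx, cancel := context.WithCancel(context.Background())

	// One collector goroutine owns the shared maps/counters, so the
	// workers never touch them and no extra locking is needed.
	go func() {
		for {
			select {
			case r := <-results:
				if r.err != nil {
					fmt.Println("block failed:", r.err)
				}
				fmt.Println("collected result for block", r.id)
				wg.Done() // balanced by wg.Add(1) at dispatch time
			case <-ctx.Done():
				return
			}
		}
	}()

	for i := 0; i < 5; i++ {
		wg.Add(1)
		go func(id int) { results <- blockResult{id: id} }(i) // worker stand-in
	}

	wg.Wait() // every dispatched block has been collected
	cancel()  // now it is safe to stop the collector
}
```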
@@ -491,32 +546,32 @@ func (kb *kubernetesBackupper) BackupWithResolvers(
 		addNextToBlock := i < len(items)-1 && items[i].orderedResource && items[i+1].orderedResource && items[i].groupResource == items[i+1].groupResource
 		if itemBlock != nil && len(itemBlock.Items) > 0 && !addNextToBlock {
 			log.Infof("Backing Up Item Block including %s %s/%s (%v items in block)", items[i].groupResource.String(), items[i].namespace, items[i].name, len(itemBlock.Items))
-			backedUpGRs := kb.backupItemBlock(*itemBlock)
-			for _, backedUpGR := range backedUpGRs {
-				backedUpGroupResources[backedUpGR] = true
-			}
+			wg.Add(1)
+			backupRequest.ItemBlockChannel <- ItemBlockInput{
+				itemBlock:  itemBlock,
+				returnChan: itemBlockReturn,
+			}
 			itemBlock = nil
 		}
-
-		// updated total is computed as "how many items we've backed up so far, plus
-		// how many items we know of that are remaining"
-		backedUpItems := backupRequest.BackedUpItems.Len()
-		totalItems := backedUpItems + (len(items) - (i + 1))
-
-		// send a progress update
-		update <- progressUpdate{
-			totalItems:    totalItems,
-			itemsBackedUp: backedUpItems,
-		}
-
-		log.WithFields(map[string]any{
-			"progress":  "",
-			"resource":  items[i].groupResource.String(),
-			"namespace": items[i].namespace,
-			"name":      items[i].name,
-		}).Infof("Backed up %d items out of an estimated total of %d (estimate will change throughout the backup)", backedUpItems, totalItems)
 	}

+	done := make(chan struct{})
+	go func() {
+		defer close(done)
+		wg.Wait()
+	}()
+	// Wait for all the ItemBlocks to be processed
+	select {
+	case <-done:
+		log.Info("done processing ItemBlocks")
+	case <-responseCtx.Done():
+		log.Info("ItemBlock processing canceled")
+	}
+	// cancel response-processing goroutine
+	responseCancel()
+
 	// no more progress updates will be sent on the 'update' channel
 	quit <- struct{}{}
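Aside: the loop above is now purely a producer: it calls wg.Add(1), sends each completed block to backupRequest.ItemBlockChannel, and finally waits on a done channel that closes once every response has been collected. The consumer side (per the commit title, goroutines started by backup_controller) is not in this file; the sketch below shows one plausible shape for such a worker pool, with all names invented for illustration.

```go
package main

import (
	"fmt"
	"sync"
)

// blockInput is an illustrative stand-in for ItemBlockInput.
type blockInput struct {
	block      string
	returnChan chan<- string
}

// startWorkerPool launches a fixed number of goroutines that drain a
// shared input channel; this mirrors the consumer side the commit title
// places in backup_controller.
func startWorkerPool(workers int, in <-chan blockInput) {
	for w := 0; w < workers; w++ {
		go func(id int) {
			for msg := range in { // each worker picks up the next block
				msg.returnChan <- fmt.Sprintf("worker %d processed %s", id, msg.block)
			}
		}(w)
	}
}

func main() {
	in := make(chan blockInput)
	out := make(chan string, 10)
	var wg sync.WaitGroup

	startWorkerPool(3, in)
	go func() { // single collector, as in the hunks above
		for s := range out {
			fmt.Println(s)
			wg.Done()
		}
	}()

	for _, b := range []string{"pods", "pvcs", "deployments"} {
		wg.Add(1)
		in <- blockInput{block: b, returnChan: out}
	}
	wg.Wait() // all blocks dispatched, processed, and collected
	close(in) // lets the workers exit
}
```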
@@ -663,7 +718,7 @@ func (kb *kubernetesBackupper) executeItemBlockActions(
 	}
 }

-func (kb *kubernetesBackupper) backupItemBlock(itemBlock BackupItemBlock) []schema.GroupResource {
+func (kb *kubernetesBackupper) backupItemBlock(itemBlock *BackupItemBlock) []schema.GroupResource {
 	// find pods in ItemBlock
 	// filter pods based on whether they still need to be backed up
 	// this list will be used to run pre/post hooks
@@ -697,7 +752,7 @@ func (kb *kubernetesBackupper) backupItemBlock(itemBlock BackupItemBlock) []sche
 	itemBlock.Log.Debug("Backing up items in BackupItemBlock")
 	var grList []schema.GroupResource
 	for _, item := range itemBlock.Items {
-		if backedUp := kb.backupItem(itemBlock.Log, item.Gr, itemBlock.itemBackupper, item.Item, item.PreferredGVR, &itemBlock); backedUp {
+		if backedUp := kb.backupItem(itemBlock.Log, item.Gr, itemBlock.itemBackupper, item.Item, item.PreferredGVR, itemBlock); backedUp {
 			grList = append(grList, item.Gr)
 		}
 	}
@@ -724,7 +779,7 @@ func (kb *kubernetesBackupper) getItemKey(item itemblock.ItemBlockItem) (itemKey
 	return key, nil
 }

-func (kb *kubernetesBackupper) handleItemBlockPreHooks(itemBlock BackupItemBlock, hookPods []itemblock.ItemBlockItem) ([]itemblock.ItemBlockItem, []itemblock.ItemBlockItem, []error) {
+func (kb *kubernetesBackupper) handleItemBlockPreHooks(itemBlock *BackupItemBlock, hookPods []itemblock.ItemBlockItem) ([]itemblock.ItemBlockItem, []itemblock.ItemBlockItem, []error) {
 	var successPods []itemblock.ItemBlockItem
 	var failedPods []itemblock.ItemBlockItem
 	var errs []error
@@ -741,7 +796,7 @@ func (kb *kubernetesBackupper) handleItemBlockPreHooks(itemBlock BackupItemBlock
 }

 // The hooks cannot execute until the PVBs to be processed
-func (kb *kubernetesBackupper) handleItemBlockPostHooks(itemBlock BackupItemBlock, hookPods []itemblock.ItemBlockItem) {
+func (kb *kubernetesBackupper) handleItemBlockPostHooks(itemBlock *BackupItemBlock, hookPods []itemblock.ItemBlockItem) {
 	log := itemBlock.Log
 	defer itemBlock.itemBackupper.hookTracker.AsyncItemBlocks.Done()
@@ -760,7 +815,7 @@ func (kb *kubernetesBackupper) handleItemBlockPostHooks(itemBlock BackupItemBloc
 }

 // wait all PVBs of the item block pods to be processed
-func (kb *kubernetesBackupper) waitUntilPVBsProcessed(ctx context.Context, log logrus.FieldLogger, itemBlock BackupItemBlock, pods []itemblock.ItemBlockItem) error {
+func (kb *kubernetesBackupper) waitUntilPVBsProcessed(ctx context.Context, log logrus.FieldLogger, itemBlock *BackupItemBlock, pods []itemblock.ItemBlockItem) error {
 	pvbMap := map[*velerov1api.PodVolumeBackup]bool{}
 	for _, pod := range pods {
 		namespace, name := pod.Item.GetNamespace(), pod.Item.GetName()
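Aside: per the comments above, post hooks must not run until all PodVolumeBackups for the block's pods have been processed. A dependency-free sketch of that kind of wait loop follows; the real method tracks PodVolumeBackup phases, while allProcessed here is a hypothetical stand-in.

```go
package main

import (
	"context"
	"errors"
	"fmt"
	"time"
)

// waitUntilProcessed polls a completion check until it passes, the
// context is canceled, or the timeout expires.
func waitUntilProcessed(ctx context.Context, timeout time.Duration, allProcessed func() bool) error {
	deadline := time.After(timeout)
	ticker := time.NewTicker(100 * time.Millisecond) // poll interval
	defer ticker.Stop()
	for {
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-deadline:
			return errors.New("timed out waiting for pod volume backups")
		case <-ticker.C:
			if allProcessed() {
				return nil
			}
		}
	}
}

func main() {
	remaining := 3
	err := waitUntilProcessed(context.Background(), 5*time.Second, func() bool {
		remaining-- // stand-in for checking each PVB's phase
		return remaining <= 0
	})
	fmt.Println("wait returned:", err)
}
```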
@@ -883,7 +938,7 @@ func (kb *kubernetesBackupper) backupCRD(log logrus.FieldLogger, gr schema.Group
 	kb.backupItem(log, gvr.GroupResource(), itemBackupper, unstructured, gvr, nil)
 }

-func (kb *kubernetesBackupper) writeBackupVersion(tw *tar.Writer) error {
+func (kb *kubernetesBackupper) writeBackupVersion(tw tarWriter) error {
 	versionFile := filepath.Join(velerov1api.MetadataDir, "version")
 	versionString := fmt.Sprintf("%s\n", BackupFormatVersion)
@@ -914,7 +969,7 @@ func (kb *kubernetesBackupper) FinalizeBackup(
 ) error {
 	gzw := gzip.NewWriter(outBackupFile)
 	defer gzw.Close()

-	tw := tar.NewWriter(gzw)
+	tw := NewTarWriter(tar.NewWriter(gzw))
 	defer tw.Close()

 	gzr, err := gzip.NewReader(inBackupFile)
@@ -968,6 +1023,7 @@ func (kb *kubernetesBackupper) FinalizeBackup(
 		itemHookHandler:          &hook.NoOpItemHookHandler{},
 		podVolumeSnapshotTracker: podvolume.NewTracker(),
 		hookTracker:              hook.NewHookTracker(),
+		kubernetesBackupper:      kb,
 	}

 	updateFiles := make(map[string]FileForArchive)
 	backedUpGroupResources := map[schema.GroupResource]bool{}
@@ -1053,7 +1109,9 @@ func (kb *kubernetesBackupper) FinalizeBackup(
 	return nil
 }

-func buildFinalTarball(tr *tar.Reader, tw *tar.Writer, updateFiles map[string]FileForArchive) error {
+func buildFinalTarball(tr *tar.Reader, tw tarWriter, updateFiles map[string]FileForArchive) error {
+	tw.Lock()
+	defer tw.Unlock()
 	for {
 		header, err := tr.Next()
 		if err == io.EOF {
@@ -1104,10 +1162,16 @@ func buildFinalTarball(tr *tar.Reader, tw *tar.Writer, updateFiles map[string]Fi
 	return nil
 }

-type tarWriter interface {
-	io.Closer
-	Write([]byte) (int, error)
-	WriteHeader(*tar.Header) error
-}
+type tarWriter struct {
+	*tar.Writer
+	*sync.Mutex
+}
+
+func NewTarWriter(writer *tar.Writer) tarWriter {
+	return tarWriter{
+		Writer: writer,
+		Mutex:  &sync.Mutex{},
+	}
+}

 // updateVolumeInfos update the VolumeInfos according to the AsyncOperations
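Aside: tarWriter changes from an interface to a concrete struct embedding both *tar.Writer and *sync.Mutex, so a single tarball can be shared by concurrent ItemBlock workers: whoever writes an entry takes the lock first (as buildFinalTarball now does). A minimal sketch of that usage follows; writeEntry is a hypothetical helper, not part of the commit.

```go
package main

import (
	"archive/tar"
	"os"
	"sync"
)

// Same shape as the commit's type: the embedded Mutex gives callers
// Lock/Unlock, the embedded Writer gives WriteHeader/Write/Close.
type tarWriter struct {
	*tar.Writer
	*sync.Mutex
}

func NewTarWriter(w *tar.Writer) tarWriter {
	return tarWriter{Writer: w, Mutex: &sync.Mutex{}}
}

// writeEntry (hypothetical) holds the lock across the two-step
// WriteHeader+Write sequence so entries from concurrent goroutines
// cannot interleave inside the archive.
func writeEntry(tw tarWriter, name string, data []byte) error {
	tw.Lock()
	defer tw.Unlock()
	if err := tw.WriteHeader(&tar.Header{Name: name, Size: int64(len(data)), Mode: 0o644}); err != nil {
		return err
	}
	_, err := tw.Write(data)
	return err
}

func main() {
	f, _ := os.Create("/tmp/example.tar")
	defer f.Close()
	tw := NewTarWriter(tar.NewWriter(f))
	defer tw.Close()

	var wg sync.WaitGroup
	for _, name := range []string{"a.json", "b.json"} {
		wg.Add(1)
		go func(n string) { // concurrent writers, as with parallel ItemBlocks
			defer wg.Done()
			_ = writeEntry(tw, n, []byte(`{}`))
		}(name)
	}
	wg.Wait()
}
```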