Mirror of https://github.com/vmware-tanzu/velero.git (synced 2026-01-08 06:15:40 +00:00)
Perf improvements for existing resource restore
Use informer cache with dynamic client for Get calls on restore. When enabled, also make the Get call before create. Add server and install parameters to allow disabling this feature, but enable it by default.

Signed-off-by: Scott Seago <sseago@redhat.com>
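The core of the change is a Get-before-Create path served from a per-namespace dynamic informer cache instead of a per-item API-server GET. Below is a minimal, self-contained Go sketch of that pattern; it is not Velero's actual code, and the helper names (newNamespacedFactory, restoreOne), package name, and error handling are illustrative assumptions.

// informer_get_before_create.go -- illustrative sketch only, not Velero source.
package restoresketch

import (
    "context"
    "fmt"

    apierrors "k8s.io/apimachinery/pkg/api/errors"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
    "k8s.io/apimachinery/pkg/runtime/schema"
    "k8s.io/client-go/dynamic"
    "k8s.io/client-go/dynamic/dynamicinformer"
)

// newNamespacedFactory builds one informer factory per target namespace,
// mirroring the per-namespace map this commit adds to restoreContext.
// (Hypothetical helper name.)
func newNamespacedFactory(client dynamic.Interface, namespace string) dynamicinformer.DynamicSharedInformerFactory {
    return dynamicinformer.NewFilteredDynamicSharedInformerFactory(client, 0, namespace, nil)
}

// restoreOne consults the informer cache first and only issues a Create when
// the object is not already in the cluster. (Hypothetical helper name.)
func restoreOne(
    ctx context.Context,
    client dynamic.Interface,
    factory dynamicinformer.DynamicSharedInformerFactory,
    gvr schema.GroupVersionResource,
    obj *unstructured.Unstructured,
) (*unstructured.Unstructured, error) {
    ns, name := obj.GetNamespace(), obj.GetName()

    // ForResource registers (or reuses) an informer for this GVR; Start and
    // WaitForCacheSync are effectively no-ops for informers already running.
    informer := factory.ForResource(gvr)
    factory.Start(ctx.Done())
    factory.WaitForCacheSync(ctx.Done())

    // Serve the existence check from the local cache instead of an API GET per item.
    cached, err := informer.Lister().ByNamespace(ns).Get(name)
    if err == nil {
        existing, ok := cached.(*unstructured.Unstructured)
        if !ok {
            return nil, fmt.Errorf("unexpected type %T in cache for %s/%s", cached, ns, name)
        }
        return existing, nil // already exists; caller decides whether to skip, patch, or warn
    }
    if !apierrors.IsNotFound(err) {
        return nil, fmt.Errorf("cache lookup for %s/%s: %w", ns, name, err)
    }

    // Not found in the cache: create it with the dynamic client as before.
    return client.Resource(gvr).Namespace(ns).Create(ctx, obj, metav1.CreateOptions{})
}

Listing once per namespace and answering later lookups from the shared cache is what makes restores into already-populated clusters cheaper; the DisableInformerCache flag exists because, as the diff notes, a Get before every Create would only slow down restores into a new, empty namespace.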
@@ -51,14 +51,15 @@ func resourceKey(obj runtime.Object) string {
type Request struct {
    *velerov1api.Restore

    Log logrus.FieldLogger
    Backup *velerov1api.Backup
    PodVolumeBackups []*velerov1api.PodVolumeBackup
    VolumeSnapshots []*volume.Snapshot
    BackupReader io.Reader
    RestoredItems map[itemKey]restoredItemStatus
    itemOperationsList *[]*itemoperation.RestoreOperation
    ResourceModifiers *resourcemodifiers.ResourceModifiers
    Log logrus.FieldLogger
    Backup *velerov1api.Backup
    PodVolumeBackups []*velerov1api.PodVolumeBackup
    VolumeSnapshots []*volume.Snapshot
    BackupReader io.Reader
    RestoredItems map[itemKey]restoredItemStatus
    itemOperationsList *[]*itemoperation.RestoreOperation
    ResourceModifiers *resourcemodifiers.ResourceModifiers
    DisableInformerCache bool
}

type restoredItemStatus struct {
@@ -22,6 +22,7 @@ import (
    "fmt"
    "io"
    "os"
    "os/signal"
    "path/filepath"
    "sort"
    "strings"
@@ -42,6 +43,8 @@ import (
    kubeerrs "k8s.io/apimachinery/pkg/util/errors"
    "k8s.io/apimachinery/pkg/util/sets"
    "k8s.io/apimachinery/pkg/util/wait"
    "k8s.io/client-go/dynamic/dynamicinformer"
    "k8s.io/client-go/informers"
    corev1 "k8s.io/client-go/kubernetes/typed/core/v1"
    "k8s.io/client-go/tools/cache"
    crclient "sigs.k8s.io/controller-runtime/pkg/client"
@@ -299,6 +302,8 @@ func (kr *kubernetesRestorer) RestoreWithResolvers(
        resourceTerminatingTimeout: kr.resourceTerminatingTimeout,
        resourceTimeout: kr.resourceTimeout,
        resourceClients: make(map[resourceClientKey]client.Dynamic),
        dynamicInformerFactories: make(map[string]*informerFactoryWithContext),
        resourceInformers: make(map[resourceClientKey]informers.GenericInformer),
        restoredItems: req.RestoredItems,
        renamedPVs: make(map[string]string),
        pvRenamer: kr.pvRenamer,
@@ -312,6 +317,7 @@ func (kr *kubernetesRestorer) RestoreWithResolvers(
        kbClient: kr.kbClient,
        itemOperationsList: req.GetItemOperationsList(),
        resourceModifiers: req.ResourceModifiers,
        disableInformerCache: req.DisableInformerCache,
    }

    return restoreCtx.execute()
@@ -345,6 +351,8 @@ type restoreContext struct {
    resourceTerminatingTimeout time.Duration
    resourceTimeout time.Duration
    resourceClients map[resourceClientKey]client.Dynamic
    dynamicInformerFactories map[string]*informerFactoryWithContext
    resourceInformers map[resourceClientKey]informers.GenericInformer
    restoredItems map[itemKey]restoredItemStatus
    renamedPVs map[string]string
    pvRenamer func(string) (string, error)
@@ -359,6 +367,7 @@ type restoreContext struct {
    kbClient crclient.Client
    itemOperationsList *[]*itemoperation.RestoreOperation
    resourceModifiers *resourcemodifiers.ResourceModifiers
    disableInformerCache bool
}

type resourceClientKey struct {
@@ -366,6 +375,12 @@ type resourceClientKey struct {
    namespace string
}

type informerFactoryWithContext struct {
    factory dynamicinformer.DynamicSharedInformerFactory
    context go_context.Context
    cancel go_context.CancelFunc
}

// getOrderedResources returns an ordered list of resource identifiers to restore,
// based on the provided resource priorities and backup contents. The returned list
// begins with all of the high prioritized resources (in order), ends with all of
@@ -416,6 +431,17 @@ func (ctx *restoreContext) execute() (results.Result, results.Result) {
        }
    }()

    // Need to stop all informers if enabled
    if !ctx.disableInformerCache {
        defer func() {
            // Call the cancel func to close the channel for each started informer
            for _, factory := range ctx.dynamicInformerFactories {
                factory.cancel()
            }
            // After upgrading to client-go 0.27 or newer, also call Shutdown for each informer factory
        }()
    }

    // Need to set this for additionalItems to be restored.
    ctx.restoreDir = dir
@@ -520,6 +546,32 @@ func (ctx *restoreContext) execute() (results.Result, results.Result) {
    warnings.Merge(&w)
    errs.Merge(&e)

    // initialize informer caches for selected resources if enabled
    if !ctx.disableInformerCache {
        // CRD informer will have already been initialized if any CRDs were created,
        // but already-initialized informers aren't re-initialized because getGenericInformer
        // looks for an existing one first.
        factoriesToStart := make(map[string]*informerFactoryWithContext)
        for _, informerResource := range selectedResourceCollection {
            gr := schema.ParseGroupResource(informerResource.resource)
            for _, items := range informerResource.selectedItemsByNamespace {
                // don't use ns key since it represents original ns, not mapped ns
                if len(items) == 0 {
                    continue
                }
                // use the first item in the list to initialize the informer. The rest of the list
                // should share the same gvr and namespace
                _, factory := ctx.getGenericInformerInternal(gr, items[0].version, items[0].targetNamespace)
                if factory != nil {
                    factoriesToStart[items[0].targetNamespace] = factory
                }
            }
        }
        for _, factoryWithContext := range factoriesToStart {
            factoryWithContext.factory.WaitForCacheSync(factoryWithContext.context.Done())
        }
    }

    // reset processedItems and totalItems before processing full resource list
    processedItems = 0
    totalItems = 0
@@ -934,11 +986,14 @@ func (ctx *restoreContext) itemsAvailable(action framework.RestoreItemResolvedAc
    return available, err
}

func (ctx *restoreContext) getResourceClient(groupResource schema.GroupResource, obj *unstructured.Unstructured, namespace string) (client.Dynamic, error) {
    key := resourceClientKey{
        resource: groupResource.WithVersion(obj.GroupVersionKind().Version),
func getResourceClientKey(groupResource schema.GroupResource, version, namespace string) resourceClientKey {
    return resourceClientKey{
        resource: groupResource.WithVersion(version),
        namespace: namespace,
    }
}
func (ctx *restoreContext) getResourceClient(groupResource schema.GroupResource, obj *unstructured.Unstructured, namespace string) (client.Dynamic, error) {
    key := getResourceClientKey(groupResource, obj.GroupVersionKind().Version, namespace)

    if client, ok := ctx.resourceClients[key]; ok {
        return client, nil
@@ -962,6 +1017,49 @@ func (ctx *restoreContext) getResourceClient(groupResource schema.GroupResource,
    return client, nil
}

// if new informer is created, non-nil factory is returned
func (ctx *restoreContext) getGenericInformerInternal(groupResource schema.GroupResource, version, namespace string) (informers.GenericInformer, *informerFactoryWithContext) {
    var returnFactory *informerFactoryWithContext

    key := getResourceClientKey(groupResource, version, namespace)
    factoryWithContext, ok := ctx.dynamicInformerFactories[key.namespace]
    if !ok {
        factory := ctx.dynamicFactory.DynamicSharedInformerFactoryForNamespace(namespace)
        informerContext, informerCancel := signal.NotifyContext(go_context.Background(), os.Interrupt)
        factoryWithContext = &informerFactoryWithContext{
            factory: factory,
            context: informerContext,
            cancel: informerCancel,
        }
        ctx.dynamicInformerFactories[key.namespace] = factoryWithContext
    }
    informer, ok := ctx.resourceInformers[key]
    if !ok {
        ctx.log.Infof("[debug] Creating factory for %s in namespace %s", key.resource, key.namespace)
        informer = factoryWithContext.factory.ForResource(key.resource)
        factoryWithContext.factory.Start(factoryWithContext.context.Done())
        ctx.resourceInformers[key] = informer
        returnFactory = factoryWithContext
    }
    return informer, returnFactory
}

func (ctx *restoreContext) getGenericInformer(groupResource schema.GroupResource, version, namespace string) informers.GenericInformer {
    informer, factoryWithContext := ctx.getGenericInformerInternal(groupResource, version, namespace)
    if factoryWithContext != nil {
        factoryWithContext.factory.WaitForCacheSync(factoryWithContext.context.Done())
    }
    return informer
}
func (ctx *restoreContext) getResourceLister(groupResource schema.GroupResource, obj *unstructured.Unstructured, namespace string) cache.GenericNamespaceLister {
    informer := ctx.getGenericInformer(groupResource, obj.GroupVersionKind().Version, namespace)
    if namespace == "" {
        return informer.Lister()
    } else {
        return informer.Lister().ByNamespace(namespace)
    }
}

func getResourceID(groupResource schema.GroupResource, namespace, name string) string {
    if namespace == "" {
        return fmt.Sprintf("%s/%s", groupResource.String(), name)
@@ -970,6 +1068,20 @@ func getResourceID(groupResource schema.GroupResource, namespace, name string) s
    return fmt.Sprintf("%s/%s/%s", groupResource.String(), namespace, name)
}

func (ctx *restoreContext) getResource(groupResource schema.GroupResource, obj *unstructured.Unstructured, namespace, name string) (*unstructured.Unstructured, error) {
    lister := ctx.getResourceLister(groupResource, obj, namespace)
    clusterObj, err := lister.Get(name)
    if err != nil {
        return nil, errors.Wrapf(err, "error getting resource from lister for %s, %s/%s", groupResource, namespace, name)
    }
    u, ok := clusterObj.(*unstructured.Unstructured)
    if !ok {
        ctx.log.WithError(errors.WithStack(fmt.Errorf("expected *unstructured.Unstructured but got %T", u))).Error("unable to understand entry returned from client")
        return nil, fmt.Errorf("expected *unstructured.Unstructured but got %T", u)
    }
    return u, nil
}

func (ctx *restoreContext) restoreItem(obj *unstructured.Unstructured, groupResource schema.GroupResource, namespace string) (results.Result, results.Result, bool) {
    warnings, errs := results.Result{}, results.Result{}
    // itemExists bool is used to determine whether to include this item in the "wait for additional items" list
@@ -1163,6 +1275,7 @@ func (ctx *restoreContext) restoreItem(obj *unstructured.Unstructured, groupReso

            ctx.renamedPVs[oldName] = pvName
            obj.SetName(pvName)
            name = pvName

            // Add the original PV name as an annotation.
            annotations := obj.GetAnnotations()
@@ -1382,27 +1495,44 @@ func (ctx *restoreContext) restoreItem(obj *unstructured.Unstructured, groupReso
    }

    ctx.log.Infof("Attempting to restore %s: %v", obj.GroupVersionKind().Kind, name)
    createdObj, restoreErr := resourceClient.Create(obj)
    if restoreErr == nil {
        itemExists = true
        ctx.restoredItems[itemKey] = restoredItemStatus{action: itemRestoreResultCreated, itemExists: itemExists}

    // check if we want to treat the error as a warning, in some cases the creation call might not get executed due to object API validations
    // and Velero might not get the already exists error type but in reality the object already exists
    var fromCluster, createdObj *unstructured.Unstructured
    var restoreErr error

    // only attempt Get before Create if using informer cache, otherwise this will slow down restore into
    // new namespace
    if !ctx.disableInformerCache {
        ctx.log.Debugf("Checking for existence %s: %v", obj.GroupVersionKind().Kind, name)
        fromCluster, err = ctx.getResource(groupResource, obj, namespace, name)
    }
    if err != nil || fromCluster == nil {
        // couldn't find the resource, attempt to create
        ctx.log.Debugf("Creating %s: %v", obj.GroupVersionKind().Kind, name)
        createdObj, restoreErr = resourceClient.Create(obj)
        if restoreErr == nil {
            itemExists = true
            ctx.restoredItems[itemKey] = restoredItemStatus{action: itemRestoreResultCreated, itemExists: itemExists}
        }
    }

    isAlreadyExistsError, err := isAlreadyExistsError(ctx, obj, restoreErr, resourceClient)
    if err != nil {
        errs.Add(namespace, err)
        return warnings, errs, itemExists
    }

    // check if we want to treat the error as a warning, in some cases the creation call might not get executed due to object API validations
    // and Velero might not get the already exists error type but in reality the object already exists
    var fromCluster *unstructured.Unstructured

    if restoreErr != nil {
        // check for the existence of the object in cluster, if no error then it implies that object exists
        // and if err then we want to judge whether there is an existing error in the previous creation.
        // if so, we will return the 'get' error.
        // otherwise, we will return the original creation error.
        fromCluster, err = resourceClient.Get(name, metav1.GetOptions{})
        if !ctx.disableInformerCache {
            fromCluster, err = ctx.getResource(groupResource, obj, namespace, name)
        } else {
            fromCluster, err = resourceClient.Get(name, metav1.GetOptions{})
        }
        if err != nil && isAlreadyExistsError {
            ctx.log.Errorf("Error retrieving in-cluster version of %s: %v", kube.NamespaceAndName(obj), err)
            errs.Add(namespace, err)
@@ -1947,6 +2077,7 @@ type restoreableItem struct {
    path string
    targetNamespace string
    name string
    version string // used for initializing informer cache
}

// getOrderedResourceCollection iterates over list of ordered resource
@@ -2136,6 +2267,7 @@ func (ctx *restoreContext) getSelectedRestoreableItems(resource, targetNamespace
            path: itemPath,
            name: item,
            targetNamespace: targetNamespace,
            version: obj.GroupVersionKind().Version,
        }
        restorable.selectedItemsByNamespace[originalNamespace] =
            append(restorable.selectedItemsByNamespace[originalNamespace], selectedItem)
@@ -861,6 +861,7 @@ func TestRestoreItems(t *testing.T) {
        tarball io.Reader
        want []*test.APIResource
        expectedRestoreItems map[itemKey]restoredItemStatus
        disableInformer bool
    }{
        {
            name: "metadata uid/resourceVersion/etc. gets removed",
@@ -1017,6 +1018,26 @@ func TestRestoreItems(t *testing.T) {
            apiResources: []*test.APIResource{
                test.Secrets(builder.ForSecret("ns-1", "sa-1").Data(map[string][]byte{"foo": []byte("bar")}).Result()),
            },
            disableInformer: true,
            want: []*test.APIResource{
                test.Secrets(builder.ForSecret("ns-1", "sa-1").ObjectMeta(builder.WithLabels("velero.io/backup-name", "backup-1", "velero.io/restore-name", "restore-1")).Data(map[string][]byte{"key-1": []byte("value-1")}).Result()),
            },
            expectedRestoreItems: map[itemKey]restoredItemStatus{
                {resource: "v1/Namespace", namespace: "", name: "ns-1"}: {action: "created", itemExists: true},
                {resource: "v1/Secret", namespace: "ns-1", name: "sa-1"}: {action: "updated", itemExists: true},
            },
        },
        {
            name: "update secret data and labels when secret exists in cluster and is not identical to the backed up one, existing resource policy is update, using informer cache",
            restore: defaultRestore().ExistingResourcePolicy("update").Result(),
            backup: defaultBackup().Result(),
            tarball: test.NewTarWriter(t).
                AddItems("secrets", builder.ForSecret("ns-1", "sa-1").Data(map[string][]byte{"key-1": []byte("value-1")}).Result()).
                Done(),
            apiResources: []*test.APIResource{
                test.Secrets(builder.ForSecret("ns-1", "sa-1").Data(map[string][]byte{"foo": []byte("bar")}).Result()),
            },
            disableInformer: false,
            want: []*test.APIResource{
                test.Secrets(builder.ForSecret("ns-1", "sa-1").ObjectMeta(builder.WithLabels("velero.io/backup-name", "backup-1", "velero.io/restore-name", "restore-1")).Data(map[string][]byte{"key-1": []byte("value-1")}).Result()),
            },
@@ -1175,13 +1196,14 @@ func TestRestoreItems(t *testing.T) {
        }

        data := &Request{
            Log: h.log,
            Restore: tc.restore,
            Backup: tc.backup,
            PodVolumeBackups: nil,
            VolumeSnapshots: nil,
            BackupReader: tc.tarball,
            RestoredItems: map[itemKey]restoredItemStatus{},
            Log: h.log,
            Restore: tc.restore,
            Backup: tc.backup,
            PodVolumeBackups: nil,
            VolumeSnapshots: nil,
            BackupReader: tc.tarball,
            RestoredItems: map[itemKey]restoredItemStatus{},
            DisableInformerCache: tc.disableInformer,
        }
        warnings, errs := h.restorer.Restore(
            data,