From 0e0f357cef7cf15d4c1d291d3caafff2eeb69c1e Mon Sep 17 00:00:00 2001 From: Aman Wangde Date: Wed, 10 Apr 2019 15:41:28 -0400 Subject: [PATCH] Added ability to disable controllers (#1326) Signed-off-by: James King --- changelogs/unreleased/1326-amanw | 1 + pkg/cmd/server/server.go | 309 ++++++++++++++++++++----------- 2 files changed, 198 insertions(+), 112 deletions(-) create mode 100644 changelogs/unreleased/1326-amanw diff --git a/changelogs/unreleased/1326-amanw b/changelogs/unreleased/1326-amanw new file mode 100644 index 000000000..48f9f3153 --- /dev/null +++ b/changelogs/unreleased/1326-amanw @@ -0,0 +1 @@ +Added ability to dynamically disable controllers \ No newline at end of file diff --git a/pkg/cmd/server/server.go b/pkg/cmd/server/server.go index 4df0dfc40..e2bdc21fb 100644 --- a/pkg/cmd/server/server.go +++ b/pkg/cmd/server/server.go @@ -84,10 +84,35 @@ const ( defaultProfilerAddress = "localhost:6060" + // keys used to map out available controllers with disable-controllers flag + BackupControllerKey = "backup" + BackupSyncControllerKey = "backup-sync" + ScheduleControllerKey = "schedule" + GcControllerKey = "gc" + BackupDeletionControllerKey = "backup-deletion" + RestoreControllerKey = "restore" + DownloadRequestControllerKey = "download-request" + ResticRepoControllerKey = "restic-repo" + ServerStatusRequestControllerKey = "server-status-request" + + defaultControllerWorkers = 1 // the default TTL for a backup defaultBackupTTL = 30 * 24 * time.Hour ) +// list of available controllers for input validation +var disableControllerList = []string{ + BackupControllerKey, + BackupSyncControllerKey, + ScheduleControllerKey, + GcControllerKey, + BackupDeletionControllerKey, + RestoreControllerKey, + DownloadRequestControllerKey, + ResticRepoControllerKey, + ServerStatusRequestControllerKey, +} + type serverConfig struct { pluginDir, metricsAddress, defaultBackupLocation string backupSyncPeriod, podVolumeOperationTimeout, resourceTerminatingTimeout time.Duration @@ -95,11 +120,17 @@ type serverConfig struct { restoreResourcePriorities []string defaultVolumeSnapshotLocations map[string]string restoreOnly bool + disabledControllers []string clientQPS float32 clientBurst int profilerAddress string } +type controllerRunInfo struct { + controller controller.Interface + numWorkers int +} + func NewCommand() *cobra.Command { var ( volumeSnapshotLocations = flag.NewMap().WithKeyValueDelimiter(":") @@ -169,6 +200,7 @@ func NewCommand() *cobra.Command { command.Flags().DurationVar(&config.backupSyncPeriod, "backup-sync-period", config.backupSyncPeriod, "how often to ensure all Velero backups in object storage exist as Backup API objects in the cluster") command.Flags().DurationVar(&config.podVolumeOperationTimeout, "restic-timeout", config.podVolumeOperationTimeout, "how long backups/restores of pod volumes should be allowed to run before timing out") command.Flags().BoolVar(&config.restoreOnly, "restore-only", config.restoreOnly, "run in a mode where only restores are allowed; backups, schedules, and garbage-collection are all disabled") + command.Flags().StringSliceVar(&config.disabledControllers, "disable-controllers", config.disabledControllers, fmt.Sprintf("list of controllers to disable on startup. Valid values are %s", strings.Join(disableControllerList, ","))) command.Flags().StringSliceVar(&config.restoreResourcePriorities, "restore-resource-priorities", config.restoreResourcePriorities, "desired order of resource restores; any resource not in the list will be restored alphabetically after the prioritized resources") command.Flags().StringVar(&config.defaultBackupLocation, "default-backup-storage-location", config.defaultBackupLocation, "name of the default backup storage location") command.Flags().Var(&volumeSnapshotLocations, "default-volume-snapshot-locations", "list of unique volume providers and default volume snapshot location (provider1:location-01,provider2:location-02,...)") @@ -530,28 +562,28 @@ func (s *server) runControllers(defaultVolumeSnapshotLocations map[string]string return clientmgmt.NewManager(logger, s.logLevel, s.pluginRegistry) } - backupSyncController := controller.NewBackupSyncController( - s.veleroClient.VeleroV1(), - s.veleroClient.VeleroV1(), - s.sharedInformerFactory.Velero().V1().Backups(), - s.sharedInformerFactory.Velero().V1().BackupStorageLocations(), - s.config.backupSyncPeriod, - s.namespace, - s.config.defaultBackupLocation, - newPluginManager, - s.logger, - ) - wg.Add(1) - go func() { - backupSyncController.Run(ctx, 1) - wg.Done() - }() + backupSyncControllerRunInfo := func() controllerRunInfo { + backupSyncContoller := controller.NewBackupSyncController( + s.veleroClient.VeleroV1(), + s.veleroClient.VeleroV1(), + s.sharedInformerFactory.Velero().V1().Backups(), + s.sharedInformerFactory.Velero().V1().BackupStorageLocations(), + s.config.backupSyncPeriod, + s.namespace, + s.config.defaultBackupLocation, + newPluginManager, + s.logger, + ) - if s.config.restoreOnly { - s.logger.Info("Restore only mode - not starting the backup, schedule, delete-backup, or GC controllers") - } else { - backupTracker := controller.NewBackupTracker() + return controllerRunInfo{ + controller: backupSyncContoller, + numWorkers: defaultControllerWorkers, + } + } + backupTracker := controller.NewBackupTracker() + + backupControllerRunInfo := func() controllerRunInfo { backupper, err := backup.NewKubernetesBackupper( s.discoveryHelper, client.NewDynamicFactory(s.dynamicClient), @@ -576,12 +608,14 @@ func (s *server) runControllers(defaultVolumeSnapshotLocations map[string]string defaultVolumeSnapshotLocations, s.metrics, ) - wg.Add(1) - go func() { - backupController.Run(ctx, 1) - wg.Done() - }() + return controllerRunInfo{ + controller: backupController, + numWorkers: defaultControllerWorkers, + } + } + + scheduleControllerRunInfo := func() controllerRunInfo { scheduleController := controller.NewScheduleController( s.namespace, s.veleroClient.VeleroV1(), @@ -590,25 +624,29 @@ func (s *server) runControllers(defaultVolumeSnapshotLocations map[string]string s.logger, s.metrics, ) - wg.Add(1) - go func() { - scheduleController.Run(ctx, 1) - wg.Done() - }() + return controllerRunInfo{ + controller: scheduleController, + numWorkers: defaultControllerWorkers, + } + } + + gcControllerRunInfo := func() controllerRunInfo { gcController := controller.NewGCController( s.logger, s.sharedInformerFactory.Velero().V1().Backups(), s.sharedInformerFactory.Velero().V1().DeleteBackupRequests(), s.veleroClient.VeleroV1(), ) - wg.Add(1) - go func() { - gcController.Run(ctx, 1) - wg.Done() - }() - backupDeletionController := controller.NewBackupDeletionController( + return controllerRunInfo{ + controller: gcController, + numWorkers: defaultControllerWorkers, + } + } + + deletionControllerRunInfo := func() controllerRunInfo { + deletionController := controller.NewBackupDeletionController( s.logger, s.sharedInformerFactory.Velero().V1().DeleteBackupRequests(), s.veleroClient.VeleroV1(), // deleteBackupRequestClient @@ -623,88 +661,135 @@ func (s *server) runControllers(defaultVolumeSnapshotLocations map[string]string newPluginManager, s.metrics, ) + + return controllerRunInfo{ + controller: deletionController, + numWorkers: defaultControllerWorkers, + } + } + + restoreControllerRunInfo := func() controllerRunInfo { + + restorer, err := restore.NewKubernetesRestorer( + s.discoveryHelper, + client.NewDynamicFactory(s.dynamicClient), + s.config.restoreResourcePriorities, + s.kubeClient.CoreV1().Namespaces(), + s.resticManager, + s.config.podVolumeOperationTimeout, + s.config.resourceTerminatingTimeout, + s.logger, + ) + cmd.CheckError(err) + + restoreController := controller.NewRestoreController( + s.namespace, + s.sharedInformerFactory.Velero().V1().Restores(), + s.veleroClient.VeleroV1(), + s.veleroClient.VeleroV1(), + restorer, + s.sharedInformerFactory.Velero().V1().Backups(), + s.sharedInformerFactory.Velero().V1().BackupStorageLocations(), + s.sharedInformerFactory.Velero().V1().VolumeSnapshotLocations(), + s.logger, + s.logLevel, + newPluginManager, + s.config.defaultBackupLocation, + s.metrics, + ) + + return controllerRunInfo{ + controller: restoreController, + numWorkers: defaultControllerWorkers, + } + } + + resticRepoControllerRunInfo := func() controllerRunInfo { + resticRepoController := controller.NewResticRepositoryController( + s.logger, + s.sharedInformerFactory.Velero().V1().ResticRepositories(), + s.veleroClient.VeleroV1(), + s.sharedInformerFactory.Velero().V1().BackupStorageLocations(), + s.resticManager, + ) + + return controllerRunInfo{ + controller: resticRepoController, + numWorkers: defaultControllerWorkers, + } + } + + downloadrequestControllerRunInfo := func() controllerRunInfo { + downloadRequestController := controller.NewDownloadRequestController( + s.veleroClient.VeleroV1(), + s.sharedInformerFactory.Velero().V1().DownloadRequests(), + s.sharedInformerFactory.Velero().V1().Restores(), + s.sharedInformerFactory.Velero().V1().BackupStorageLocations(), + s.sharedInformerFactory.Velero().V1().Backups(), + newPluginManager, + s.logger, + ) + + return controllerRunInfo{ + controller: downloadRequestController, + numWorkers: defaultControllerWorkers, + } + } + + serverStatusRequestControllerRunInfo := func() controllerRunInfo { + serverStatusRequestController := controller.NewServerStatusRequestController( + s.logger, + s.veleroClient.VeleroV1(), + s.sharedInformerFactory.Velero().V1().ServerStatusRequests(), + ) + + return controllerRunInfo{ + controller: serverStatusRequestController, + numWorkers: defaultControllerWorkers, + } + } + + enabledControllers := map[string]func() controllerRunInfo{ + BackupSyncControllerKey: backupSyncControllerRunInfo, + BackupControllerKey: backupControllerRunInfo, + ScheduleControllerKey: scheduleControllerRunInfo, + GcControllerKey: gcControllerRunInfo, + BackupDeletionControllerKey: deletionControllerRunInfo, + RestoreControllerKey: restoreControllerRunInfo, + ResticRepoControllerKey: resticRepoControllerRunInfo, + DownloadRequestControllerKey: downloadrequestControllerRunInfo, + ServerStatusRequestControllerKey: serverStatusRequestControllerRunInfo, + } + + if s.config.restoreOnly { + s.logger.Info("Restore only mode - not starting the backup, schedule, delete-backup, or GC controllers") + s.config.disabledControllers = append(s.config.disabledControllers, + BackupControllerKey, + ScheduleControllerKey, + GcControllerKey, + BackupDeletionControllerKey, + ) + } + + // remove disabled controllers + for _, controllerName := range s.config.disabledControllers { + if _, ok := enabledControllers[controllerName]; ok { + s.logger.Infof("Disabling controller: %s", controllerName) + delete(enabledControllers, controllerName) + } else { + s.logger.Fatalf("Invalid value for --disable-controllers flag provided: %s. Valid values are: %s", controllerName, strings.Join(disableControllerList, ",")) + } + } + + for _, newController := range enabledControllers { + controllerRunInfo := newController() wg.Add(1) go func() { - backupDeletionController.Run(ctx, 1) + controllerRunInfo.controller.Run(ctx, controllerRunInfo.numWorkers) wg.Done() }() } - restorer, err := restore.NewKubernetesRestorer( - s.discoveryHelper, - client.NewDynamicFactory(s.dynamicClient), - s.config.restoreResourcePriorities, - s.kubeClient.CoreV1().Namespaces(), - s.resticManager, - s.config.podVolumeOperationTimeout, - s.config.resourceTerminatingTimeout, - s.logger, - ) - cmd.CheckError(err) - - restoreController := controller.NewRestoreController( - s.namespace, - s.sharedInformerFactory.Velero().V1().Restores(), - s.veleroClient.VeleroV1(), - s.veleroClient.VeleroV1(), - restorer, - s.sharedInformerFactory.Velero().V1().Backups(), - s.sharedInformerFactory.Velero().V1().BackupStorageLocations(), - s.sharedInformerFactory.Velero().V1().VolumeSnapshotLocations(), - s.logger, - s.logLevel, - newPluginManager, - s.config.defaultBackupLocation, - s.metrics, - ) - - wg.Add(1) - go func() { - restoreController.Run(ctx, 1) - wg.Done() - }() - - downloadRequestController := controller.NewDownloadRequestController( - s.veleroClient.VeleroV1(), - s.sharedInformerFactory.Velero().V1().DownloadRequests(), - s.sharedInformerFactory.Velero().V1().Restores(), - s.sharedInformerFactory.Velero().V1().BackupStorageLocations(), - s.sharedInformerFactory.Velero().V1().Backups(), - newPluginManager, - s.logger, - ) - wg.Add(1) - go func() { - downloadRequestController.Run(ctx, 1) - wg.Done() - }() - - resticRepoController := controller.NewResticRepositoryController( - s.logger, - s.sharedInformerFactory.Velero().V1().ResticRepositories(), - s.veleroClient.VeleroV1(), - s.sharedInformerFactory.Velero().V1().BackupStorageLocations(), - s.resticManager, - ) - wg.Add(1) - go func() { - // TODO only having a single worker may be an issue since maintenance - // can take a long time. - resticRepoController.Run(ctx, 1) - wg.Done() - }() - - serverStatusRequestController := controller.NewServerStatusRequestController( - s.logger, - s.veleroClient.VeleroV1(), - s.sharedInformerFactory.Velero().V1().ServerStatusRequests(), - ) - wg.Add(1) - go func() { - serverStatusRequestController.Run(ctx, 1) - wg.Done() - }() - // SHARED INFORMERS HAVE TO BE STARTED AFTER ALL CONTROLLERS go s.sharedInformerFactory.Start(ctx.Done())