From 1b8da68fc29da908fb944fb5bcb188bfdef89533 Mon Sep 17 00:00:00 2001 From: Tobias Giese Date: Thu, 28 Oct 2021 19:19:05 +0200 Subject: [PATCH] metrics: add items gauges Signed-off-by: Tobias Giese --- changelogs/unreleased/4296-tobiasgiese | 1 + pkg/controller/backup_controller.go | 10 +++++-- pkg/metrics/metrics.go | 39 ++++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 3 deletions(-) create mode 100644 changelogs/unreleased/4296-tobiasgiese diff --git a/changelogs/unreleased/4296-tobiasgiese b/changelogs/unreleased/4296-tobiasgiese new file mode 100644 index 000000000..28b8ef763 --- /dev/null +++ b/changelogs/unreleased/4296-tobiasgiese @@ -0,0 +1 @@ +Add metrics backup_items_total and backup_items_errors diff --git a/pkg/controller/backup_controller.go b/pkg/controller/backup_controller.go index 034f56d43..dfe7ed001 100644 --- a/pkg/controller/backup_controller.go +++ b/pkg/controller/backup_controller.go @@ -627,15 +627,15 @@ func (c *backupController) runBackup(backup *pkgbackup.Request) error { } } + backup.Status.Warnings = logCounter.GetCount(logrus.WarnLevel) + backup.Status.Errors = logCounter.GetCount(logrus.ErrorLevel) + recordBackupMetrics(backupLog, backup.Backup, backupFile, c.metrics) if err := gzippedLogFile.Close(); err != nil { c.logger.WithField(Backup, kubeutil.NamespaceAndName(backup)).WithError(err).Error("error closing gzippedLogFile") } - backup.Status.Warnings = logCounter.GetCount(logrus.WarnLevel) - backup.Status.Errors = logCounter.GetCount(logrus.ErrorLevel) - // Assign finalize phase as close to end as possible so that any errors // logged to backupLog are captured. 
This is done before uploading the // artifacts to object storage so that the JSON representation of the @@ -685,6 +685,10 @@ func recordBackupMetrics(log logrus.FieldLogger, backup *velerov1api.Backup, bac serverMetrics.RegisterVolumeSnapshotAttempts(backupScheduleName, backup.Status.VolumeSnapshotsAttempted) serverMetrics.RegisterVolumeSnapshotSuccesses(backupScheduleName, backup.Status.VolumeSnapshotsCompleted) serverMetrics.RegisterVolumeSnapshotFailures(backupScheduleName, backup.Status.VolumeSnapshotsAttempted-backup.Status.VolumeSnapshotsCompleted) + if backup.Status.Progress != nil { + serverMetrics.RegisterBackupItemsTotalGauge(backupScheduleName, backup.Status.Progress.TotalItems) + } + serverMetrics.RegisterBackupItemsErrorsGauge(backupScheduleName, backup.Status.Errors) } func persistBackup(backup *pkgbackup.Request, diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index 005560ad8..072db9134 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ -43,6 +43,8 @@ const ( backupDeletionSuccessTotal = "backup_deletion_success_total" backupDeletionFailureTotal = "backup_deletion_failure_total" backupLastSuccessfulTimestamp = "backup_last_successful_timestamp" + backupItemsTotalGauge = "backup_items_total" + backupItemsErrorsGauge = "backup_items_errors" restoreTotal = "restore_total" restoreAttemptTotal = "restore_attempt_total" restoreValidationFailedTotal = "restore_validation_failed_total" @@ -179,6 +181,22 @@ func NewServerMetrics() *ServerMetrics { }, []string{scheduleLabel}, ), + backupItemsTotalGauge: prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: metricNamespace, + Name: backupItemsTotalGauge, + Help: "Total number of items backed up", + }, + []string{scheduleLabel}, + ), + backupItemsErrorsGauge: prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: metricNamespace, + Name: backupItemsErrorsGauge, + Help: "Total number of errors encountered during backup", + }, + []string{scheduleLabel}, + ), restoreTotal: 
prometheus.NewGauge( prometheus.GaugeOpts{ Namespace: metricNamespace, @@ -337,6 +355,12 @@ func (m *ServerMetrics) InitSchedule(scheduleName string) { if c, ok := m.metrics[backupDeletionFailureTotal].(*prometheus.CounterVec); ok { c.WithLabelValues(scheduleName).Add(0) } + if c, ok := m.metrics[backupItemsTotalGauge].(*prometheus.GaugeVec); ok { + c.WithLabelValues(scheduleName).Add(0) + } + if c, ok := m.metrics[backupItemsErrorsGauge].(*prometheus.GaugeVec); ok { + c.WithLabelValues(scheduleName).Add(0) + } if c, ok := m.metrics[restoreAttemptTotal].(*prometheus.CounterVec); ok { c.WithLabelValues(scheduleName).Add(0) } @@ -486,6 +510,21 @@ func (m *ServerMetrics) RegisterBackupDeletionSuccess(backupSchedule string) { } } +// RegisterBackupItemsTotalGauge records the number of items to be backed up by setting the backupItemsTotalGauge gauge for the backupSchedule label to items; it does nothing if that metric is not a *prometheus.GaugeVec. +func (m *ServerMetrics) RegisterBackupItemsTotalGauge(backupSchedule string, items int) { + if c, ok := m.metrics[backupItemsTotalGauge].(*prometheus.GaugeVec); ok { + c.WithLabelValues(backupSchedule).Set(float64(items)) + } +} + +// RegisterBackupItemsErrorsGauge records the number of all error messages that were generated during +// execution of the backup; despite its name, the items parameter carries that error count, and the call does nothing if the metric is not a *prometheus.GaugeVec. +func (m *ServerMetrics) RegisterBackupItemsErrorsGauge(backupSchedule string, items int) { + if c, ok := m.metrics[backupItemsErrorsGauge].(*prometheus.GaugeVec); ok { + c.WithLabelValues(backupSchedule).Set(float64(items)) + } +} + // toSeconds translates a time.Duration value into a float64 // representing the number of seconds in that duration. func toSeconds(d time.Duration) float64 {