Add Gauge metric for BSL availability

The gauge is labeled with the name of the BSL.

Signed-off-by: Daniel Jiang <daniel.jiang@broadcom.com>
This commit is contained in:
Daniel Jiang
2025-07-02 14:55:02 +08:00
committed by Xun Jiang/Bruce Jiang
parent 28b2d11b51
commit a550910f36
6 changed files with 40 additions and 4 deletions

View File

@@ -0,0 +1 @@
Add Gauge metric for BSL availability

2
go.mod
View File

@@ -2,8 +2,6 @@ module github.com/vmware-tanzu/velero
go 1.23.0 go 1.23.0
toolchain go1.23.6
require ( require (
cloud.google.com/go/storage v1.54.0 cloud.google.com/go/storage v1.54.0
github.com/Azure/azure-sdk-for-go/sdk/azcore v1.18.0 github.com/Azure/azure-sdk-for-go/sdk/azcore v1.18.0

View File

@@ -590,6 +590,7 @@ func (s *server) runControllers(defaultVolumeSnapshotLocations map[string]string
}, },
newPluginManager, newPluginManager,
backupStoreGetter, backupStoreGetter,
s.metrics,
s.logger, s.logger,
) )
if err := bslr.SetupWithManager(s.mgr); err != nil { if err := bslr.SetupWithManager(s.mgr); err != nil {

View File

@@ -21,6 +21,8 @@ import (
"strings" "strings"
"time" "time"
"github.com/vmware-tanzu/velero/pkg/metrics"
"github.com/pkg/errors" "github.com/pkg/errors"
"github.com/sirupsen/logrus" "github.com/sirupsen/logrus"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -53,7 +55,7 @@ type backupStorageLocationReconciler struct {
// replaced with fakes for testing. // replaced with fakes for testing.
newPluginManager func(logrus.FieldLogger) clientmgmt.Manager newPluginManager func(logrus.FieldLogger) clientmgmt.Manager
backupStoreGetter persistence.ObjectBackupStoreGetter backupStoreGetter persistence.ObjectBackupStoreGetter
metrics *metrics.ServerMetrics
log logrus.FieldLogger log logrus.FieldLogger
} }
@@ -64,6 +66,7 @@ func NewBackupStorageLocationReconciler(
defaultBackupLocationInfo storage.DefaultBackupLocationInfo, defaultBackupLocationInfo storage.DefaultBackupLocationInfo,
newPluginManager func(logrus.FieldLogger) clientmgmt.Manager, newPluginManager func(logrus.FieldLogger) clientmgmt.Manager,
backupStoreGetter persistence.ObjectBackupStoreGetter, backupStoreGetter persistence.ObjectBackupStoreGetter,
metrics *metrics.ServerMetrics,
log logrus.FieldLogger) *backupStorageLocationReconciler { log logrus.FieldLogger) *backupStorageLocationReconciler {
return &backupStorageLocationReconciler{ return &backupStorageLocationReconciler{
ctx: ctx, ctx: ctx,
@@ -71,6 +74,7 @@ func NewBackupStorageLocationReconciler(
defaultBackupLocationInfo: defaultBackupLocationInfo, defaultBackupLocationInfo: defaultBackupLocationInfo,
newPluginManager: newPluginManager, newPluginManager: newPluginManager,
backupStoreGetter: backupStoreGetter, backupStoreGetter: backupStoreGetter,
metrics: metrics,
log: log, log: log,
} }
} }
@@ -164,8 +168,10 @@ func (r *backupStorageLocationReconciler) logReconciledPhase(defaultFound bool,
switch phase { switch phase {
case velerov1api.BackupStorageLocationPhaseAvailable: case velerov1api.BackupStorageLocationPhaseAvailable:
availableBSLs = append(availableBSLs, &locationList.Items[i]) availableBSLs = append(availableBSLs, &locationList.Items[i])
r.metrics.RegisterBackupLocationAvailable(locationList.Items[i].Name)
case velerov1api.BackupStorageLocationPhaseUnavailable: case velerov1api.BackupStorageLocationPhaseUnavailable:
unAvailableBSLs = append(unAvailableBSLs, &locationList.Items[i]) unAvailableBSLs = append(unAvailableBSLs, &locationList.Items[i])
r.metrics.RegisterBackupLocationUnavailable(locationList.Items[i].Name)
default: default:
unknownBSLs = append(unknownBSLs, &locationList.Items[i]) unknownBSLs = append(unknownBSLs, &locationList.Items[i])
} }

View File

@@ -21,6 +21,8 @@ import (
"testing" "testing"
"time" "time"
"github.com/vmware-tanzu/velero/pkg/metrics"
. "github.com/onsi/ginkgo/v2" . "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega" . "github.com/onsi/gomega"
"github.com/pkg/errors" "github.com/pkg/errors"
@@ -92,6 +94,7 @@ var _ = Describe("Backup Storage Location Reconciler", func() {
}, },
newPluginManager: func(logrus.FieldLogger) clientmgmt.Manager { return pluginManager }, newPluginManager: func(logrus.FieldLogger) clientmgmt.Manager { return pluginManager },
backupStoreGetter: NewFakeObjectBackupStoreGetter(backupStores), backupStoreGetter: NewFakeObjectBackupStoreGetter(backupStores),
metrics: metrics.NewServerMetrics(),
log: velerotest.NewLogger(), log: velerotest.NewLogger(),
} }
@@ -157,6 +160,7 @@ var _ = Describe("Backup Storage Location Reconciler", func() {
}, },
newPluginManager: func(logrus.FieldLogger) clientmgmt.Manager { return pluginManager }, newPluginManager: func(logrus.FieldLogger) clientmgmt.Manager { return pluginManager },
backupStoreGetter: NewFakeObjectBackupStoreGetter(backupStores), backupStoreGetter: NewFakeObjectBackupStoreGetter(backupStores),
metrics: metrics.NewServerMetrics(),
log: velerotest.NewLogger(), log: velerotest.NewLogger(),
} }
@@ -245,6 +249,7 @@ func TestEnsureSingleDefaultBSL(t *testing.T) {
ctx: context.Background(), ctx: context.Background(),
client: fake.NewClientBuilder().WithScheme(scheme.Scheme).WithRuntimeObjects(&test.locations).Build(), client: fake.NewClientBuilder().WithScheme(scheme.Scheme).WithRuntimeObjects(&test.locations).Build(),
defaultBackupLocationInfo: test.defaultBackupInfo, defaultBackupLocationInfo: test.defaultBackupInfo,
metrics: metrics.NewServerMetrics(),
log: velerotest.NewLogger(), log: velerotest.NewLogger(),
} }
defaultFound, err := r.ensureSingleDefaultBSL(test.locations) defaultFound, err := r.ensureSingleDefaultBSL(test.locations)
@@ -289,6 +294,7 @@ func TestBSLReconcile(t *testing.T) {
ctx: context.Background(), ctx: context.Background(),
client: fake.NewClientBuilder().WithScheme(scheme.Scheme).WithRuntimeObjects(&test.locationList).Build(), client: fake.NewClientBuilder().WithScheme(scheme.Scheme).WithRuntimeObjects(&test.locationList).Build(),
newPluginManager: func(logrus.FieldLogger) clientmgmt.Manager { return pluginManager }, newPluginManager: func(logrus.FieldLogger) clientmgmt.Manager { return pluginManager },
metrics: metrics.NewServerMetrics(),
log: velerotest.NewLogger(), log: velerotest.NewLogger(),
} }

View File

@@ -47,6 +47,7 @@ const (
backupItemsErrorsGauge = "backup_items_errors" backupItemsErrorsGauge = "backup_items_errors"
backupWarningTotal = "backup_warning_total" backupWarningTotal = "backup_warning_total"
backupLastStatus = "backup_last_status" backupLastStatus = "backup_last_status"
backupLocationStatus = "backup_location_status_gauge"
restoreTotal = "restore_total" restoreTotal = "restore_total"
restoreAttemptTotal = "restore_attempt_total" restoreAttemptTotal = "restore_attempt_total"
restoreValidationFailedTotal = "restore_validation_failed_total" restoreValidationFailedTotal = "restore_validation_failed_total"
@@ -77,6 +78,7 @@ const (
// Labels // Labels
nodeMetricLabel = "node" nodeMetricLabel = "node"
podVolumeOperationLabel = "operation" podVolumeOperationLabel = "operation"
bslNameLabel = "backup_location_name"
pvbNameLabel = "pod_volume_backup" pvbNameLabel = "pod_volume_backup"
scheduleLabel = "schedule" scheduleLabel = "schedule"
backupNameLabel = "backupName" backupNameLabel = "backupName"
@@ -228,6 +230,14 @@ func NewServerMetrics() *ServerMetrics {
}, },
[]string{scheduleLabel}, []string{scheduleLabel},
), ),
backupLocationStatus: prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: metricNamespace,
Name: backupLocationStatus,
Help: "The status of backup location. A value of 1 is available, 0 is unavailable",
},
[]string{bslNameLabel},
),
restoreTotal: prometheus.NewGauge( restoreTotal: prometheus.NewGauge(
prometheus.GaugeOpts{ prometheus.GaugeOpts{
Namespace: metricNamespace, Namespace: metricNamespace,
@@ -888,3 +898,17 @@ func (m *ServerMetrics) RegisterCSISnapshotFailures(backupSchedule, backupName s
c.WithLabelValues(backupSchedule, backupName).Add(float64(csiSnapshotsFailed)) c.WithLabelValues(backupSchedule, backupName).Add(float64(csiSnapshotsFailed))
} }
} }
// RegisterBackupLocationAvailable marks the named backup location as available
// by setting its backup location status gauge to 1.
// It is a no-op when no *prometheus.GaugeVec is stored under the
// backupLocationStatus key.
func (m *ServerMetrics) RegisterBackupLocationAvailable(backupLocationName string) {
	gauge, found := m.metrics[backupLocationStatus].(*prometheus.GaugeVec)
	if !found {
		return
	}
	// One gauge series per backup location, keyed by its name.
	gauge.WithLabelValues(backupLocationName).Set(1)
}
// RegisterBackupLocationUnavailable marks the named backup location as
// unavailable by setting its backup location status gauge to 0.
// It is a no-op when no *prometheus.GaugeVec is stored under the
// backupLocationStatus key.
func (m *ServerMetrics) RegisterBackupLocationUnavailable(backupLocationName string) {
	gauge, found := m.metrics[backupLocationStatus].(*prometheus.GaugeVec)
	if !found {
		return
	}
	// One gauge series per backup location, keyed by its name.
	gauge.WithLabelValues(backupLocationName).Set(0)
}