Add Gauge metric for BSL availability

The gauge is labeled with the name of the BSL.

Signed-off-by: Daniel Jiang <daniel.jiang@broadcom.com>
This commit is contained in:
Daniel Jiang
2025-07-02 14:55:02 +08:00
committed by Xun Jiang/Bruce Jiang
parent 28b2d11b51
commit a550910f36
6 changed files with 40 additions and 4 deletions

View File

@@ -0,0 +1 @@
Add Gauge metric for BSL availability

2
go.mod
View File

@@ -2,8 +2,6 @@ module github.com/vmware-tanzu/velero
go 1.23.0
toolchain go1.23.6
require (
cloud.google.com/go/storage v1.54.0
github.com/Azure/azure-sdk-for-go/sdk/azcore v1.18.0

View File

@@ -590,6 +590,7 @@ func (s *server) runControllers(defaultVolumeSnapshotLocations map[string]string
},
newPluginManager,
backupStoreGetter,
s.metrics,
s.logger,
)
if err := bslr.SetupWithManager(s.mgr); err != nil {

View File

@@ -21,6 +21,8 @@ import (
"strings"
"time"
"github.com/vmware-tanzu/velero/pkg/metrics"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -53,8 +55,8 @@ type backupStorageLocationReconciler struct {
// replaced with fakes for testing.
newPluginManager func(logrus.FieldLogger) clientmgmt.Manager
backupStoreGetter persistence.ObjectBackupStoreGetter
log logrus.FieldLogger
metrics *metrics.ServerMetrics
log logrus.FieldLogger
}
// NewBackupStorageLocationReconciler initialize and return a backupStorageLocationReconciler struct
@@ -64,6 +66,7 @@ func NewBackupStorageLocationReconciler(
defaultBackupLocationInfo storage.DefaultBackupLocationInfo,
newPluginManager func(logrus.FieldLogger) clientmgmt.Manager,
backupStoreGetter persistence.ObjectBackupStoreGetter,
metrics *metrics.ServerMetrics,
log logrus.FieldLogger) *backupStorageLocationReconciler {
return &backupStorageLocationReconciler{
ctx: ctx,
@@ -71,6 +74,7 @@ func NewBackupStorageLocationReconciler(
defaultBackupLocationInfo: defaultBackupLocationInfo,
newPluginManager: newPluginManager,
backupStoreGetter: backupStoreGetter,
metrics: metrics,
log: log,
}
}
@@ -164,8 +168,10 @@ func (r *backupStorageLocationReconciler) logReconciledPhase(defaultFound bool,
switch phase {
case velerov1api.BackupStorageLocationPhaseAvailable:
availableBSLs = append(availableBSLs, &locationList.Items[i])
r.metrics.RegisterBackupLocationAvailable(locationList.Items[i].Name)
case velerov1api.BackupStorageLocationPhaseUnavailable:
unAvailableBSLs = append(unAvailableBSLs, &locationList.Items[i])
r.metrics.RegisterBackupLocationUnavailable(locationList.Items[i].Name)
default:
unknownBSLs = append(unknownBSLs, &locationList.Items[i])
}

View File

@@ -21,6 +21,8 @@ import (
"testing"
"time"
"github.com/vmware-tanzu/velero/pkg/metrics"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/pkg/errors"
@@ -92,6 +94,7 @@ var _ = Describe("Backup Storage Location Reconciler", func() {
},
newPluginManager: func(logrus.FieldLogger) clientmgmt.Manager { return pluginManager },
backupStoreGetter: NewFakeObjectBackupStoreGetter(backupStores),
metrics: metrics.NewServerMetrics(),
log: velerotest.NewLogger(),
}
@@ -157,6 +160,7 @@ var _ = Describe("Backup Storage Location Reconciler", func() {
},
newPluginManager: func(logrus.FieldLogger) clientmgmt.Manager { return pluginManager },
backupStoreGetter: NewFakeObjectBackupStoreGetter(backupStores),
metrics: metrics.NewServerMetrics(),
log: velerotest.NewLogger(),
}
@@ -245,6 +249,7 @@ func TestEnsureSingleDefaultBSL(t *testing.T) {
ctx: context.Background(),
client: fake.NewClientBuilder().WithScheme(scheme.Scheme).WithRuntimeObjects(&test.locations).Build(),
defaultBackupLocationInfo: test.defaultBackupInfo,
metrics: metrics.NewServerMetrics(),
log: velerotest.NewLogger(),
}
defaultFound, err := r.ensureSingleDefaultBSL(test.locations)
@@ -289,6 +294,7 @@ func TestBSLReconcile(t *testing.T) {
ctx: context.Background(),
client: fake.NewClientBuilder().WithScheme(scheme.Scheme).WithRuntimeObjects(&test.locationList).Build(),
newPluginManager: func(logrus.FieldLogger) clientmgmt.Manager { return pluginManager },
metrics: metrics.NewServerMetrics(),
log: velerotest.NewLogger(),
}

View File

@@ -47,6 +47,7 @@ const (
backupItemsErrorsGauge = "backup_items_errors"
backupWarningTotal = "backup_warning_total"
backupLastStatus = "backup_last_status"
backupLocationStatus = "backup_location_status_gauge"
restoreTotal = "restore_total"
restoreAttemptTotal = "restore_attempt_total"
restoreValidationFailedTotal = "restore_validation_failed_total"
@@ -77,6 +78,7 @@ const (
// Labels
nodeMetricLabel = "node"
podVolumeOperationLabel = "operation"
bslNameLabel = "backup_location_name"
pvbNameLabel = "pod_volume_backup"
scheduleLabel = "schedule"
backupNameLabel = "backupName"
@@ -228,6 +230,14 @@ func NewServerMetrics() *ServerMetrics {
},
[]string{scheduleLabel},
),
backupLocationStatus: prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: metricNamespace,
Name: backupLocationStatus,
Help: "The status of backup location. A value of 1 is available, 0 is unavailable",
},
[]string{bslNameLabel},
),
restoreTotal: prometheus.NewGauge(
prometheus.GaugeOpts{
Namespace: metricNamespace,
@@ -888,3 +898,17 @@ func (m *ServerMetrics) RegisterCSISnapshotFailures(backupSchedule, backupName s
c.WithLabelValues(backupSchedule, backupName).Add(float64(csiSnapshotsFailed))
}
}
// RegisterBackupLocationAvailable marks the named backup location as
// available by setting the backup location status gauge to 1 for the
// series labeled with backupLocationName.
//
// The call is a no-op when no entry is stored under the
// backupLocationStatus key or the stored entry is not a
// *prometheus.GaugeVec.
func (m *ServerMetrics) RegisterBackupLocationAvailable(backupLocationName string) {
	statusGauge, isGaugeVec := m.metrics[backupLocationStatus].(*prometheus.GaugeVec)
	if !isGaugeVec {
		return
	}
	statusGauge.WithLabelValues(backupLocationName).Set(1)
}
// RegisterBackupLocationUnavailable marks the named backup location as
// unavailable by setting the backup location status gauge to 0 for the
// series labeled with backupLocationName.
//
// The call is a no-op when no entry is stored under the
// backupLocationStatus key or the stored entry is not a
// *prometheus.GaugeVec.
func (m *ServerMetrics) RegisterBackupLocationUnavailable(backupLocationName string) {
	statusGauge, isGaugeVec := m.metrics[backupLocationStatus].(*prometheus.GaugeVec)
	if !isGaugeVec {
		return
	}
	statusGauge.WithLabelValues(backupLocationName).Set(0)
}