mirror of
https://github.com/vmware-tanzu/velero.git
synced 2026-01-03 11:45:20 +00:00
Add CSI VolumeSnapshot related metrics.
Signed-off-by: Xun Jiang <jxun@vmware.com>
This commit is contained in:
@@ -17,6 +17,7 @@ limitations under the License.
|
||||
package v1
|
||||
|
||||
import (
|
||||
resource "k8s.io/apimachinery/pkg/api/resource"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
)
|
||||
|
||||
@@ -310,6 +311,21 @@ type BackupStatus struct {
|
||||
// +optional
|
||||
// +nullable
|
||||
Progress *BackupProgress `json:"progress,omitempty"`
|
||||
|
||||
// CsiVolumeSnapshotsAttempted is the total number of attempted
|
||||
// CSI VolumeSnapshots for this backup.
|
||||
// +optional
|
||||
CsiVolumeSnapshotsAttempted int `json:"csiVolumeSnapshotsAttempted,omitempty"`
|
||||
|
||||
// CsiVolumeSnapshotsCompleted is the total number of successfully
|
||||
// completed CSI VolumeSnapshots for this backup.
|
||||
// +optional
|
||||
CsiVolumeSnapshotsCompleted int `json:"csiVolumeSnapshotsCompleted,omitempty"`
|
||||
|
||||
// CsiVolumeSnapshotsStorageTotal is the total storage size of created
|
||||
// snapshots for this backup.
|
||||
// +optional
|
||||
CsiVolumeSnapshotsStorageTotal resource.Quantity `json:"csiVolumeSnapshotsStorageTotal,omitempty"`
|
||||
}
|
||||
|
||||
// BackupProgress stores information about the progress of a Backup's execution.
|
||||
|
||||
@@ -20,6 +20,7 @@ import (
|
||||
"fmt"
|
||||
"sort"
|
||||
|
||||
snapshotv1api "github.com/kubernetes-csi/external-snapshotter/client/v4/apis/volumesnapshot/v1"
|
||||
"github.com/vmware-tanzu/velero/internal/hook"
|
||||
velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
|
||||
"github.com/vmware-tanzu/velero/pkg/plugin/framework"
|
||||
@@ -48,6 +49,7 @@ type Request struct {
|
||||
VolumeSnapshots []*volume.Snapshot
|
||||
PodVolumeBackups []*velerov1api.PodVolumeBackup
|
||||
BackedUpItems map[itemKey]struct{}
|
||||
CsiSnapshots []*snapshotv1api.VolumeSnapshot
|
||||
}
|
||||
|
||||
// BackupResourceList returns the list of backed up resources grouped by the API
|
||||
|
||||
@@ -636,6 +636,14 @@ func (c *backupController) runBackup(backup *pkgbackup.Request) error {
|
||||
}
|
||||
}
|
||||
|
||||
backup.Status.CsiVolumeSnapshotsAttempted = len(backup.CsiSnapshots)
|
||||
for _, vs := range backup.CsiSnapshots {
|
||||
if *vs.Status.ReadyToUse {
|
||||
backup.Status.CsiVolumeSnapshotsCompleted++
|
||||
backup.Status.CsiVolumeSnapshotsStorageTotal.Add(*vs.Status.RestoreSize)
|
||||
}
|
||||
}
|
||||
|
||||
backup.Status.Warnings = logCounter.GetCount(logrus.WarnLevel)
|
||||
backup.Status.Errors = logCounter.GetCount(logrus.ErrorLevel)
|
||||
|
||||
@@ -694,6 +702,19 @@ func recordBackupMetrics(log logrus.FieldLogger, backup *velerov1api.Backup, bac
|
||||
serverMetrics.RegisterVolumeSnapshotAttempts(backupScheduleName, backup.Status.VolumeSnapshotsAttempted)
|
||||
serverMetrics.RegisterVolumeSnapshotSuccesses(backupScheduleName, backup.Status.VolumeSnapshotsCompleted)
|
||||
serverMetrics.RegisterVolumeSnapshotFailures(backupScheduleName, backup.Status.VolumeSnapshotsAttempted-backup.Status.VolumeSnapshotsCompleted)
|
||||
|
||||
if features.IsEnabled(velerov1api.CSIFeatureFlag) {
|
||||
serverMetrics.RegisterCsiSnapshotAttempts(backupScheduleName, backup.Name, backup.Status.CsiVolumeSnapshotsAttempted)
|
||||
serverMetrics.RegisterCsiSnapshotSuccesses(backupScheduleName, backup.Name, backup.Status.CsiVolumeSnapshotsCompleted)
|
||||
serverMetrics.RegisterCsiSnapshotFailures(backupScheduleName, backup.Name, backup.Status.CsiVolumeSnapshotsAttempted-backup.Status.CsiVolumeSnapshotsCompleted)
|
||||
storageSize, ret := backup.Status.CsiVolumeSnapshotsStorageTotal.AsInt64()
|
||||
if !ret {
|
||||
log.WithError(fmt.Errorf("fail to convert CSI snapshot size: %v to int64", backup.Status.CsiVolumeSnapshotsStorageTotal))
|
||||
storageSize = 0
|
||||
}
|
||||
serverMetrics.RegisterCsiStorageSizeAdd(backupScheduleName, backup.Name, storageSize)
|
||||
}
|
||||
|
||||
if backup.Status.Progress != nil {
|
||||
serverMetrics.RegisterBackupItemsTotalGauge(backupScheduleName, backup.Status.Progress.TotalItems)
|
||||
}
|
||||
|
||||
@@ -28,6 +28,7 @@ import (
|
||||
"github.com/pkg/errors"
|
||||
"github.com/sirupsen/logrus"
|
||||
apierrors "k8s.io/apimachinery/pkg/api/errors"
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/labels"
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
@@ -39,6 +40,7 @@ import (
|
||||
velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
|
||||
pkgbackup "github.com/vmware-tanzu/velero/pkg/backup"
|
||||
"github.com/vmware-tanzu/velero/pkg/discovery"
|
||||
"github.com/vmware-tanzu/velero/pkg/features"
|
||||
velerov1client "github.com/vmware-tanzu/velero/pkg/generated/clientset/versioned/typed/velero/v1"
|
||||
velerov1informers "github.com/vmware-tanzu/velero/pkg/generated/informers/externalversions/velero/v1"
|
||||
velerov1listers "github.com/vmware-tanzu/velero/pkg/generated/listers/velero/v1"
|
||||
@@ -407,6 +409,25 @@ func (c *backupDeletionController) processRequest(req *velerov1api.DeleteBackupR
|
||||
c.metrics.RegisterBackupDeletionFailed(backupScheduleName)
|
||||
}
|
||||
|
||||
if features.IsEnabled(velerov1api.CSIFeatureFlag) {
|
||||
vss, err := backupStore.GetCSIVolumeSnapshots(backup.Name)
|
||||
if err != nil {
|
||||
errs = append(errs, err.Error())
|
||||
}
|
||||
|
||||
var restoreSizeTotal resource.Quantity
|
||||
for _, vs := range vss {
|
||||
restoreSizeTotal.Add(*vs.Status.RestoreSize)
|
||||
}
|
||||
|
||||
storageSize, ret := restoreSizeTotal.AsInt64()
|
||||
if !ret {
|
||||
log.WithError(fmt.Errorf("fail to convert CSI snapshot size: %v to int64", backup.Status.CsiVolumeSnapshotsStorageTotal))
|
||||
storageSize = 0
|
||||
}
|
||||
c.metrics.RegisterCsiStorageSizeSub(backupScheduleName, backup.Name, storageSize)
|
||||
}
|
||||
|
||||
// Update status to processed and record errors
|
||||
req, err = c.patchDeleteBackupRequest(req, func(r *velerov1api.DeleteBackupRequest) {
|
||||
r.Status.Phase = velerov1api.DeleteBackupRequestPhaseProcessed
|
||||
|
||||
@@ -54,6 +54,10 @@ const (
|
||||
volumeSnapshotAttemptTotal = "volume_snapshot_attempt_total"
|
||||
volumeSnapshotSuccessTotal = "volume_snapshot_success_total"
|
||||
volumeSnapshotFailureTotal = "volume_snapshot_failure_total"
|
||||
csiSnapshotAttemptTotal = "csi_snapshot_attempt_total"
|
||||
csiSnapshotSuccessTotal = "csi_snapshot_success_total"
|
||||
csiSnapshotFailureTotal = "csi_snapshot_failure_total"
|
||||
csiSnapshotStorageTotal = "csi_snapshot_storage_total"
|
||||
|
||||
// Restic metrics
|
||||
podVolumeBackupEnqueueTotal = "pod_volume_backup_enqueue_count"
|
||||
@@ -67,8 +71,6 @@ const (
|
||||
pvbNameLabel = "pod_volume_backup"
|
||||
scheduleLabel = "schedule"
|
||||
backupNameLabel = "backupName"
|
||||
|
||||
secondsInMinute = 60.0
|
||||
)
|
||||
|
||||
// NewServerMetrics returns new ServerMetrics
|
||||
@@ -268,6 +270,38 @@ func NewServerMetrics() *ServerMetrics {
|
||||
},
|
||||
[]string{scheduleLabel},
|
||||
),
|
||||
csiSnapshotAttemptTotal: prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Namespace: metricNamespace,
|
||||
Name: csiSnapshotAttemptTotal,
|
||||
Help: "Total number of CSI attempted volume snapshots",
|
||||
},
|
||||
[]string{scheduleLabel, backupNameLabel},
|
||||
),
|
||||
csiSnapshotSuccessTotal: prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Namespace: metricNamespace,
|
||||
Name: csiSnapshotSuccessTotal,
|
||||
Help: "Total number of CSI successful volume snapshots",
|
||||
},
|
||||
[]string{scheduleLabel, backupNameLabel},
|
||||
),
|
||||
csiSnapshotFailureTotal: prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Namespace: metricNamespace,
|
||||
Name: csiSnapshotFailureTotal,
|
||||
Help: "Total number of CSI failed volume snapshots",
|
||||
},
|
||||
[]string{scheduleLabel, backupNameLabel},
|
||||
),
|
||||
csiSnapshotStorageTotal: prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: metricNamespace,
|
||||
Name: csiSnapshotStorageTotal,
|
||||
Help: "Total size of CSI volume snapshots storage size",
|
||||
},
|
||||
[]string{scheduleLabel, backupNameLabel},
|
||||
),
|
||||
},
|
||||
}
|
||||
}
|
||||
@@ -385,6 +419,18 @@ func (m *ServerMetrics) InitSchedule(scheduleName string) {
|
||||
if c, ok := m.metrics[volumeSnapshotFailureTotal].(*prometheus.CounterVec); ok {
|
||||
c.WithLabelValues(scheduleName).Add(0)
|
||||
}
|
||||
if c, ok := m.metrics[csiSnapshotAttemptTotal].(*prometheus.CounterVec); ok {
|
||||
c.WithLabelValues(scheduleName).Add(0)
|
||||
}
|
||||
if c, ok := m.metrics[csiSnapshotSuccessTotal].(*prometheus.CounterVec); ok {
|
||||
c.WithLabelValues(scheduleName).Add(0)
|
||||
}
|
||||
if c, ok := m.metrics[csiSnapshotFailureTotal].(*prometheus.CounterVec); ok {
|
||||
c.WithLabelValues(scheduleName).Add(0)
|
||||
}
|
||||
if c, ok := m.metrics[csiSnapshotStorageTotal].(*prometheus.GaugeVec); ok {
|
||||
c.WithLabelValues(scheduleName).Add(0)
|
||||
}
|
||||
}
|
||||
|
||||
// InitSchedule initializes counter metrics for a node.
|
||||
@@ -593,3 +639,38 @@ func (m *ServerMetrics) RegisterVolumeSnapshotFailures(backupSchedule string, vo
|
||||
c.WithLabelValues(backupSchedule).Add(float64(volumeSnapshotsFailed))
|
||||
}
|
||||
}
|
||||
|
||||
// RegisterCsiSnapshotAttempts records an attempt to snapshot a volume by CSI plugin.
|
||||
func (m *ServerMetrics) RegisterCsiSnapshotAttempts(backupSchedule, backupName string, csiSnapshotsAttempted int) {
|
||||
if c, ok := m.metrics[csiSnapshotAttemptTotal].(*prometheus.CounterVec); ok {
|
||||
c.WithLabelValues(backupSchedule, backupName).Add(float64(csiSnapshotsAttempted))
|
||||
}
|
||||
}
|
||||
|
||||
// RegisterCsiSnapshotSuccesses records a completed volume snapshot by CSI plugin.
|
||||
func (m *ServerMetrics) RegisterCsiSnapshotSuccesses(backupSchedule, backupName string, csiSnapshotCompleted int) {
|
||||
if c, ok := m.metrics[csiSnapshotSuccessTotal].(*prometheus.CounterVec); ok {
|
||||
c.WithLabelValues(backupSchedule, backupName).Add(float64(csiSnapshotCompleted))
|
||||
}
|
||||
}
|
||||
|
||||
// RegisterCsiSnapshotFailures records a failed volume snapshot by CSI plugin.
|
||||
func (m *ServerMetrics) RegisterCsiSnapshotFailures(backupSchedule, backupName string, csiSnapshotsFailed int) {
|
||||
if c, ok := m.metrics[csiSnapshotFailureTotal].(*prometheus.CounterVec); ok {
|
||||
c.WithLabelValues(backupSchedule, backupName).Add(float64(csiSnapshotsFailed))
|
||||
}
|
||||
}
|
||||
|
||||
// RegisterCsiStorageSizeAdd records volume snapshot's storage size increase created by CSI plugin.
|
||||
func (m *ServerMetrics) RegisterCsiStorageSizeAdd(backupSchedule, backupName string, csiStorageSize int64) {
|
||||
if g, ok := m.metrics[csiSnapshotStorageTotal].(*prometheus.GaugeVec); ok {
|
||||
g.WithLabelValues(backupSchedule, backupName).Add(float64(csiStorageSize))
|
||||
}
|
||||
}
|
||||
|
||||
// RegisterCsiStorageSizeSub records volume snapshot's storage size decrease created by CSI plugin.
|
||||
func (m *ServerMetrics) RegisterCsiStorageSizeSub(backupSchedule, backupName string, csiStorageSize int64) {
|
||||
if g, ok := m.metrics[csiSnapshotStorageTotal].(*prometheus.GaugeVec); ok {
|
||||
g.WithLabelValues(backupSchedule, backupName).Sub(float64(csiStorageSize))
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user