diff --git a/weed/server/volume_grpc_scrub.go b/weed/server/volume_grpc_scrub.go index 8c6a235b9..68acc5616 100644 --- a/weed/server/volume_grpc_scrub.go +++ b/weed/server/volume_grpc_scrub.go @@ -4,8 +4,11 @@ import ( "context" "errors" "fmt" + "time" + "github.com/prometheus/client_golang/prometheus" "github.com/seaweedfs/seaweedfs/weed/pb/volume_server_pb" + "github.com/seaweedfs/seaweedfs/weed/stats" "github.com/seaweedfs/seaweedfs/weed/storage" "github.com/seaweedfs/seaweedfs/weed/storage/needle" ) @@ -68,6 +71,11 @@ func (vs *VolumeServer) ScrubVolume(ctx context.Context, req *volume_server_pb.S } } } + + scrubLabels := prometheus.Labels{"mode": req.GetMode().String()} + stats.VolumeServerScrubLastTimeSeconds.With(scrubLabels).Set(float64(time.Now().Unix())) + stats.VolumeServerScrubVolumeFailures.With(scrubLabels).Add(float64(len(brokenVolumes))) + if len(errs) != 0 { return nil, errors.Join(errs...) } @@ -129,6 +137,11 @@ func (vs *VolumeServer) ScrubEcVolume(ctx context.Context, req *volume_server_pb } } + scrubLabels := prometheus.Labels{"mode": req.GetMode().String()} + stats.VolumeServerScrubLastTimeSeconds.With(scrubLabels).Set(float64(time.Now().Unix())) + stats.VolumeServerScrubVolumeFailures.With(scrubLabels).Add(float64(len(brokenVolumeIds))) + stats.VolumeServerScrubShardFailures.With(scrubLabels).Add(float64(len(brokenShardInfos))) + res := &volume_server_pb.ScrubEcVolumeResponse{ TotalVolumes: totalVolumes, TotalFiles: totalFiles, diff --git a/weed/stats/metrics.go b/weed/stats/metrics.go index e943f5726..a66c81e88 100644 --- a/weed/stats/metrics.go +++ b/weed/stats/metrics.go @@ -394,6 +394,30 @@ var ( Help: "Counter of overall failed file write requests from clients.", }) + VolumeServerScrubLastTimeSeconds = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: Namespace, + Subsystem: "volumeServer", + Name: "scrub_last_time_seconds", + Help: "Last scrub execution time, as seconds since UNIX epoch.", + }, []string{"mode"}) + + VolumeServerScrubVolumeFailures = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: Namespace, + Subsystem: "volumeServer", + Name: "scrub_volume_failures", + Help: "Counter of overall volumes with issues detected during scrubbing.", + }, []string{"mode"}) + + VolumeServerScrubShardFailures = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: Namespace, + Subsystem: "volumeServer", + Name: "scrub_shard_failures", + Help: "Counter of overall EC shards with issues detected during scrubbing.", + }, []string{"mode"}) + S3RequestCounter = prometheus.NewCounterVec( prometheus.CounterOpts{ Namespace: Namespace, @@ -564,6 +588,9 @@ func init() { Gather.MustRegister(VolumeServerFileReadFailures) Gather.MustRegister(VolumeServerFileReadInvalidNeedles) Gather.MustRegister(VolumeServerFileWriteFailures) + Gather.MustRegister(VolumeServerScrubLastTimeSeconds) + Gather.MustRegister(VolumeServerScrubVolumeFailures) + Gather.MustRegister(VolumeServerScrubShardFailures) Gather.MustRegister(S3RequestCounter) Gather.MustRegister(S3HandlerCounter)