From d7c3ba825a8c44373c1472f44e4bdf677c23efb7 Mon Sep 17 00:00:00 2001 From: William Banfield Date: Thu, 17 Feb 2022 17:05:19 -0500 Subject: [PATCH] add histograms for round and step durations --- internal/consensus/metrics.go | 48 ++++++++++++++++++++++++++++------- internal/consensus/state.go | 11 +++++--- 2 files changed, 47 insertions(+), 12 deletions(-) diff --git a/internal/consensus/metrics.go b/internal/consensus/metrics.go index 2de3a44ad..9124b70f4 100644 --- a/internal/consensus/metrics.go +++ b/internal/consensus/metrics.go @@ -1,11 +1,13 @@ package consensus import ( + "strings" "time" "github.com/go-kit/kit/metrics" "github.com/go-kit/kit/metrics/discard" + cstypes "github.com/tendermint/tendermint/internal/consensus/types" "github.com/tendermint/tendermint/types" prometheus "github.com/go-kit/kit/metrics/prometheus" @@ -29,6 +31,9 @@ type Metrics struct { // Number of rounds. Rounds metrics.Gauge + // Histogram of round duration. + RoundDuration metrics.Histogram + // Number of validators. Validators metrics.Gauge // Total power of all validators. @@ -65,8 +70,9 @@ type Metrics struct { // Number of blockparts transmitted by peer. BlockParts metrics.Counter - // Histogram of time taken per step annotated with reason that the step proceeded. - StepTime metrics.Histogram + // Histogram of step duration. + StepDuration metrics.Histogram + stepStart time.Time // Time taken to receive a block in seconds, measured between when a new block is first // discovered to when the block is completed. @@ -120,7 +126,13 @@ func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics { Name: "rounds", Help: "Number of rounds.", }, labels).With(labelsAndValues...), - + RoundDuration: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "round_duration", + Help: "Time spent in a round", + Buckets: stdprometheus.ExponentialBucketsRange(0.1, 100, 8), + }, labels).With(labelsAndValues...), Validators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ Namespace: namespace, Subsystem: MetricsSubsystem, @@ -237,12 +249,13 @@ func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics { Help: "Number of block parts received by the node, labeled by whether the" + "part was relevant to the block the node was currently gathering or not", }, append(labels, "matches_current")).With(labelsAndValues...), - StepTime: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{ + StepDuration: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{ Namespace: namespace, Subsystem: MetricsSubsystem, - Name: "step_time", + Name: "step_duration", Help: "Time spent per step.", - }, append(labels, "step", "reason")).With(labelsAndValues...), + Buckets: stdprometheus.ExponentialBucketsRange(0.1, 100, 8), + }, append(labels, "step")).With(labelsAndValues...), QuorumPrevoteMessageDelay: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ Namespace: namespace, Subsystem: MetricsSubsystem, @@ -276,7 +289,9 @@ func NopMetrics() *Metrics { ValidatorLastSignedHeight: discard.NewGauge(), - Rounds: discard.NewGauge(), + Rounds: discard.NewGauge(), + RoundDuration: discard.NewHistogram(), + StepDuration: discard.NewHistogram(), Validators: discard.NewGauge(), ValidatorsPower: discard.NewGauge(), @@ -296,7 +311,7 @@ func NopMetrics() *Metrics { BlockSyncing: discard.NewGauge(), StateSyncing: discard.NewGauge(), BlockParts: discard.NewCounter(), - BlockGossipReceiveTime: discard.NewGauge(), + BlockGossipReceiveTime: discard.NewHistogram(), BlockGossipPartsReceived: discard.NewCounter(), QuorumPrevoteMessageDelay: discard.NewGauge(), FullPrevoteMessageDelay: discard.NewGauge(), @@ -317,5 +332,20 @@ func (m *Metrics) MarkBlockGossipStarted() { } func (m *Metrics) MarkBlockGossipComplete() { - m.BlockGossipReceiveTime.Set(float64(time.Now().Sub(m.blockGossipStart).Seconds())) + m.BlockGossipReceiveTime.Observe(float64(time.Now().Sub(m.blockGossipStart).Seconds())) +} + +func (m *Metrics) MarkRound(r int32, st time.Time) { + m.Rounds.Set(float64(r)) + roundTime := float64(time.Now().Sub(st).Seconds()) + m.RoundDuration.Observe(roundTime) +} + +func (m *Metrics) MarkStep(s cstypes.RoundStepType) { + if !m.stepStart.IsZero() { + stepTime := float64(time.Now().Sub(m.stepStart).Seconds()) + stepName := strings.TrimPrefix(s.String(), "RoundStep") + m.StepDuration.With("step", stepName).Observe(stepTime) + } + m.stepStart = time.Now() } diff --git a/internal/consensus/state.go b/internal/consensus/state.go index 7f0fa1fc3..8de115b7f 100644 --- a/internal/consensus/state.go +++ b/internal/consensus/state.go @@ -628,6 +628,14 @@ func (cs *State) updateHeight(height int64) { } func (cs *State) updateRoundStep(round int32, step cstypes.RoundStepType) { + if !cs.replayMode { + if round != cs.Round || round == 0 && step == cstypes.RoundStepNewRound { + cs.metrics.MarkRound(cs.Round, cs.StartTime) + } + if cs.Step != step { + cs.metrics.MarkStep(cs.Step) + } + } cs.Round = round cs.Step = step } @@ -1124,9 +1132,6 @@ func (cs *State) enterNewRound(ctx context.Context, height int64, round int32) { if err := cs.eventBus.PublishEventNewRound(ctx, cs.NewRoundEvent()); err != nil { cs.logger.Error("failed publishing new round", "err", err) } - - cs.metrics.Rounds.Set(float64(round)) - // Wait for txs to be available in the mempool // before we enterPropose in round 0. If the last block changed the app hash, // we may need an empty "proof" block, and enterPropose immediately.