diff --git a/internal/consensus/metrics.go b/internal/consensus/metrics.go index dbefb9a5b..15feee0c6 100644 --- a/internal/consensus/metrics.go +++ b/internal/consensus/metrics.go @@ -1,8 +1,16 @@ package consensus import ( + "strings" + "time" + "github.com/go-kit/kit/metrics" "github.com/go-kit/kit/metrics/discard" +<<<<<<< HEAD +======= + + cstypes "github.com/tendermint/tendermint/internal/consensus/types" +>>>>>>> 165cc2947 (consensus: additional timing metrics (#7849)) "github.com/tendermint/tendermint/types" prometheus "github.com/go-kit/kit/metrics/prometheus" @@ -26,6 +34,9 @@ type Metrics struct { // Number of rounds. Rounds metrics.Gauge + // Histogram of round duration. + RoundDuration metrics.Histogram + // Number of validators. Validators metrics.Gauge // Total power of all validators. @@ -62,8 +73,18 @@ type Metrics struct { // Number of blockparts transmitted by peer. BlockParts metrics.Counter - // Histogram of time taken per step annotated with reason that the step proceeded. - StepTime metrics.Histogram + // Histogram of step duration. + StepDuration metrics.Histogram + stepStart time.Time + + // Histogram of time taken to receive a block in seconds, measured between when a new block is first + // discovered to when the block is completed. + BlockGossipReceiveLatency metrics.Histogram + blockGossipStart time.Time + + // Number of block parts received by the node, separated by whether the part + // was relevant to the block the node is trying to gather or not. + BlockGossipPartsReceived metrics.Counter // QuroumPrevoteMessageDelay is the interval in seconds between the proposal // timestamp and the timestamp of the earliest prevote that achieved a quorum @@ -103,7 +124,13 @@ func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics { Name: "rounds", Help: "Number of rounds.", }, labels).With(labelsAndValues...), - + RoundDuration: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "round_duration", + Help: "Time spent in a round.", + Buckets: stdprometheus.ExponentialBucketsRange(0.1, 100, 8), + }, labels).With(labelsAndValues...), Validators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ Namespace: namespace, Subsystem: MetricsSubsystem, @@ -206,12 +233,28 @@ func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics { Name: "block_parts", Help: "Number of blockparts transmitted by peer.", }, append(labels, "peer_id")).With(labelsAndValues...), - StepTime: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{ + BlockGossipReceiveLatency: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{ Namespace: namespace, Subsystem: MetricsSubsystem, - Name: "step_time", + Name: "block_gossip_receive_latency", + Help: "Difference in seconds between when the validator learns of a new block" + + "and when the validator receives the last piece of the block.", + Buckets: stdprometheus.ExponentialBucketsRange(0.1, 100, 8), + }, labels).With(labelsAndValues...), + BlockGossipPartsReceived: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "block_gossip_parts_received", + Help: "Number of block parts received by the node, labeled by whether the " + + "part was relevant to the block the node was currently gathering or not.", + }, append(labels, "matches_current")).With(labelsAndValues...), + StepDuration: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "step_duration", Help: "Time spent per step.", - }, append(labels, "step", "reason")).With(labelsAndValues...), + Buckets: stdprometheus.ExponentialBucketsRange(0.1, 100, 8), + }, append(labels, "step")).With(labelsAndValues...), QuorumPrevoteMessageDelay: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ Namespace: namespace, Subsystem: MetricsSubsystem, @@ -236,7 +279,9 @@ func NopMetrics() *Metrics { ValidatorLastSignedHeight: discard.NewGauge(), - Rounds: discard.NewGauge(), + Rounds: discard.NewGauge(), + RoundDuration: discard.NewHistogram(), + StepDuration: discard.NewHistogram(), Validators: discard.NewGauge(), ValidatorsPower: discard.NewGauge(), @@ -249,6 +294,7 @@ func NopMetrics() *Metrics { BlockIntervalSeconds: discard.NewHistogram(), +<<<<<<< HEAD NumTxs: discard.NewGauge(), BlockSizeBytes: discard.NewHistogram(), TotalTxs: discard.NewGauge(), @@ -258,6 +304,20 @@ func NopMetrics() *Metrics { BlockParts: discard.NewCounter(), QuorumPrevoteMessageDelay: discard.NewGauge(), FullPrevoteMessageDelay: discard.NewGauge(), +======= + NumTxs: discard.NewGauge(), + BlockSizeBytes: discard.NewHistogram(), + TotalTxs: discard.NewGauge(), + CommittedHeight: discard.NewGauge(), + BlockSyncing: discard.NewGauge(), + StateSyncing: discard.NewGauge(), + BlockParts: discard.NewCounter(), + BlockGossipReceiveLatency: discard.NewHistogram(), + BlockGossipPartsReceived: discard.NewCounter(), + QuorumPrevoteMessageDelay: discard.NewGauge(), + FullPrevoteMessageDelay: discard.NewGauge(), + ProposalTimestampDifference: discard.NewHistogram(), +>>>>>>> 165cc2947 (consensus: additional timing metrics (#7849)) } } @@ -268,3 +328,26 @@ func (m *Metrics) RecordConsMetrics(block *types.Block) { m.BlockSizeBytes.Observe(float64(block.Size())) m.CommittedHeight.Set(float64(block.Height)) } + +func (m *Metrics) MarkBlockGossipStarted() { + m.blockGossipStart = time.Now() +} + +func (m *Metrics) MarkBlockGossipComplete() { + m.BlockGossipReceiveLatency.Observe(time.Since(m.blockGossipStart).Seconds()) +} + +func (m *Metrics) MarkRound(r int32, st time.Time) { + m.Rounds.Set(float64(r)) + roundTime := time.Since(st).Seconds() + m.RoundDuration.Observe(roundTime) +} + +func (m *Metrics) MarkStep(s cstypes.RoundStepType) { + if !m.stepStart.IsZero() { + stepTime := time.Since(m.stepStart).Seconds() + stepName := strings.TrimPrefix(s.String(), "RoundStep") + m.StepDuration.With("step", stepName).Observe(stepTime) + } + m.stepStart = time.Now() +} diff --git a/internal/consensus/state.go b/internal/consensus/state.go index 885559590..a9eb8dd46 100644 --- a/internal/consensus/state.go +++ b/internal/consensus/state.go @@ -573,6 +573,14 @@ func (cs *State) updateHeight(height int64) { } func (cs *State) updateRoundStep(round int32, step cstypes.RoundStepType) { + if !cs.replayMode { + if round != cs.Round || round == 0 && step == cstypes.RoundStepNewRound { + cs.metrics.MarkRound(cs.Round, cs.StartTime) + } + if cs.Step != step { + cs.metrics.MarkStep(cs.Step) + } + } cs.Round = round cs.Step = step } @@ -1058,9 +1066,6 @@ func (cs *State) enterNewRound(height int64, round int32) { if err := cs.eventBus.PublishEventNewRound(cs.NewRoundEvent()); err != nil { cs.Logger.Error("failed publishing new round", "err", err) } - - cs.metrics.Rounds.Set(float64(round)) - // Wait for txs to be available in the mempool // before we enterPropose in round 0. If the last block changed the app hash, // we may need an empty "proof" block, and enterPropose immediately. @@ -1479,6 +1484,7 @@ func (cs *State) enterPrecommit(height int64, round int32) { if !cs.ProposalBlockParts.HasHeader(blockID.PartSetHeader) { cs.ProposalBlock = nil + cs.metrics.MarkBlockGossipStarted() cs.ProposalBlockParts = types.NewPartSetFromHeader(blockID.PartSetHeader) } @@ -1573,6 +1579,7 @@ func (cs *State) enterCommit(height int64, commitRound int32) { // We're getting the wrong block. // Set up ProposalBlockParts and keep waiting. cs.ProposalBlock = nil + cs.metrics.MarkBlockGossipStarted() cs.ProposalBlockParts = types.NewPartSetFromHeader(blockID.PartSetHeader) if err := cs.eventBus.PublishEventValidBlock(cs.RoundStateEvent()); err != nil { @@ -1857,6 +1864,7 @@ func (cs *State) defaultSetProposal(proposal *types.Proposal) error { // This happens if we're already in cstypes.RoundStepCommit or if there is a valid block in the current round. // TODO: We can check if Proposal is for a different block as this is a sign of misbehavior! if cs.ProposalBlockParts == nil { + cs.metrics.MarkBlockGossipStarted() cs.ProposalBlockParts = types.NewPartSetFromHeader(proposal.BlockID.PartSetHeader) } @@ -1872,12 +1880,18 @@ func (cs *State) addProposalBlockPart(msg *BlockPartMessage, peerID types.NodeID // Blocks might be reused, so round mismatch is OK if cs.Height != height { +<<<<<<< HEAD cs.Logger.Debug("received block part from wrong height", "height", height, "round", round) +======= + cs.logger.Debug("received block part from wrong height", "height", height, "round", round) + cs.metrics.BlockGossipPartsReceived.With("matches_current", "false").Add(1) +>>>>>>> 165cc2947 (consensus: additional timing metrics (#7849)) return false, nil } // We're not expecting a block part. if cs.ProposalBlockParts == nil { + cs.metrics.BlockGossipPartsReceived.With("matches_current", "false").Add(1) // NOTE: this can happen when we've gone to a higher round and // then receive parts from the previous round - not necessarily a bad peer. cs.Logger.Debug( @@ -1892,15 +1906,26 @@ func (cs *State) addProposalBlockPart(msg *BlockPartMessage, peerID types.NodeID added, err = cs.ProposalBlockParts.AddPart(part) if err != nil { + if errors.Is(err, types.ErrPartSetInvalidProof) || errors.Is(err, types.ErrPartSetUnexpectedIndex) { + cs.metrics.BlockGossipPartsReceived.With("matches_current", "false").Add(1) + } return added, err } + + cs.metrics.BlockGossipPartsReceived.With("matches_current", "true").Add(1) + if cs.ProposalBlockParts.ByteSize() > cs.state.ConsensusParams.Block.MaxBytes { return added, fmt.Errorf("total size of proposal block parts exceeds maximum block bytes (%d > %d)", cs.ProposalBlockParts.ByteSize(), cs.state.ConsensusParams.Block.MaxBytes, ) } if added && cs.ProposalBlockParts.IsComplete() { +<<<<<<< HEAD bz, err := ioutil.ReadAll(cs.ProposalBlockParts.GetReader()) +======= + cs.metrics.MarkBlockGossipComplete() + bz, err := io.ReadAll(cs.ProposalBlockParts.GetReader()) +>>>>>>> 165cc2947 (consensus: additional timing metrics (#7849)) if err != nil { return added, err } @@ -2120,6 +2145,7 @@ func (cs *State) addVote(vote *types.Vote, peerID types.NodeID) (added bool, err } if !cs.ProposalBlockParts.HasHeader(blockID.PartSetHeader) { + cs.metrics.MarkBlockGossipStarted() cs.ProposalBlockParts = types.NewPartSetFromHeader(blockID.PartSetHeader) }