From 7bd86ec0046182945976d49592ac6f8e981d0815 Mon Sep 17 00:00:00 2001 From: William Banfield <4561443+williambanfield@users.noreply.github.com> Date: Wed, 17 Aug 2022 09:37:45 -0400 Subject: [PATCH] consensus: backport abci and consensus metrics (#9273) Partial backport of #8480 --- consensus/metrics.gen.go | 28 +++++++++++++++++++++++ consensus/metrics.go | 40 +++++++++++++++++++++++++++++++++ consensus/state.go | 11 +++++++++ docs/tendermint-core/metrics.md | 8 +++++++ state/execution.go | 4 ++++ state/metrics.gen.go | 16 ++++++++++++- state/metrics.go | 8 +++++++ 7 files changed, 114 insertions(+), 1 deletion(-) diff --git a/consensus/metrics.gen.go b/consensus/metrics.gen.go index 70be488ef..5d117c77b 100644 --- a/consensus/metrics.gen.go +++ b/consensus/metrics.gen.go @@ -162,6 +162,30 @@ func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics { Name: "full_prevote_delay", Help: "Interval in seconds between the proposal timestamp and the timestamp of the latest prevote in a round where all validators voted.", }, append(labels, "proposer_address")).With(labelsAndValues...), + ProposalReceiveCount: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "proposal_receive_count", + Help: "ProposalReceiveCount is the total number of proposals received by this node since process start. 
The metric is annotated by the status of the proposal from the application, either 'accepted' or 'rejected'.", + }, append(labels, "status")).With(labelsAndValues...), + ProposalCreateCount: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "proposal_create_count", + Help: "ProposalCreateCount is the total number of proposals created by this node since process start.", + }, labels).With(labelsAndValues...), + RoundVotingPowerPercent: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "round_voting_power_percent", + Help: "RoundVotingPowerPercent is the percentage of the total voting power received with a round. The value begins at 0 for each round and approaches 1.0 as additional voting power is observed. The metric is labeled by vote type.", + }, append(labels, "vote_type")).With(labelsAndValues...), + LateVotes: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "late_votes", + Help: "LateVotes stores the number of votes that were received by this node that correspond to earlier heights and rounds than this node is currently in.", + }, append(labels, "vote_type")).With(labelsAndValues...), } } @@ -191,5 +215,9 @@ func NopMetrics() *Metrics { BlockGossipPartsReceived: discard.NewCounter(), QuorumPrevoteDelay: discard.NewGauge(), FullPrevoteDelay: discard.NewGauge(), + ProposalReceiveCount: discard.NewCounter(), + ProposalCreateCount: discard.NewCounter(), + RoundVotingPowerPercent: discard.NewGauge(), + LateVotes: discard.NewCounter(), } } diff --git a/consensus/metrics.go b/consensus/metrics.go index de0da45de..6ba132b29 100644 --- a/consensus/metrics.go +++ b/consensus/metrics.go @@ -6,6 +6,7 @@ import ( "github.com/go-kit/kit/metrics" cstypes "github.com/tendermint/tendermint/consensus/types" + tmproto "github.com/tendermint/tendermint/proto/tendermint/types" "github.com/tendermint/tendermint/types" ) @@ -92,6 
+93,26 @@ type Metrics struct { // of the voting power on the network issued prevotes. //metrics:Interval in seconds between the proposal timestamp and the timestamp of the latest prevote in a round where all validators voted. FullPrevoteDelay metrics.Gauge `metrics_labels:"proposer_address"` + + // ProposalReceiveCount is the total number of proposals received by this node + // since process start. + // The metric is annotated by the status of the proposal from the application, + // either 'accepted' or 'rejected'. + ProposalReceiveCount metrics.Counter `metrics_labels:"status"` + + // ProposalCreateCount is the total number of proposals created by this node + // since process start. + ProposalCreateCount metrics.Counter + + // RoundVotingPowerPercent is the percentage of the total voting power received + // with a round. The value begins at 0 for each round and approaches 1.0 as + // additional voting power is observed. The metric is labeled by vote type. + RoundVotingPowerPercent metrics.Gauge `metrics_labels:"vote_type"` + + // LateVotes stores the number of votes that were received by this node that + // correspond to earlier heights and rounds than this node is currently + // in. + LateVotes metrics.Counter `metrics_labels:"vote_type"` } // RecordConsMetrics uses for recording the block related metrics during fast-sync. 
@@ -102,12 +123,31 @@ func (m *Metrics) RecordConsMetrics(block *types.Block) { m.CommittedHeight.Set(float64(block.Height)) } +func (m *Metrics) MarkProposalProcessed(accepted bool) { + status := "accepted" + if !accepted { + status = "rejected" + } + m.ProposalReceiveCount.With("status", status).Add(1) +} + +func (m *Metrics) MarkVoteReceived(vt tmproto.SignedMsgType, power, totalPower int64) { + p := float64(power) / float64(totalPower) + n := strings.ToLower(strings.TrimPrefix(vt.String(), "SIGNED_MSG_TYPE_")) + m.RoundVotingPowerPercent.With("vote_type", n).Add(p) +} + func (m *Metrics) MarkRound(r int32, st time.Time) { m.Rounds.Set(float64(r)) roundTime := time.Since(st).Seconds() m.RoundDurationSeconds.Observe(roundTime) } +func (m *Metrics) MarkLateVote(vt tmproto.SignedMsgType) { + n := strings.ToLower(strings.TrimPrefix(vt.String(), "SIGNED_MSG_TYPE_")) + m.LateVotes.With("vote_type", n).Add(1) +} + func (m *Metrics) MarkStep(s cstypes.RoundStepType) { if !m.stepStart.IsZero() { stepTime := time.Since(m.stepStart).Seconds() diff --git a/consensus/state.go b/consensus/state.go index ba0e5bf5e..f2445d4a2 100644 --- a/consensus/state.go +++ b/consensus/state.go @@ -1144,6 +1144,7 @@ func (cs *State) defaultDecideProposal(height int64, round int32) { } else if block == nil { panic("Method createProposalBlock should not provide a nil block without errors") } + cs.metrics.ProposalCreateCount.Add(1) blockParts, err = block.MakePartSet(types.BlockPartSizeBytes) if err != nil { cs.Logger.Error("unable to create proposal block part set", "error", err) @@ -1308,6 +1309,7 @@ func (cs *State) defaultDoPrevote(height int64, round int32) { "state machine returned an error (%v) when calling ProcessProposal", err, )) } + cs.metrics.MarkProposalProcessed(isAppValid) // Vote nil if the Application rejected the block if !isAppValid { @@ -2051,6 +2053,10 @@ func (cs *State) addVote(vote *types.Vote, peerID p2p.ID) (added bool, err error "cs_height", cs.Height, ) + if 
vote.Height < cs.Height || (vote.Height == cs.Height && vote.Round < cs.Round) { + cs.metrics.MarkLateVote(vote.Type) + } + // A precommit for the previous height? // These come in while we wait timeoutCommit if vote.Height+1 == cs.Height && vote.Type == tmproto.PrecommitType { @@ -2095,6 +2101,11 @@ func (cs *State) addVote(vote *types.Vote, peerID p2p.ID) (added bool, err error // Either duplicate, or error upon cs.Votes.AddByIndex() return } + if vote.Round == cs.Round { + vals := cs.state.Validators + _, val := vals.GetByIndex(vote.ValidatorIndex) + cs.metrics.MarkVoteReceived(vote.Type, val.VotingPower, vals.TotalVotingPower()) + } if err := cs.eventBus.PublishEventVote(types.EventDataVote{Vote: vote}); err != nil { return added, err diff --git a/docs/tendermint-core/metrics.md b/docs/tendermint-core/metrics.md index b0f7033ac..f69313f95 100644 --- a/docs/tendermint-core/metrics.md +++ b/docs/tendermint-core/metrics.md @@ -43,6 +43,12 @@ The following metrics are available: | consensus_step_duration | Histogram | step | Histogram of durations for each step in the consensus protocol | | consensus_round_duration | Histogram | | Histogram of durations for all the rounds that have occurred since the process started | | consensus_block_gossip_parts_received | Counter | matches_current | Number of block parts received by the node | +| consensus_quorum_prevote_delay | Gauge | | Interval in seconds between the proposal timestamp and the timestamp of the earliest prevote that achieved a quorum | +| consensus_full_prevote_delay | Gauge | proposer_address | Interval in seconds between the proposal timestamp and the timestamp of the latest prevote in a round where all validators voted | +| consensus_proposal_receive_count | Counter | status | Total number of proposals received by the node since process start | +| consensus_proposal_create_count | Counter | | Total number of proposals created by the node since process start | +| consensus_round_voting_power_percent | Gauge | vote_type | A 
value between 0 and 1.0 representing the percentage of the total voting power per vote type received within a round | +| consensus_late_votes | Counter | vote_type | Number of votes received by the node since process start that correspond to earlier heights and rounds than this node is currently in. | | p2p_peers | Gauge | | Number of peers node's connected to | | p2p_peer_receive_bytes_total | counter | peer_id, chID | number of bytes per channel received from a given peer | | p2p_peer_send_bytes_total | counter | peer_id, chID | number of bytes per channel sent to a given peer | @@ -54,6 +60,8 @@ The following metrics are available: | mempool_failed_txs | counter | | number of failed transactions | | mempool_recheck_times | counter | | number of transactions rechecked in the mempool | | state_block_processing_time | histogram | | time between BeginBlock and EndBlock in ms | +| state_consensus_param_updates | Counter | | number of consensus parameter updates returned by the application since process start | +| state_validator_set_updates | Counter | | number of validator set updates returned by the application since process start | ## Useful queries diff --git a/state/execution.go b/state/execution.go index fc34f8364..0cd53e5bb 100644 --- a/state/execution.go +++ b/state/execution.go @@ -233,6 +233,10 @@ func (blockExec *BlockExecutor) ApplyBlock( } if len(validatorUpdates) > 0 { blockExec.logger.Debug("updates to validators", "updates", types.ValidatorListString(validatorUpdates)) + blockExec.metrics.ValidatorSetUpdates.Add(1) + } + if abciResponses.EndBlock.ConsensusParamUpdates != nil { + blockExec.metrics.ConsensusParamUpdates.Add(1) } // Update the state with the block and responses. 
diff --git a/state/metrics.gen.go b/state/metrics.gen.go index 288f1d7cf..1ce2c4de1 100644 --- a/state/metrics.gen.go +++ b/state/metrics.gen.go @@ -22,11 +22,25 @@ func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics { Buckets: stdprometheus.LinearBuckets(1, 10, 10), }, labels).With(labelsAndValues...), + ConsensusParamUpdates: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "consensus_param_updates", + Help: "ConsensusParamUpdates is the total number of times the application has updated the consensus params since process start.", + }, labels).With(labelsAndValues...), + ValidatorSetUpdates: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "validator_set_updates", + Help: "ValidatorSetUpdates is the total number of times the application has updated the validator set since process start.", + }, labels).With(labelsAndValues...), } } func NopMetrics() *Metrics { return &Metrics{ - BlockProcessingTime: discard.NewHistogram(), + BlockProcessingTime: discard.NewHistogram(), + ConsensusParamUpdates: discard.NewCounter(), + ValidatorSetUpdates: discard.NewCounter(), } } diff --git a/state/metrics.go b/state/metrics.go index 72712144f..6c238df76 100644 --- a/state/metrics.go +++ b/state/metrics.go @@ -16,4 +16,12 @@ const ( type Metrics struct { // Time between BeginBlock and EndBlock in ms. BlockProcessingTime metrics.Histogram `metrics_buckettype:"lin" metrics_bucketsizes:"1, 10, 10"` + + // ConsensusParamUpdates is the total number of times the application has + // updated the consensus params since process start. + ConsensusParamUpdates metrics.Counter + + // ValidatorSetUpdates is the total number of times the application has + // updated the validator set since process start. + ValidatorSetUpdates metrics.Counter }