mirror of
https://github.com/tendermint/tendermint.git
synced 2026-01-05 21:14:53 +00:00
consensus: add additional metrics for abci++ data (#8480)
This pull request adds an additional set of metrics targeted at providing more visibility into `abci++`.
The following set of metrics are added and exposed through the `metrics` endpoint:
```
tendermint_consensus_proposal_receive_count{chain_id="test-chain-IrF74Y",status="accepted"} 34
tendermint_consensus_proposal_create_count{chain_id="test-chain-IrF74Y"} 34
tendermint_consensus_vote_extension_receive_count{chain_id="test-chain-IrF74Y",status="accepted"} 34
tendermint_consensus_round_voting_power_percent{chain_id="test-chain-IrF74Y",vote_type="precommit"} 1
tendermint_consensus_round_voting_power_percent{chain_id="test-chain-IrF74Y",vote_type="prevote"} 1
tendermint_state_consensus_param_updates{chain_id="test-chain-IrF74Y"} 0
tendermint_state_validator_set_updates{chain_id="test-chain-IrF74Y"} 0
tendermint_consensus_late_votes{chain_id="test-chain-IrF74Y",vote_type="precommit"} 16
```
This pull request also updates the `metrics.md` file to include some metrics that were previously missed. My hope is to generate the `metrics.md` file with a future version of the tool being architected in #8479
This commit is contained in:
@@ -18,40 +18,53 @@ Listen address can be changed in the config file (see
|
||||
|
||||
The following metrics are available:
|
||||
|
||||
| **Name** | **Type** | **Tags** | **Description** |
|
||||
| -------------------------------------- | --------- | ------------- | ---------------------------------------------------------------------- |
|
||||
| abci_connection_method_timing | Histogram | method, type | Timings for each of the ABCI methods |
|
||||
| consensus_height | Gauge | | Height of the chain |
|
||||
| consensus_validators | Gauge | | Number of validators |
|
||||
| consensus_validators_power | Gauge | | Total voting power of all validators |
|
||||
| consensus_validator_power | Gauge | | Voting power of the node if in the validator set |
|
||||
| consensus_validator_last_signed_height | Gauge | | Last height the node signed a block, if the node is a validator |
|
||||
| consensus_validator_missed_blocks | Gauge | | Total amount of blocks missed for the node, if the node is a validator |
|
||||
| consensus_missing_validators | Gauge | | Number of validators who did not sign |
|
||||
| consensus_missing_validators_power | Gauge | | Total voting power of the missing validators |
|
||||
| consensus_byzantine_validators | Gauge | | Number of validators who tried to double sign |
|
||||
| consensus_byzantine_validators_power | Gauge | | Total voting power of the byzantine validators |
|
||||
| consensus_block_interval_seconds | Histogram | | Time between this and last block (Block.Header.Time) in seconds |
|
||||
| consensus_rounds | Gauge | | Number of rounds |
|
||||
| consensus_num_txs | Gauge | | Number of transactions |
|
||||
| consensus_total_txs | Gauge | | Total number of transactions committed |
|
||||
| consensus_block_parts | counter | peer_id | number of blockparts transmitted by peer |
|
||||
| consensus_latest_block_height | gauge | | /status sync_info number |
|
||||
| consensus_fast_syncing | gauge | | either 0 (not fast syncing) or 1 (syncing) |
|
||||
| consensus_state_syncing | gauge | | either 0 (not state syncing) or 1 (syncing) |
|
||||
| consensus_block_size_bytes | Gauge | | Block size in bytes |
|
||||
| evidence_pool_num_evidence | Gauge | | Number of evidence in the evidence pool
|
||||
| p2p_peers | Gauge | | Number of peers node's connected to |
|
||||
| p2p_peer_receive_bytes_total | counter | peer_id, chID | number of bytes per channel received from a given peer |
|
||||
| p2p_peer_send_bytes_total | counter | peer_id, chID | number of bytes per channel sent to a given peer |
|
||||
| p2p_peer_pending_send_bytes | gauge | peer_id | number of pending bytes to be sent to a given peer |
|
||||
| p2p_num_txs | gauge | peer_id | number of transactions submitted by each peer_id |
|
||||
| p2p_pending_send_bytes | gauge | peer_id | amount of data pending to be sent to peer |
|
||||
| mempool_size | Gauge | | Number of uncommitted transactions |
|
||||
| mempool_tx_size_bytes | histogram | | transaction sizes in bytes |
|
||||
| mempool_failed_txs | counter | | number of failed transactions |
|
||||
| mempool_recheck_times | counter | | number of transactions rechecked in the mempool |
|
||||
| state_block_processing_time | histogram | | time between BeginBlock and EndBlock in ms |
|
||||
| **Name** | **Type** | **Tags** | **Description** |
|
||||
|-----------------------------------------|-----------|-----------------|--------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| abci_connection_method_timing | Histogram | method, type | Timings for each of the ABCI methods |
|
||||
| consensus_height | Gauge | | Height of the chain |
|
||||
| consensus_validators | Gauge | | Number of validators |
|
||||
| consensus_validators_power | Gauge | | Total voting power of all validators |
|
||||
| consensus_validator_power | Gauge | | Voting power of the node if in the validator set |
|
||||
| consensus_validator_last_signed_height | Gauge | | Last height the node signed a block, if the node is a validator |
|
||||
| consensus_validator_missed_blocks | Gauge | | Total amount of blocks missed for the node, if the node is a validator |
|
||||
| consensus_missing_validators | Gauge | | Number of validators who did not sign |
|
||||
| consensus_missing_validators_power | Gauge | | Total voting power of the missing validators |
|
||||
| consensus_byzantine_validators | Gauge | | Number of validators who tried to double sign |
|
||||
| consensus_byzantine_validators_power | Gauge | | Total voting power of the byzantine validators |
|
||||
| consensus_block_interval_seconds | Histogram | | Time between this and last block (Block.Header.Time) in seconds |
|
||||
| consensus_rounds | Gauge | | Number of rounds |
|
||||
| consensus_num_txs | Gauge | | Number of transactions |
|
||||
| consensus_total_txs | Gauge | | Total number of transactions committed |
|
||||
| consensus_block_parts | Counter | peer_id | number of blockparts transmitted by peer |
|
||||
| consensus_latest_block_height | Gauge | | /status sync_info number |
|
||||
| consensus_fast_syncing | Gauge | | either 0 (not fast syncing) or 1 (syncing) |
|
||||
| consensus_state_syncing | Gauge | | either 0 (not state syncing) or 1 (syncing) |
|
||||
| consensus_block_size_bytes | Gauge | | Block size in bytes |
|
||||
| consensus_step_duration | Histogram | step | Histogram of durations for each step in the consensus protocol |
|
||||
| consensus_block_gossip_receive_latency | Histogram | | Histogram of time taken to receive a block in seconds, measure between when a new block is first discovered to when the block is completed |
|
||||
| consensus_block_gossip_parts_received | Counter | matches_current | Number of block parts received by the node |
|
||||
| consensus_quorum_prevote_delay | Gauge | | Interval in seconds between the proposal timestamp and the timestamp of the earliest prevote that achieved a quorum |
|
||||
| consensus_full_prevote_delay | Gauge | | Interval in seconds between the proposal timestamp and the timestamp of the latest prevote in a round where all validators voted |
|
||||
| consensus_proposal_timestamp_difference | Histogram | | Difference between the timestamp in the proposal message and the local time of the validator at the time it received the message |
|
||||
| consensus_vote_extension_receive_count | Counter | status | Number of vote extensions received |
|
||||
| consensus_proposal_receive_count | Counter | status | Total number of proposals received by the node since process start |
|
||||
| consensus_proposal_create_count | Counter | | Total number of proposals created by the node since process start |
|
||||
| consensus_round_voting_power_percent | Gauge | vote_type | A value between 0 and 1.0 representing the percentage of the total voting power per vote type received within a round |
|
||||
| consensus_late_votes | Counter | vote_type | Number of votes received by the node since process start that correspond to earlier heights and rounds than this node is currently in. |
|
||||
| evidence_pool_num_evidence | Gauge | | Number of evidence in the evidence pool |
|
||||
| p2p_peers | Gauge | | Number of peers node's connected to |
|
||||
| p2p_peer_receive_bytes_total | Counter | peer_id, chID | number of bytes per channel received from a given peer |
|
||||
| p2p_peer_send_bytes_total | Counter | peer_id, chID | number of bytes per channel sent to a given peer |
|
||||
| p2p_peer_pending_send_bytes | Gauge | peer_id | number of pending bytes to be sent to a given peer |
|
||||
| p2p_num_txs | Gauge | peer_id | number of transactions submitted by each peer_id |
|
||||
| p2p_pending_send_bytes | Gauge | peer_id | amount of data pending to be sent to peer |
|
||||
| mempool_size | Gauge | | Number of uncommitted transactions |
|
||||
| mempool_tx_size_bytes | Histogram | | transaction sizes in bytes |
|
||||
| mempool_failed_txs | Counter | | number of failed transactions |
|
||||
| mempool_recheck_times | Counter | | number of transactions rechecked in the mempool |
|
||||
| state_block_processing_time | Histogram | | time between BeginBlock and EndBlock in ms |
|
||||
| state_consensus_param_updates | Counter | | number of consensus parameter updates returned by the application since process start |
|
||||
| state_validator_set_updates | Counter | | number of validator set updates returned by the application since process start |
|
||||
|
||||
## Useful queries
|
||||
|
||||
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
"github.com/go-kit/kit/metrics/discard"
|
||||
|
||||
cstypes "github.com/tendermint/tendermint/internal/consensus/types"
|
||||
tmproto "github.com/tendermint/tendermint/proto/tendermint/types"
|
||||
"github.com/tendermint/tendermint/types"
|
||||
|
||||
prometheus "github.com/go-kit/kit/metrics/prometheus"
|
||||
@@ -103,6 +104,33 @@ type Metrics struct {
|
||||
// the proposal message and the local time of the validator at the time
|
||||
// that the validator received the message.
|
||||
ProposalTimestampDifference metrics.Histogram
|
||||
|
||||
// VoteExtensionReceiveCount is the number of vote extensions received by this
|
||||
// node. The metric is annotated by the status of the vote extension from the
|
||||
// application, either 'accepted' or 'rejected'.
|
||||
VoteExtensionReceiveCount metrics.Counter
|
||||
|
||||
// ProposalReceiveCount is the total number of proposals received by this node
|
||||
// since process start.
|
||||
// The metric is annotated by the status of the proposal from the application,
|
||||
// either 'accepted' or 'rejected'.
|
||||
ProposalReceiveCount metrics.Counter
|
||||
|
||||
// ProposalCreateCount is the total number of proposals created by this node
|
||||
// since process start.
|
||||
// Unlike ProposalReceiveCount, this metric carries no 'status' label; it is
|
||||
// incremented once for each proposal block this node creates.
|
||||
ProposalCreateCount metrics.Counter
|
||||
|
||||
// RoundVotingPowerPercent is the percentage of the total voting power received
|
||||
// within a round. The value begins at 0 for each round and approaches 1.0 as
|
||||
// additional voting power is observed. The metric is labeled by vote type.
|
||||
RoundVotingPowerPercent metrics.Gauge
|
||||
|
||||
// LateVotes stores the number of votes that were received by this node that
|
||||
// correspond to earlier heights and rounds than this node is currently
|
||||
// in.
|
||||
LateVotes metrics.Counter
|
||||
}
|
||||
|
||||
// PrometheusMetrics returns Metrics built using the Prometheus client library.
|
||||
@@ -280,6 +308,43 @@ func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics {
|
||||
"Only calculated when a new block is proposed.",
|
||||
Buckets: []float64{-10, -.5, -.025, 0, .1, .5, 1, 1.5, 2, 10},
|
||||
}, append(labels, "is_timely")).With(labelsAndValues...),
|
||||
VoteExtensionReceiveCount: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
|
||||
Namespace: namespace,
|
||||
Subsystem: MetricsSubsystem,
|
||||
Name: "vote_extension_receive_count",
|
||||
Help: "Number of vote extensions received by the node since process start, labeled by " +
|
||||
"the application's response to VerifyVoteExtension, either accept or reject.",
|
||||
}, append(labels, "status")).With(labelsAndValues...),
|
||||
|
||||
ProposalReceiveCount: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
|
||||
Namespace: namespace,
|
||||
Subsystem: MetricsSubsystem,
|
||||
Name: "proposal_receive_count",
|
||||
Help: "Number of vote proposals received by the node since process start, labeled by " +
|
||||
"the application's response to ProcessProposal, either accept or reject.",
|
||||
}, append(labels, "status")).With(labelsAndValues...),
|
||||
|
||||
ProposalCreateCount: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
|
||||
Namespace: namespace,
|
||||
Subsystem: MetricsSubsystem,
|
||||
Name: "proposal_create_count",
|
||||
Help: "Number of proposals created by the node since process start.",
|
||||
}, labels).With(labelsAndValues...),
|
||||
|
||||
RoundVotingPowerPercent: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
|
||||
Namespace: namespace,
|
||||
Subsystem: MetricsSubsystem,
|
||||
Name: "round_voting_power_percent",
|
||||
Help: "Percentage of the total voting power received with a round. " +
|
||||
"The value begins at 0 for each round and approaches 1.0 as additional " +
|
||||
"voting power is observed.",
|
||||
}, append(labels, "vote_type")).With(labelsAndValues...),
|
||||
LateVotes: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
|
||||
Namespace: namespace,
|
||||
Subsystem: MetricsSubsystem,
|
||||
Name: "late_votes",
|
||||
Help: "Number of votes received by the node since process start that correspond to earlier heights and rounds than this node is currently in.",
|
||||
}, append(labels, "vote_type")).With(labelsAndValues...),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -317,6 +382,11 @@ func NopMetrics() *Metrics {
|
||||
QuorumPrevoteDelay: discard.NewGauge(),
|
||||
FullPrevoteDelay: discard.NewGauge(),
|
||||
ProposalTimestampDifference: discard.NewHistogram(),
|
||||
VoteExtensionReceiveCount: discard.NewCounter(),
|
||||
ProposalReceiveCount: discard.NewCounter(),
|
||||
ProposalCreateCount: discard.NewCounter(),
|
||||
RoundVotingPowerPercent: discard.NewGauge(),
|
||||
LateVotes: discard.NewCounter(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -336,10 +406,45 @@ func (m *Metrics) MarkBlockGossipComplete() {
|
||||
m.BlockGossipReceiveLatency.Observe(time.Since(m.blockGossipStart).Seconds())
|
||||
}
|
||||
|
||||
func (m *Metrics) MarkProposalProcessed(accepted bool) {
|
||||
status := "accepted"
|
||||
if !accepted {
|
||||
status = "rejected"
|
||||
}
|
||||
m.ProposalReceiveCount.With("status", status).Add(1)
|
||||
}
|
||||
|
||||
func (m *Metrics) MarkVoteExtensionReceived(accepted bool) {
|
||||
status := "accepted"
|
||||
if !accepted {
|
||||
status = "rejected"
|
||||
}
|
||||
m.VoteExtensionReceiveCount.With("status", status).Add(1)
|
||||
}
|
||||
|
||||
func (m *Metrics) MarkVoteReceived(vt tmproto.SignedMsgType, power, totalPower int64) {
|
||||
p := float64(power) / float64(totalPower)
|
||||
n := strings.ToLower(strings.TrimPrefix(vt.String(), "SIGNED_MSG_TYPE_"))
|
||||
m.RoundVotingPowerPercent.With("vote_type", n).Add(p)
|
||||
}
|
||||
|
||||
func (m *Metrics) MarkRound(r int32, st time.Time) {
|
||||
m.Rounds.Set(float64(r))
|
||||
roundTime := time.Since(st).Seconds()
|
||||
m.RoundDuration.Observe(roundTime)
|
||||
|
||||
pvt := tmproto.PrevoteType
|
||||
pvn := strings.ToLower(strings.TrimPrefix(pvt.String(), "SIGNED_MSG_TYPE_"))
|
||||
m.RoundVotingPowerPercent.With("vote_type", pvn).Set(0)
|
||||
|
||||
pct := tmproto.PrecommitType
|
||||
pcn := strings.ToLower(strings.TrimPrefix(pct.String(), "SIGNED_MSG_TYPE_"))
|
||||
m.RoundVotingPowerPercent.With("vote_type", pcn).Set(0)
|
||||
}
|
||||
|
||||
func (m *Metrics) MarkLateVote(vt tmproto.SignedMsgType) {
|
||||
n := strings.ToLower(strings.TrimPrefix(vt.String(), "SIGNED_MSG_TYPE_"))
|
||||
m.LateVotes.With("vote_type", n).Add(1)
|
||||
}
|
||||
|
||||
func (m *Metrics) MarkStep(s cstypes.RoundStepType) {
|
||||
|
||||
@@ -1334,6 +1334,7 @@ func (cs *State) defaultDecideProposal(ctx context.Context, height int64, round
|
||||
} else if block == nil {
|
||||
return
|
||||
}
|
||||
cs.metrics.ProposalCreateCount.Add(1)
|
||||
blockParts, err = block.MakePartSet(types.BlockPartSizeBytes)
|
||||
if err != nil {
|
||||
cs.logger.Error("unable to create proposal block part set", "error", err)
|
||||
@@ -1531,6 +1532,7 @@ func (cs *State) defaultDoPrevote(ctx context.Context, height int64, round int32
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("ProcessProposal: %v", err))
|
||||
}
|
||||
cs.metrics.MarkProposalProcessed(isAppValid)
|
||||
|
||||
// Vote nil if the Application rejected the block
|
||||
if !isAppValid {
|
||||
@@ -2297,6 +2299,10 @@ func (cs *State) addVote(
|
||||
"cs_height", cs.Height,
|
||||
)
|
||||
|
||||
if vote.Height < cs.Height || (vote.Height == cs.Height && vote.Round < cs.Round) {
|
||||
cs.metrics.MarkLateVote(vote.Type)
|
||||
}
|
||||
|
||||
// A precommit for the previous height?
|
||||
// These come in while we wait timeoutCommit
|
||||
if vote.Height+1 == cs.Height && vote.Type == tmproto.PrecommitType {
|
||||
@@ -2337,7 +2343,9 @@ func (cs *State) addVote(
|
||||
|
||||
// Verify VoteExtension if precommit
|
||||
if vote.Type == tmproto.PrecommitType {
|
||||
if err = cs.blockExec.VerifyVoteExtension(ctx, vote); err != nil {
|
||||
err := cs.blockExec.VerifyVoteExtension(ctx, vote)
|
||||
cs.metrics.MarkVoteExtensionReceived(err == nil)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
}
|
||||
@@ -2348,6 +2356,11 @@ func (cs *State) addVote(
|
||||
// Either duplicate, or error upon cs.Votes.AddByIndex()
|
||||
return
|
||||
}
|
||||
if vote.Round == cs.Round {
|
||||
vals := cs.state.Validators
|
||||
_, val := vals.GetByIndex(vote.ValidatorIndex)
|
||||
cs.metrics.MarkVoteReceived(vote.Type, val.VotingPower, vals.TotalVotingPower())
|
||||
}
|
||||
|
||||
if err := cs.eventBus.PublishEventVote(types.EventDataVote{Vote: vote}); err != nil {
|
||||
return added, err
|
||||
|
||||
@@ -247,6 +247,10 @@ func (blockExec *BlockExecutor) ApplyBlock(
|
||||
}
|
||||
if len(validatorUpdates) > 0 {
|
||||
blockExec.logger.Debug("updates to validators", "updates", types.ValidatorListString(validatorUpdates))
|
||||
blockExec.metrics.ValidatorSetUpdates.Add(1)
|
||||
}
|
||||
if finalizeBlockResponse.ConsensusParamUpdates != nil {
|
||||
blockExec.metrics.ConsensusParamUpdates.Add(1)
|
||||
}
|
||||
|
||||
// Update the state with the block and responses.
|
||||
|
||||
@@ -17,6 +17,14 @@ const (
|
||||
type Metrics struct {
|
||||
// Time between BeginBlock and EndBlock.
|
||||
BlockProcessingTime metrics.Histogram
|
||||
|
||||
// ConsensusParamUpdates is the total number of times the application has
|
||||
// updated the consensus params since process start.
|
||||
ConsensusParamUpdates metrics.Counter
|
||||
|
||||
// ValidatorSetUpdates is the total number of times the application has
|
||||
// updated the validator set since process start.
|
||||
ValidatorSetUpdates metrics.Counter
|
||||
}
|
||||
|
||||
// PrometheusMetrics returns Metrics built using the Prometheus client library.
|
||||
@@ -35,12 +43,29 @@ func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics {
|
||||
Help: "Time between BeginBlock and EndBlock in ms.",
|
||||
Buckets: stdprometheus.LinearBuckets(1, 10, 10),
|
||||
}, labels).With(labelsAndValues...),
|
||||
ConsensusParamUpdates: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
|
||||
Namespace: namespace,
|
||||
Subsystem: MetricsSubsystem,
|
||||
Name: "consensus_param_updates",
|
||||
Help: "The total number of times the application as updated the consensus " +
|
||||
"parameters since process start.",
|
||||
}, labels).With(labelsAndValues...),
|
||||
|
||||
ValidatorSetUpdates: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
|
||||
Namespace: namespace,
|
||||
Subsystem: MetricsSubsystem,
|
||||
Name: "validator_set_updates",
|
||||
Help: "The total number of times the application as updated the validator " +
|
||||
"set since process start.",
|
||||
}, labels).With(labelsAndValues...),
|
||||
}
|
||||
}
|
||||
|
||||
// NopMetrics returns no-op Metrics.
|
||||
func NopMetrics() *Metrics {
|
||||
return &Metrics{
|
||||
BlockProcessingTime: discard.NewHistogram(),
|
||||
BlockProcessingTime: discard.NewHistogram(),
|
||||
ConsensusParamUpdates: discard.NewCounter(),
|
||||
ValidatorSetUpdates: discard.NewCounter(),
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user