mirror of
https://github.com/tendermint/tendermint.git
synced 2026-02-05 11:31:16 +00:00
This pull request adds an additional set of metrics targeted at providing more visibility into `abci++`.
The following metrics are added and exposed through the `metrics` endpoint:
```
tendermint_consensus_proposal_receive_count{chain_id="test-chain-IrF74Y",status="accepted"} 34
tendermint_consensus_proposal_create_count{chain_id="test-chain-IrF74Y"} 34
tendermint_consensus_vote_extension_receive_count{chain_id="test-chain-IrF74Y",status="accepted"} 34
tendermint_consensus_round_voting_power_percent{chain_id="test-chain-IrF74Y",vote_type="precommit"} 1
tendermint_consensus_round_voting_power_percent{chain_id="test-chain-IrF74Y",vote_type="prevote"} 1
tendermint_state_consensus_param_updates{chain_id="test-chain-IrF74Y"} 0
tendermint_state_validator_set_updates{chain_id="test-chain-IrF74Y"} 0
tendermint_consensus_late_votes{chain_id="test-chain-IrF74Y",vote_type="precommit"} 16
```
This pull request also updates the `metrics.md` file to include some metrics that were previously missed. My hope is to generate the `metrics.md` file with a future version of the tool being architected in #8479.
458 lines
18 KiB
Go
458 lines
18 KiB
Go
package consensus
|
|
|
|
import (
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/go-kit/kit/metrics"
|
|
"github.com/go-kit/kit/metrics/discard"
|
|
|
|
cstypes "github.com/tendermint/tendermint/internal/consensus/types"
|
|
tmproto "github.com/tendermint/tendermint/proto/tendermint/types"
|
|
"github.com/tendermint/tendermint/types"
|
|
|
|
prometheus "github.com/go-kit/kit/metrics/prometheus"
|
|
stdprometheus "github.com/prometheus/client_golang/prometheus"
|
|
)
|
|
|
|
// MetricsSubsystem is the subsystem name shared by every metric that this
// package exposes.
const MetricsSubsystem = "consensus"
|
|
|
|
// Metrics contains metrics exposed by this package.
|
|
type Metrics struct {
|
|
// Height of the chain.
|
|
Height metrics.Gauge
|
|
|
|
// ValidatorLastSignedHeight of a validator.
|
|
ValidatorLastSignedHeight metrics.Gauge
|
|
|
|
// Number of rounds.
|
|
Rounds metrics.Gauge
|
|
|
|
// Histogram of round duration.
|
|
RoundDuration metrics.Histogram
|
|
|
|
// Number of validators.
|
|
Validators metrics.Gauge
|
|
// Total power of all validators.
|
|
ValidatorsPower metrics.Gauge
|
|
// Power of a validator.
|
|
ValidatorPower metrics.Gauge
|
|
// Amount of blocks missed by a validator.
|
|
ValidatorMissedBlocks metrics.Gauge
|
|
// Number of validators who did not sign.
|
|
MissingValidators metrics.Gauge
|
|
// Total power of the missing validators.
|
|
MissingValidatorsPower metrics.Gauge
|
|
// Number of validators who tried to double sign.
|
|
ByzantineValidators metrics.Gauge
|
|
// Total power of the byzantine validators.
|
|
ByzantineValidatorsPower metrics.Gauge
|
|
|
|
// Time between this and the last block.
|
|
BlockIntervalSeconds metrics.Histogram
|
|
|
|
// Number of transactions.
|
|
NumTxs metrics.Gauge
|
|
// Size of the block.
|
|
BlockSizeBytes metrics.Histogram
|
|
// Total number of transactions.
|
|
TotalTxs metrics.Gauge
|
|
// The latest block height.
|
|
CommittedHeight metrics.Gauge
|
|
// Whether or not a node is block syncing. 1 if yes, 0 if no.
|
|
BlockSyncing metrics.Gauge
|
|
// Whether or not a node is state syncing. 1 if yes, 0 if no.
|
|
StateSyncing metrics.Gauge
|
|
|
|
// Number of blockparts transmitted by peer.
|
|
BlockParts metrics.Counter
|
|
|
|
// Histogram of step duration.
|
|
StepDuration metrics.Histogram
|
|
stepStart time.Time
|
|
|
|
// Histogram of time taken to receive a block in seconds, measured between when a new block is first
|
|
// discovered to when the block is completed.
|
|
BlockGossipReceiveLatency metrics.Histogram
|
|
blockGossipStart time.Time
|
|
|
|
// Number of block parts received by the node, separated by whether the part
|
|
// was relevant to the block the node is trying to gather or not.
|
|
BlockGossipPartsReceived metrics.Counter
|
|
|
|
// QuroumPrevoteMessageDelay is the interval in seconds between the proposal
|
|
// timestamp and the timestamp of the earliest prevote that achieved a quorum
|
|
// during the prevote step.
|
|
//
|
|
// To compute it, sum the voting power over each prevote received, in increasing
|
|
// order of timestamp. The timestamp of the first prevote to increase the sum to
|
|
// be above 2/3 of the total voting power of the network defines the endpoint
|
|
// the endpoint of the interval. Subtract the proposal timestamp from this endpoint
|
|
// to obtain the quorum delay.
|
|
QuorumPrevoteDelay metrics.Gauge
|
|
|
|
// FullPrevoteDelay is the interval in seconds between the proposal
|
|
// timestamp and the timestamp of the latest prevote in a round where 100%
|
|
// of the voting power on the network issued prevotes.
|
|
FullPrevoteDelay metrics.Gauge
|
|
|
|
// ProposalTimestampDifference is the difference between the timestamp in
|
|
// the proposal message and the local time of the validator at the time
|
|
// that the validator received the message.
|
|
ProposalTimestampDifference metrics.Histogram
|
|
|
|
// VoteExtensionReceiveCount is the number of vote extensions received by this
|
|
// node. The metric is annotated by the status of the vote extension from the
|
|
// application, either 'accepted' or 'rejected'.
|
|
VoteExtensionReceiveCount metrics.Counter
|
|
|
|
// ProposalReceiveCount is the total number of proposals received by this node
|
|
// since process start.
|
|
// The metric is annotated by the status of the proposal from the application,
|
|
// either 'accepted' or 'rejected'.
|
|
ProposalReceiveCount metrics.Counter
|
|
|
|
// ProposalCreationCount is the total number of proposals created by this node
|
|
// since process start.
|
|
// The metric is annotated by the status of the proposal from the application,
|
|
// either 'accepted' or 'rejected'.
|
|
ProposalCreateCount metrics.Counter
|
|
|
|
// RoundVotingPowerPercent is the percentage of the total voting power received
|
|
// with a round. The value begins at 0 for each round and approaches 1.0 as
|
|
// additional voting power is observed. The metric is labeled by vote type.
|
|
RoundVotingPowerPercent metrics.Gauge
|
|
|
|
// LateVotes stores the number of votes that were received by this node that
|
|
// correspond to earlier heights and rounds than this node is currently
|
|
// in.
|
|
LateVotes metrics.Counter
|
|
}
|
|
|
|
// PrometheusMetrics returns Metrics build using Prometheus client library.
|
|
// Optionally, labels can be provided along with their values ("foo",
|
|
// "fooValue").
|
|
func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics {
|
|
labels := []string{}
|
|
for i := 0; i < len(labelsAndValues); i += 2 {
|
|
labels = append(labels, labelsAndValues[i])
|
|
}
|
|
return &Metrics{
|
|
Height: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
|
|
Namespace: namespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "height",
|
|
Help: "Height of the chain.",
|
|
}, labels).With(labelsAndValues...),
|
|
Rounds: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
|
|
Namespace: namespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "rounds",
|
|
Help: "Number of rounds.",
|
|
}, labels).With(labelsAndValues...),
|
|
RoundDuration: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{
|
|
Namespace: namespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "round_duration",
|
|
Help: "Time spent in a round.",
|
|
Buckets: stdprometheus.ExponentialBucketsRange(0.1, 100, 8),
|
|
}, labels).With(labelsAndValues...),
|
|
Validators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
|
|
Namespace: namespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "validators",
|
|
Help: "Number of validators.",
|
|
}, labels).With(labelsAndValues...),
|
|
ValidatorLastSignedHeight: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
|
|
Namespace: namespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "validator_last_signed_height",
|
|
Help: "Last signed height for a validator",
|
|
}, append(labels, "validator_address")).With(labelsAndValues...),
|
|
ValidatorMissedBlocks: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
|
|
Namespace: namespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "validator_missed_blocks",
|
|
Help: "Total missed blocks for a validator",
|
|
}, append(labels, "validator_address")).With(labelsAndValues...),
|
|
ValidatorsPower: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
|
|
Namespace: namespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "validators_power",
|
|
Help: "Total power of all validators.",
|
|
}, labels).With(labelsAndValues...),
|
|
ValidatorPower: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
|
|
Namespace: namespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "validator_power",
|
|
Help: "Power of a validator",
|
|
}, append(labels, "validator_address")).With(labelsAndValues...),
|
|
MissingValidators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
|
|
Namespace: namespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "missing_validators",
|
|
Help: "Number of validators who did not sign.",
|
|
}, labels).With(labelsAndValues...),
|
|
MissingValidatorsPower: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
|
|
Namespace: namespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "missing_validators_power",
|
|
Help: "Total power of the missing validators.",
|
|
}, labels).With(labelsAndValues...),
|
|
ByzantineValidators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
|
|
Namespace: namespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "byzantine_validators",
|
|
Help: "Number of validators who tried to double sign.",
|
|
}, labels).With(labelsAndValues...),
|
|
ByzantineValidatorsPower: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
|
|
Namespace: namespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "byzantine_validators_power",
|
|
Help: "Total power of the byzantine validators.",
|
|
}, labels).With(labelsAndValues...),
|
|
BlockIntervalSeconds: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{
|
|
Namespace: namespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "block_interval_seconds",
|
|
Help: "Time between this and the last block.",
|
|
}, labels).With(labelsAndValues...),
|
|
NumTxs: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
|
|
Namespace: namespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "num_txs",
|
|
Help: "Number of transactions.",
|
|
}, labels).With(labelsAndValues...),
|
|
BlockSizeBytes: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{
|
|
Namespace: namespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "block_size_bytes",
|
|
Help: "Size of the block.",
|
|
}, labels).With(labelsAndValues...),
|
|
TotalTxs: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
|
|
Namespace: namespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "total_txs",
|
|
Help: "Total number of transactions.",
|
|
}, labels).With(labelsAndValues...),
|
|
CommittedHeight: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
|
|
Namespace: namespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "latest_block_height",
|
|
Help: "The latest block height.",
|
|
}, labels).With(labelsAndValues...),
|
|
BlockSyncing: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
|
|
Namespace: namespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "block_syncing",
|
|
Help: "Whether or not a node is block syncing. 1 if yes, 0 if no.",
|
|
}, labels).With(labelsAndValues...),
|
|
StateSyncing: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
|
|
Namespace: namespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "state_syncing",
|
|
Help: "Whether or not a node is state syncing. 1 if yes, 0 if no.",
|
|
}, labels).With(labelsAndValues...),
|
|
BlockParts: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
|
|
Namespace: namespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "block_parts",
|
|
Help: "Number of blockparts transmitted by peer.",
|
|
}, append(labels, "peer_id")).With(labelsAndValues...),
|
|
BlockGossipReceiveLatency: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{
|
|
Namespace: namespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "block_gossip_receive_latency",
|
|
Help: "Difference in seconds between when the validator learns of a new block" +
|
|
"and when the validator receives the last piece of the block.",
|
|
Buckets: stdprometheus.ExponentialBucketsRange(0.1, 100, 8),
|
|
}, labels).With(labelsAndValues...),
|
|
BlockGossipPartsReceived: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
|
|
Namespace: namespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "block_gossip_parts_received",
|
|
Help: "Number of block parts received by the node, labeled by whether the " +
|
|
"part was relevant to the block the node was currently gathering or not.",
|
|
}, append(labels, "matches_current")).With(labelsAndValues...),
|
|
StepDuration: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{
|
|
Namespace: namespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "step_duration",
|
|
Help: "Time spent per step.",
|
|
Buckets: stdprometheus.ExponentialBucketsRange(0.1, 100, 8),
|
|
}, append(labels, "step")).With(labelsAndValues...),
|
|
QuorumPrevoteDelay: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
|
|
Namespace: namespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "quorum_prevote_delay",
|
|
Help: "Difference in seconds between the proposal timestamp and the timestamp " +
|
|
"of the latest prevote that achieved a quorum in the prevote step.",
|
|
}, append(labels, "proposer_address")).With(labelsAndValues...),
|
|
FullPrevoteDelay: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
|
|
Namespace: namespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "full_prevote_delay",
|
|
Help: "Difference in seconds between the proposal timestamp and the timestamp " +
|
|
"of the latest prevote that achieved 100% of the voting power in the prevote step.",
|
|
}, append(labels, "proposer_address")).With(labelsAndValues...),
|
|
ProposalTimestampDifference: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{
|
|
Namespace: namespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "proposal_timestamp_difference",
|
|
Help: "Difference in seconds between the timestamp in the proposal " +
|
|
"message and the local time when the message was received. " +
|
|
"Only calculated when a new block is proposed.",
|
|
Buckets: []float64{-10, -.5, -.025, 0, .1, .5, 1, 1.5, 2, 10},
|
|
}, append(labels, "is_timely")).With(labelsAndValues...),
|
|
VoteExtensionReceiveCount: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
|
|
Namespace: namespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "vote_extension_receive_count",
|
|
Help: "Number of vote extensions received by the node since process start, labeled by " +
|
|
"the application's response to VerifyVoteExtension, either accept or reject.",
|
|
}, append(labels, "status")).With(labelsAndValues...),
|
|
|
|
ProposalReceiveCount: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
|
|
Namespace: namespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "proposal_receive_count",
|
|
Help: "Number of vote proposals received by the node since process start, labeled by " +
|
|
"the application's response to ProcessProposal, either accept or reject.",
|
|
}, append(labels, "status")).With(labelsAndValues...),
|
|
|
|
ProposalCreateCount: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
|
|
Namespace: namespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "proposal_create_count",
|
|
Help: "Number of proposals created by the node since process start.",
|
|
}, labels).With(labelsAndValues...),
|
|
|
|
RoundVotingPowerPercent: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
|
|
Namespace: namespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "round_voting_power_percent",
|
|
Help: "Percentage of the total voting power received with a round. " +
|
|
"The value begins at 0 for each round and approaches 1.0 as additional " +
|
|
"voting power is observed.",
|
|
}, append(labels, "vote_type")).With(labelsAndValues...),
|
|
LateVotes: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
|
|
Namespace: namespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "late_votes",
|
|
Help: "Number of votes received by the node since process start that correspond to earlier heights and rounds than this node is currently in.",
|
|
}, append(labels, "vote_type")).With(labelsAndValues...),
|
|
}
|
|
}
|
|
|
|
// NopMetrics returns no-op Metrics.
|
|
func NopMetrics() *Metrics {
|
|
return &Metrics{
|
|
Height: discard.NewGauge(),
|
|
|
|
ValidatorLastSignedHeight: discard.NewGauge(),
|
|
|
|
Rounds: discard.NewGauge(),
|
|
RoundDuration: discard.NewHistogram(),
|
|
StepDuration: discard.NewHistogram(),
|
|
|
|
Validators: discard.NewGauge(),
|
|
ValidatorsPower: discard.NewGauge(),
|
|
ValidatorPower: discard.NewGauge(),
|
|
ValidatorMissedBlocks: discard.NewGauge(),
|
|
MissingValidators: discard.NewGauge(),
|
|
MissingValidatorsPower: discard.NewGauge(),
|
|
ByzantineValidators: discard.NewGauge(),
|
|
ByzantineValidatorsPower: discard.NewGauge(),
|
|
|
|
BlockIntervalSeconds: discard.NewHistogram(),
|
|
|
|
NumTxs: discard.NewGauge(),
|
|
BlockSizeBytes: discard.NewHistogram(),
|
|
TotalTxs: discard.NewGauge(),
|
|
CommittedHeight: discard.NewGauge(),
|
|
BlockSyncing: discard.NewGauge(),
|
|
StateSyncing: discard.NewGauge(),
|
|
BlockParts: discard.NewCounter(),
|
|
BlockGossipReceiveLatency: discard.NewHistogram(),
|
|
BlockGossipPartsReceived: discard.NewCounter(),
|
|
QuorumPrevoteDelay: discard.NewGauge(),
|
|
FullPrevoteDelay: discard.NewGauge(),
|
|
ProposalTimestampDifference: discard.NewHistogram(),
|
|
VoteExtensionReceiveCount: discard.NewCounter(),
|
|
ProposalReceiveCount: discard.NewCounter(),
|
|
ProposalCreateCount: discard.NewCounter(),
|
|
RoundVotingPowerPercent: discard.NewGauge(),
|
|
LateVotes: discard.NewCounter(),
|
|
}
|
|
}
|
|
|
|
// RecordConsMetrics uses for recording the block related metrics during fast-sync.
|
|
func (m *Metrics) RecordConsMetrics(block *types.Block) {
|
|
m.NumTxs.Set(float64(len(block.Data.Txs)))
|
|
m.TotalTxs.Add(float64(len(block.Data.Txs)))
|
|
m.BlockSizeBytes.Observe(float64(block.Size()))
|
|
m.CommittedHeight.Set(float64(block.Height))
|
|
}
|
|
|
|
func (m *Metrics) MarkBlockGossipStarted() {
|
|
m.blockGossipStart = time.Now()
|
|
}
|
|
|
|
func (m *Metrics) MarkBlockGossipComplete() {
|
|
m.BlockGossipReceiveLatency.Observe(time.Since(m.blockGossipStart).Seconds())
|
|
}
|
|
|
|
func (m *Metrics) MarkProposalProcessed(accepted bool) {
|
|
status := "accepted"
|
|
if !accepted {
|
|
status = "rejected"
|
|
}
|
|
m.ProposalReceiveCount.With("status", status).Add(1)
|
|
}
|
|
|
|
func (m *Metrics) MarkVoteExtensionReceived(accepted bool) {
|
|
status := "accepted"
|
|
if !accepted {
|
|
status = "rejected"
|
|
}
|
|
m.VoteExtensionReceiveCount.With("status", status).Add(1)
|
|
}
|
|
|
|
func (m *Metrics) MarkVoteReceived(vt tmproto.SignedMsgType, power, totalPower int64) {
|
|
p := float64(power) / float64(totalPower)
|
|
n := strings.ToLower(strings.TrimPrefix(vt.String(), "SIGNED_MSG_TYPE_"))
|
|
m.RoundVotingPowerPercent.With("vote_type", n).Add(p)
|
|
}
|
|
|
|
func (m *Metrics) MarkRound(r int32, st time.Time) {
|
|
m.Rounds.Set(float64(r))
|
|
roundTime := time.Since(st).Seconds()
|
|
m.RoundDuration.Observe(roundTime)
|
|
|
|
pvt := tmproto.PrevoteType
|
|
pvn := strings.ToLower(strings.TrimPrefix(pvt.String(), "SIGNED_MSG_TYPE_"))
|
|
m.RoundVotingPowerPercent.With("vote_type", pvn).Set(0)
|
|
|
|
pct := tmproto.PrecommitType
|
|
pcn := strings.ToLower(strings.TrimPrefix(pct.String(), "SIGNED_MSG_TYPE_"))
|
|
m.RoundVotingPowerPercent.With("vote_type", pcn).Set(0)
|
|
}
|
|
|
|
func (m *Metrics) MarkLateVote(vt tmproto.SignedMsgType) {
|
|
n := strings.ToLower(strings.TrimPrefix(vt.String(), "SIGNED_MSG_TYPE_"))
|
|
m.LateVotes.With("vote_type", n).Add(1)
|
|
}
|
|
|
|
func (m *Metrics) MarkStep(s cstypes.RoundStepType) {
|
|
if !m.stepStart.IsZero() {
|
|
stepTime := time.Since(m.stepStart).Seconds()
|
|
stepName := strings.TrimPrefix(s.String(), "RoundStep")
|
|
m.StepDuration.With("step", stepName).Observe(stepTime)
|
|
}
|
|
m.stepStart = time.Now()
|
|
}
|