mirror of
https://github.com/tendermint/tendermint.git
synced 2026-01-08 22:23:11 +00:00
consensus: additional timing metrics (#9733)
This commit is contained in:
@@ -20,6 +20,7 @@
|
||||
|
||||
- [crypto] \#9250 Update to use btcec v2 and the latest btcutil. (@wcsiu)
|
||||
- [consensus] \#9760 Save peer LastCommit correctly to achieve 50% reduction in gossiped precommits. (@williambanfield)
|
||||
- [metrics] \#9733 Add metrics for timing the consensus steps and for the progress of block gossip. (@williambanfield)
|
||||
|
||||
### BUG FIXES
|
||||
|
||||
|
||||
@@ -1,8 +1,12 @@
|
||||
package consensus
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/go-kit/kit/metrics"
|
||||
"github.com/go-kit/kit/metrics/discard"
|
||||
cstypes "github.com/tendermint/tendermint/consensus/types"
|
||||
|
||||
prometheus "github.com/go-kit/kit/metrics/prometheus"
|
||||
stdprometheus "github.com/prometheus/client_golang/prometheus"
|
||||
@@ -61,6 +65,14 @@ type Metrics struct {
|
||||
// Number of blockparts transmitted by peer.
|
||||
BlockParts metrics.Counter
|
||||
|
||||
// Histogram of step duration.
|
||||
StepDuration metrics.Histogram
|
||||
stepStart time.Time
|
||||
|
||||
// Number of block parts received by the node, separated by whether the part
|
||||
// was relevant to the block the node is trying to gather or not.
|
||||
BlockGossipPartsReceived metrics.Counter
|
||||
|
||||
// QuroumPrevoteMessageDelay is the interval in seconds between the proposal
|
||||
// timestamp and the timestamp of the earliest prevote that achieved a quorum
|
||||
// during the prevote step.
|
||||
@@ -99,7 +111,6 @@ func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics {
|
||||
Name: "rounds",
|
||||
Help: "Number of rounds.",
|
||||
}, labels).With(labelsAndValues...),
|
||||
|
||||
Validators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
|
||||
Namespace: namespace,
|
||||
Subsystem: MetricsSubsystem,
|
||||
@@ -202,6 +213,20 @@ func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics {
|
||||
Name: "block_parts",
|
||||
Help: "Number of blockparts transmitted by peer.",
|
||||
}, append(labels, "peer_id")).With(labelsAndValues...),
|
||||
BlockGossipPartsReceived: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
|
||||
Namespace: namespace,
|
||||
Subsystem: MetricsSubsystem,
|
||||
Name: "block_gossip_parts_received",
|
||||
Help: "Number of block parts received by the node, labeled by whether the " +
|
||||
"part was relevant to the block the node was currently gathering or not.",
|
||||
}, append(labels, "matches_current")).With(labelsAndValues...),
|
||||
StepDuration: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{
|
||||
Namespace: namespace,
|
||||
Subsystem: MetricsSubsystem,
|
||||
Name: "step_duration_seconds",
|
||||
Help: "Time spent per step.",
|
||||
Buckets: stdprometheus.ExponentialBucketsRange(0.1, 100, 8),
|
||||
}, append(labels, "step")).With(labelsAndValues...),
|
||||
QuorumPrevoteMessageDelay: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
|
||||
Namespace: namespace,
|
||||
Subsystem: MetricsSubsystem,
|
||||
@@ -226,7 +251,8 @@ func NopMetrics() *Metrics {
|
||||
|
||||
ValidatorLastSignedHeight: discard.NewGauge(),
|
||||
|
||||
Rounds: discard.NewGauge(),
|
||||
Rounds: discard.NewGauge(),
|
||||
StepDuration: discard.NewHistogram(),
|
||||
|
||||
Validators: discard.NewGauge(),
|
||||
ValidatorsPower: discard.NewGauge(),
|
||||
@@ -246,7 +272,21 @@ func NopMetrics() *Metrics {
|
||||
FastSyncing: discard.NewGauge(),
|
||||
StateSyncing: discard.NewGauge(),
|
||||
BlockParts: discard.NewCounter(),
|
||||
BlockGossipPartsReceived: discard.NewCounter(),
|
||||
QuorumPrevoteMessageDelay: discard.NewGauge(),
|
||||
FullPrevoteMessageDelay: discard.NewGauge(),
|
||||
}
|
||||
}
|
||||
|
||||
func (m *Metrics) MarkRound(r int32) {
|
||||
m.Rounds.Set(float64(r))
|
||||
}
|
||||
|
||||
func (m *Metrics) MarkStep(s cstypes.RoundStepType) {
|
||||
if !m.stepStart.IsZero() {
|
||||
stepTime := time.Since(m.stepStart).Seconds()
|
||||
stepName := strings.TrimPrefix(s.String(), "RoundStep")
|
||||
m.StepDuration.With("step", stepName).Observe(stepTime)
|
||||
}
|
||||
m.stepStart = time.Now()
|
||||
}
|
||||
|
||||
@@ -520,6 +520,14 @@ func (cs *State) updateHeight(height int64) {
|
||||
}
|
||||
|
||||
func (cs *State) updateRoundStep(round int32, step cstypes.RoundStepType) {
|
||||
if !cs.replayMode {
|
||||
if round != cs.Round || round == 0 && step == cstypes.RoundStepNewRound {
|
||||
cs.metrics.MarkRound(cs.Round)
|
||||
}
|
||||
if cs.Step != step {
|
||||
cs.metrics.MarkStep(cs.Step)
|
||||
}
|
||||
}
|
||||
cs.Round = round
|
||||
cs.Step = step
|
||||
}
|
||||
@@ -1019,9 +1027,6 @@ func (cs *State) enterNewRound(height int64, round int32) {
|
||||
if err := cs.eventBus.PublishEventNewRound(cs.NewRoundEvent()); err != nil {
|
||||
cs.Logger.Error("failed publishing new round", "err", err)
|
||||
}
|
||||
|
||||
cs.metrics.Rounds.Set(float64(round))
|
||||
|
||||
// Wait for txs to be available in the mempool
|
||||
// before we enterPropose in round 0. If the last block changed the app hash,
|
||||
// we may need an empty "proof" block, and enterPropose immediately.
|
||||
@@ -1853,11 +1858,13 @@ func (cs *State) addProposalBlockPart(msg *BlockPartMessage, peerID p2p.ID) (add
|
||||
// Blocks might be reused, so round mismatch is OK
|
||||
if cs.Height != height {
|
||||
cs.Logger.Debug("received block part from wrong height", "height", height, "round", round)
|
||||
cs.metrics.BlockGossipPartsReceived.With("matches_current", "false").Add(1)
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// We're not expecting a block part.
|
||||
if cs.ProposalBlockParts == nil {
|
||||
cs.metrics.BlockGossipPartsReceived.With("matches_current", "false").Add(1)
|
||||
// NOTE: this can happen when we've gone to a higher round and
|
||||
// then receive parts from the previous round - not necessarily a bad peer.
|
||||
cs.Logger.Debug(
|
||||
@@ -1872,8 +1879,14 @@ func (cs *State) addProposalBlockPart(msg *BlockPartMessage, peerID p2p.ID) (add
|
||||
|
||||
added, err = cs.ProposalBlockParts.AddPart(part)
|
||||
if err != nil {
|
||||
if errors.Is(err, types.ErrPartSetInvalidProof) || errors.Is(err, types.ErrPartSetUnexpectedIndex) {
|
||||
cs.metrics.BlockGossipPartsReceived.With("matches_current", "false").Add(1)
|
||||
}
|
||||
return added, err
|
||||
}
|
||||
|
||||
cs.metrics.BlockGossipPartsReceived.With("matches_current", "true").Add(1)
|
||||
|
||||
if cs.ProposalBlockParts.ByteSize() > cs.state.ConsensusParams.Block.MaxBytes {
|
||||
return added, fmt.Errorf("total size of proposal block parts exceeds maximum block bytes (%d > %d)",
|
||||
cs.ProposalBlockParts.ByteSize(), cs.state.ConsensusParams.Block.MaxBytes,
|
||||
|
||||
@@ -18,40 +18,43 @@ Listen address can be changed in the config file (see
|
||||
|
||||
The following metrics are available:
|
||||
|
||||
| **Name** | **Type** | **Tags** | **Description** |
|
||||
|--------------------------------------------|-----------|----------------|------------------------------------------------------------------------|
|
||||
| consensus\_height | Gauge | | Height of the chain |
|
||||
| consensus\_validators | Gauge | | Number of validators |
|
||||
| consensus\_validators\_power | Gauge | | Total voting power of all validators |
|
||||
| consensus\_validator\_power | Gauge | | Voting power of the node if in the validator set |
|
||||
| consensus\_validator\_last\_signed\_height | Gauge | | Last height the node signed a block, if the node is a validator |
|
||||
| consensus\_validator\_missed\_blocks | Gauge | | Total amount of blocks missed for the node, if the node is a validator |
|
||||
| consensus\_missing\_validators | Gauge | | Number of validators who did not sign |
|
||||
| consensus\_missing\_validators\_power | Gauge | | Total voting power of the missing validators |
|
||||
| consensus\_byzantine\_validators | Gauge | | Number of validators who tried to double sign |
|
||||
| consensus\_byzantine\_validators\_power | Gauge | | Total voting power of the byzantine validators |
|
||||
| consensus\_block\_interval\_seconds | Histogram | | Time between this and last block (Block.Header.Time) in seconds |
|
||||
| consensus\_rounds | Gauge | | Number of rounds |
|
||||
| consensus\_num\_txs | Gauge | | Number of transactions |
|
||||
| consensus\_total\_txs | Gauge | | Total number of transactions committed |
|
||||
| consensus\_block\_parts | Counter | peer\_id | Number of blockparts transmitted by peer |
|
||||
| consensus\_latest\_block\_height | Gauge | | /status sync\_info number |
|
||||
| consensus\_fast\_syncing | Gauge | | Either 0 (not fast syncing) or 1 (syncing) |
|
||||
| consensus\_state\_syncing | Gauge | | Either 0 (not state syncing) or 1 (syncing) |
|
||||
| consensus\_block\_size\_bytes | Gauge | | Block size in bytes |
|
||||
| p2p\_message\_send\_bytes\_total | Counter | message\_type | Number of bytes sent to all peers per message type |
|
||||
| p2p\_message\_receive\_bytes\_total | Counter | message\_type | Number of bytes received from all peers per message type |
|
||||
| p2p\_peers | Gauge | | Number of peers node's connected to |
|
||||
| p2p\_peer\_receive\_bytes\_total | Counter | peer\_id, chID | Number of bytes per channel received from a given peer |
|
||||
| p2p\_peer\_send\_bytes\_total | Counter | peer\_id, chID | Number of bytes per channel sent to a given peer |
|
||||
| p2p\_peer\_pending\_send\_bytes | Gauge | peer\_id | Number of pending bytes to be sent to a given peer |
|
||||
| p2p\_num\_txs | Gauge | peer\_id | Number of transactions submitted by each peer\_id |
|
||||
| p2p\_pending\_send\_bytes | Gauge | peer\_id | Amount of data pending to be sent to peer |
|
||||
| mempool\_size | Gauge | | Number of uncommitted transactions |
|
||||
| mempool\_tx\_size\_bytes | Histogram | | Transaction sizes in bytes |
|
||||
| mempool\_failed\_txs | Counter | | Number of failed transactions |
|
||||
| mempool\_recheck\_times | Counter | | Number of transactions rechecked in the mempool |
|
||||
| state\_block\_processing\_time | Histogram | | Time between BeginBlock and EndBlock in ms |
|
||||
| **Name** | **Type** | **Tags** | **Description** |
|
||||
|--------------------------------------------|-----------|------------------|------------------------------------------------------------------------|
|
||||
| consensus\_height | Gauge | | Height of the chain |
|
||||
| consensus\_validators | Gauge | | Number of validators |
|
||||
| consensus\_validators\_power | Gauge | | Total voting power of all validators |
|
||||
| consensus\_validator\_power | Gauge | | Voting power of the node if in the validator set |
|
||||
| consensus\_validator\_last\_signed\_height | Gauge | | Last height the node signed a block, if the node is a validator |
|
||||
| consensus\_validator\_missed\_blocks | Gauge | | Total amount of blocks missed for the node, if the node is a validator |
|
||||
| consensus\_missing\_validators | Gauge | | Number of validators who did not sign |
|
||||
| consensus\_missing\_validators\_power | Gauge | | Total voting power of the missing validators |
|
||||
| consensus\_byzantine\_validators | Gauge | | Number of validators who tried to double sign |
|
||||
| consensus\_byzantine\_validators\_power | Gauge | | Total voting power of the byzantine validators |
|
||||
| consensus\_block\_interval\_seconds | Histogram | | Time between this and last block (Block.Header.Time) in seconds |
|
||||
| consensus\_rounds | Gauge | | Number of rounds |
|
||||
| consensus\_num\_txs | Gauge | | Number of transactions |
|
||||
| consensus\_total\_txs | Gauge | | Total number of transactions committed |
|
||||
| consensus\_block\_parts | Counter | peer\_id | Number of blockparts transmitted by peer |
|
||||
| consensus\_latest\_block\_height | Gauge | | /status sync\_info number |
|
||||
| consensus\_fast\_syncing | Gauge | | Either 0 (not fast syncing) or 1 (syncing) |
|
||||
| consensus\_state\_syncing | Gauge | | Either 0 (not state syncing) or 1 (syncing) |
|
||||
| consensus\_block\_size\_bytes | Gauge | | Block size in bytes |
|
||||
| consensus\_step\_duration | Histogram | step | Histogram of durations for each step in the consensus protocol |
|
||||
| consensus\_block\_gossip\_parts\_received | Counter | matches\_current | Number of block parts received by the node |
|
||||
| p2p\_message\_send\_bytes\_total | Counter | message\_type | Number of bytes sent to all peers per message type |
|
||||
| p2p\_message\_receive\_bytes\_total | Counter | message\_type | Number of bytes received from all peers per message type |
|
||||
| p2p\_peers | Gauge | | Number of peers node's connected to |
|
||||
| p2p\_peer\_receive\_bytes\_total | Counter | peer\_id, chID | Number of bytes per channel received from a given peer |
|
||||
| p2p\_peer\_send\_bytes\_total | Counter | peer\_id, chID | Number of bytes per channel sent to a given peer |
|
||||
| p2p\_peer\_pending\_send\_bytes | Gauge | peer\_id | Number of pending bytes to be sent to a given peer |
|
||||
| p2p\_num\_txs | Gauge | peer\_id | Number of transactions submitted by each peer\_id |
|
||||
| p2p\_pending\_send\_bytes | Gauge | peer\_id | Amount of data pending to be sent to peer |
|
||||
| mempool\_size | Gauge | | Number of uncommitted transactions |
|
||||
| mempool\_tx\_size\_bytes | Histogram | | Transaction sizes in bytes |
|
||||
| mempool\_failed\_txs | Counter | | Number of failed transactions |
|
||||
| mempool\_recheck\_times | Counter | | Number of transactions rechecked in the mempool |
|
||||
| state\_block\_processing\_time | Histogram | | Time between BeginBlock and EndBlock in ms |
|
||||
|
||||
|
||||
## Useful queries
|
||||
|
||||
|
||||
Reference in New Issue
Block a user