Compare commits

...

24 Commits

Author SHA1 Message Date
William Banfield
6c8431b3e2 save last precommits 2022-11-23 18:38:55 -05:00
William Banfield
840faa318a fix label 2022-11-23 18:07:52 -05:00
William Banfield
3d618d6a4e store last commit size metric 2022-11-23 17:41:17 -05:00
William Banfield
7bf12a4729 comment out new height last commit send again 2022-11-23 14:47:47 -05:00
William Banfield
925876e200 check that the peer is in the same height as us 2022-11-23 14:42:38 -05:00
William Banfield
005db226ce comment out new height last commit send 2022-11-23 14:41:14 -05:00
William Banfield
d2c150df72 remove catchup logic temporarily 2022-11-23 14:29:30 -05:00
William Banfield
db4a027e91 add peer id to receive 2022-11-23 12:53:49 -05:00
William Banfield
48061e204f label vote sent with peer_id 2022-11-23 12:46:14 -05:00
William Banfield
a46e11c36d fix peer vote count name 2022-11-23 12:30:39 -05:00
William Banfield
3fed395424 add metric to track peer vote set size 2022-11-23 12:24:13 -05:00
William Banfield
a58eec6623 add message receive and send metrics 2022-11-23 11:30:14 -05:00
William Banfield
d5dc6fca3c add metric to all places 2022-11-23 11:18:53 -05:00
William Banfield
7a141c0225 add metric for vote send count 2022-11-23 11:13:17 -05:00
William Banfield
f59905d059 uncomment prevote send 2022-11-23 11:06:18 -05:00
William Banfield
8ebe1d6924 comment out prevote send in general? 2022-11-23 10:59:34 -05:00
William Banfield
d7e99c32ef remove final POL case 2022-11-23 10:55:17 -05:00
William Banfield
db622bee59 remove POL prevote send cases 2022-11-23 10:50:45 -05:00
William Banfield
8ebe9163f9 add type to duplicate metric 2022-11-23 10:38:48 -05:00
William Banfield
adfec78fff set received to nil between rounds 2022-11-22 21:32:52 -05:00
William Banfield
85ded710f5 put duplicate vote metric in place 2022-11-22 20:44:14 -05:00
William Banfield
4762dac9ed add new metric 2022-11-22 20:31:57 -05:00
William Banfield
342a528f81 remove unused metrics method 2022-11-21 13:33:48 -05:00
William Banfield
d73fab154e consensus: additional timing metrics (port of #7849) (#9168)
This change introduces an additional set of metrics aimed at helping operators understand the timing for consensus.

This change adds the following metrics:

```
tendermint_consensus_round_duration_seconds_bucket{chain_id="test-chain-IrF74Y",le="0.1"} 29
tendermint_consensus_round_duration_seconds_bucket{chain_id="test-chain-IrF74Y",le="0.2682695795279726"} 29
tendermint_consensus_round_duration_seconds_bucket{chain_id="test-chain-IrF74Y",le="0.7196856730011522"} 29
tendermint_consensus_round_duration_seconds_bucket{chain_id="test-chain-IrF74Y",le="1.9306977288832508"} 29
tendermint_consensus_round_duration_seconds_bucket{chain_id="test-chain-IrF74Y",le="5.1794746792312125"} 29
tendermint_consensus_round_duration_seconds_bucket{chain_id="test-chain-IrF74Y",le="13.894954943731381"} 29
tendermint_consensus_round_duration_seconds_bucket{chain_id="test-chain-IrF74Y",le="37.27593720314942"} 29
tendermint_consensus_round_duration_seconds_bucket{chain_id="test-chain-IrF74Y",le="100.00000000000006"} 29
tendermint_consensus_round_duration_seconds_bucket{chain_id="test-chain-IrF74Y",le="+Inf"} 29
tendermint_consensus_round_duration_seconds_sum{chain_id="test-chain-IrF74Y"} 0.028651869999999996
tendermint_consensus_round_duration_seconds_count{chain_id="test-chain-IrF74Y"} 29
```

```
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="Commit",le="0.1"} 29
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="Commit",le="0.2682695795279726"} 29
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="Commit",le="0.7196856730011522"} 29
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="Commit",le="1.9306977288832508"} 29
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="Commit",le="5.1794746792312125"} 29
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="Commit",le="13.894954943731381"} 29
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="Commit",le="37.27593720314942"} 29
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="Commit",le="100.00000000000006"} 29
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="Commit",le="+Inf"} 29
tendermint_consensus_step_duration_seconds_sum{chain_id="test-chain-IrF74Y",step="Commit"} 0.26650875
tendermint_consensus_step_duration_seconds_count{chain_id="test-chain-IrF74Y",step="Commit"} 29
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="NewHeight",le="0.1"} 0
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="NewHeight",le="0.2682695795279726"} 0
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="NewHeight",le="0.7196856730011522"} 0
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="NewHeight",le="1.9306977288832508"} 28
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="NewHeight",le="5.1794746792312125"} 28
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="NewHeight",le="13.894954943731381"} 28
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="NewHeight",le="37.27593720314942"} 28
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="NewHeight",le="100.00000000000006"} 28
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="NewHeight",le="+Inf"} 28
tendermint_consensus_step_duration_seconds_sum{chain_id="test-chain-IrF74Y",step="NewHeight"} 27.773921702
tendermint_consensus_step_duration_seconds_count{chain_id="test-chain-IrF74Y",step="NewHeight"} 28
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="NewRound",le="0.1"} 29
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="NewRound",le="0.2682695795279726"} 29
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="NewRound",le="0.7196856730011522"} 29
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="NewRound",le="1.9306977288832508"} 29
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="NewRound",le="5.1794746792312125"} 29
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="NewRound",le="13.894954943731381"} 29
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="NewRound",le="37.27593720314942"} 29
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="NewRound",le="100.00000000000006"} 29
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="NewRound",le="+Inf"} 29
tendermint_consensus_step_duration_seconds_sum{chain_id="test-chain-IrF74Y",step="NewRound"} 0.168961052
tendermint_consensus_step_duration_seconds_count{chain_id="test-chain-IrF74Y",step="NewRound"} 29
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="Precommit",le="0.1"} 29
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="Precommit",le="0.2682695795279726"} 29
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="Precommit",le="0.7196856730011522"} 29
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="Precommit",le="1.9306977288832508"} 29
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="Precommit",le="5.1794746792312125"} 29
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="Precommit",le="13.894954943731381"} 29
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="Precommit",le="37.27593720314942"} 29
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="Precommit",le="100.00000000000006"} 29
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="Precommit",le="+Inf"} 29
tendermint_consensus_step_duration_seconds_sum{chain_id="test-chain-IrF74Y",step="Precommit"} 0.06414115999999999
tendermint_consensus_step_duration_seconds_count{chain_id="test-chain-IrF74Y",step="Precommit"} 29
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="Prevote",le="0.1"} 29
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="Prevote",le="0.2682695795279726"} 29
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="Prevote",le="0.7196856730011522"} 29
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="Prevote",le="1.9306977288832508"} 29
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="Prevote",le="5.1794746792312125"} 29
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="Prevote",le="13.894954943731381"} 29
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="Prevote",le="37.27593720314942"} 29
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="Prevote",le="100.00000000000006"} 29
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="Prevote",le="+Inf"} 29
tendermint_consensus_step_duration_seconds_sum{chain_id="test-chain-IrF74Y",step="Prevote"} 0.177714525
tendermint_consensus_step_duration_seconds_count{chain_id="test-chain-IrF74Y",step="Prevote"} 29
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="Propose",le="0.1"} 29
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="Propose",le="0.2682695795279726"} 29
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="Propose",le="0.7196856730011522"} 29
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="Propose",le="1.9306977288832508"} 29
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="Propose",le="5.1794746792312125"} 29
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="Propose",le="13.894954943731381"} 29
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="Propose",le="37.27593720314942"} 29
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="Propose",le="100.00000000000006"} 29
tendermint_consensus_step_duration_seconds_bucket{chain_id="test-chain-IrF74Y",step="Propose",le="+Inf"} 29
tendermint_consensus_step_duration_seconds_sum{chain_id="test-chain-IrF74Y",step="Propose"} 0.221851927
tendermint_consensus_step_duration_seconds_count{chain_id="test-chain-IrF74Y",step="Propose"} 29
```

```
tendermint_consensus_block_gossip_parts_received{chain_id="test-chain-IrF74Y",matches_current="true"} 29
```

---

- [x] Tests written/updated, or no tests needed
- [ ] `CHANGELOG_PENDING.md` updated, or no changelog entry needed
- [x] Updated relevant documentation (`docs/`) and code comments, or no
      documentation updates needed

Closes: #9166
2022-11-21 13:31:57 -05:00
8 changed files with 305 additions and 91 deletions

View File

@@ -1,8 +1,12 @@
package consensus
import (
"strings"
"time"
"github.com/go-kit/kit/metrics"
"github.com/go-kit/kit/metrics/discard"
cstypes "github.com/tendermint/tendermint/consensus/types"
prometheus "github.com/go-kit/kit/metrics/prometheus"
stdprometheus "github.com/prometheus/client_golang/prometheus"
@@ -25,6 +29,9 @@ type Metrics struct {
// Number of rounds.
Rounds metrics.Gauge
// Histogram of round duration.
RoundDuration metrics.Histogram
// Number of validators.
Validators metrics.Gauge
// Total power of all validators.
@@ -61,6 +68,14 @@ type Metrics struct {
// Number of blockparts transmitted by peer.
BlockParts metrics.Counter
// Histogram of step duration.
StepDuration metrics.Histogram
stepStart time.Time
// Number of block parts received by the node, separated by whether the part
// was relevant to the block the node is trying to gather or not.
BlockGossipPartsReceived metrics.Counter
// QuroumPrevoteMessageDelay is the interval in seconds between the proposal
// timestamp and the timestamp of the earliest prevote that achieved a quorum
// during the prevote step.
@@ -76,6 +91,12 @@ type Metrics struct {
// timestamp and the timestamp of the latest prevote in a round where 100%
// of the voting power on the network issued prevotes.
FullPrevoteMessageDelay metrics.Gauge
DuplicateVoteReceive metrics.Counter
VoteSent metrics.Counter
VoteReceived metrics.Counter
PeerVoteCount metrics.Gauge
}
// PrometheusMetrics returns Metrics build using Prometheus client library.
@@ -99,7 +120,13 @@ func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics {
Name: "rounds",
Help: "Number of rounds.",
}, labels).With(labelsAndValues...),
RoundDuration: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{
Namespace: namespace,
Subsystem: MetricsSubsystem,
Name: "round_duration_seconds",
Help: "Time spent in a round.",
Buckets: stdprometheus.ExponentialBucketsRange(0.1, 100, 8),
}, labels).With(labelsAndValues...),
Validators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Namespace: namespace,
Subsystem: MetricsSubsystem,
@@ -202,6 +229,20 @@ func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics {
Name: "block_parts",
Help: "Number of blockparts transmitted by peer.",
}, append(labels, "peer_id")).With(labelsAndValues...),
BlockGossipPartsReceived: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
Namespace: namespace,
Subsystem: MetricsSubsystem,
Name: "block_gossip_parts_received",
Help: "Number of block parts received by the node, labeled by whether the " +
"part was relevant to the block the node was currently gathering or not.",
}, append(labels, "matches_current")).With(labelsAndValues...),
StepDuration: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{
Namespace: namespace,
Subsystem: MetricsSubsystem,
Name: "step_duration_seconds",
Help: "Time spent per step.",
Buckets: stdprometheus.ExponentialBucketsRange(0.1, 100, 8),
}, append(labels, "step")).With(labelsAndValues...),
QuorumPrevoteMessageDelay: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Namespace: namespace,
Subsystem: MetricsSubsystem,
@@ -216,6 +257,30 @@ func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics {
Help: "Difference in seconds between the proposal timestamp and the timestamp " +
"of the latest prevote that achieved 100% of the voting power in the prevote step.",
}, labels).With(labelsAndValues...),
DuplicateVoteReceive: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
Namespace: namespace,
Subsystem: MetricsSubsystem,
Name: "duplicate_vote_receive",
Help: "Number of votes received multiple times from the same peer by peer",
}, append(labels, "peer_id", "type")).With(labelsAndValues...),
VoteSent: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
Namespace: namespace,
Subsystem: MetricsSubsystem,
Name: "vote_sent",
Help: "Number of votes of each type sent.",
}, append(labels, "peer_id", "vote_type")).With(labelsAndValues...),
VoteReceived: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
Namespace: namespace,
Subsystem: MetricsSubsystem,
Name: "vote_received",
Help: "Number of votes of each type received.",
}, append(labels, "peer_id", "vote_type")).With(labelsAndValues...),
PeerVoteCount: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Namespace: namespace,
Subsystem: MetricsSubsystem,
Name: "peer_vote_count",
Help: "Number of votes each peer is believed to have.",
}, append(labels, "peer_id", "vote_count_type")).With(labelsAndValues...),
}
}
@@ -226,7 +291,9 @@ func NopMetrics() *Metrics {
ValidatorLastSignedHeight: discard.NewGauge(),
Rounds: discard.NewGauge(),
Rounds: discard.NewGauge(),
RoundDuration: discard.NewHistogram(),
StepDuration: discard.NewHistogram(),
Validators: discard.NewGauge(),
ValidatorsPower: discard.NewGauge(),
@@ -246,7 +313,25 @@ func NopMetrics() *Metrics {
FastSyncing: discard.NewGauge(),
StateSyncing: discard.NewGauge(),
BlockParts: discard.NewCounter(),
BlockGossipPartsReceived: discard.NewCounter(),
QuorumPrevoteMessageDelay: discard.NewGauge(),
FullPrevoteMessageDelay: discard.NewGauge(),
DuplicateVoteReceive: discard.NewCounter(),
VoteSent: discard.NewCounter(),
}
}
func (m *Metrics) MarkRound(r int32, st time.Time) {
m.Rounds.Set(float64(r))
roundTime := time.Since(st).Seconds()
m.RoundDuration.Observe(roundTime)
}
func (m *Metrics) MarkStep(s cstypes.RoundStepType) {
if !m.stepStart.IsZero() {
stepTime := time.Since(m.stepStart).Seconds()
stepName := strings.TrimPrefix(s.String(), "RoundStep")
m.StepDuration.With("step", stepName).Observe(stepTime)
}
m.stepStart = time.Now()
}

View File

@@ -15,6 +15,7 @@ import (
"github.com/tendermint/tendermint/libs/log"
tmsync "github.com/tendermint/tendermint/libs/sync"
"github.com/tendermint/tendermint/p2p"
"github.com/tendermint/tendermint/proto/tendermint/consensus"
tmcons "github.com/tendermint/tendermint/proto/tendermint/consensus"
tmproto "github.com/tendermint/tendermint/proto/tendermint/types"
sm "github.com/tendermint/tendermint/state"
@@ -179,7 +180,7 @@ func (conR *Reactor) GetChannels() []*p2p.ChannelDescriptor {
// InitPeer implements Reactor by creating a state for the peer.
func (conR *Reactor) InitPeer(peer p2p.Peer) p2p.Peer {
peerState := NewPeerState(peer).SetLogger(conR.Logger)
peerState := NewPeerState(peer, conR.Metrics).SetLogger(conR.Logger)
peer.Set(types.PeerStateKey, peerState)
return peer
}
@@ -231,6 +232,10 @@ func (conR *Reactor) ReceiveEnvelope(e p2p.Envelope) {
conR.Logger.Debug("Receive", "src", e.Src, "chId", e.ChannelID)
return
}
if m, ok := e.Message.(*consensus.Vote); ok {
conR.Metrics.VoteReceived.With("peer_id", string(e.Src.ID()), "vote_type", m.Vote.Type.String()).Add(1)
}
m := e.Message
if wm, ok := m.(p2p.Wrapper); ok {
m = wm.Wrap()
@@ -349,6 +354,11 @@ func (conR *Reactor) ReceiveEnvelope(e p2p.Envelope) {
ps.EnsureVoteBitArrays(height-1, lastCommitSize)
ps.SetHasVote(msg.Vote)
duplicate := ps.SetReceivedVote(msg.Vote)
if duplicate {
conR.Metrics.DuplicateVoteReceive.With("peer_id", string(e.Src.ID()), "type", msg.Vote.Type.String()).Add(1)
}
cs.peerMsgQueue <- msgInfo{msg, e.Src.ID()}
default:
@@ -746,28 +756,30 @@ OUTER_LOOP:
}
}
// Special catchup logic.
// If peer is lagging by height 1, send LastCommit.
if prs.Height != 0 && rs.Height == prs.Height+1 {
if ps.PickSendVote(rs.LastCommit) {
logger.Debug("Picked rs.LastCommit to send", "height", prs.Height)
continue OUTER_LOOP
}
}
// Catchup logic
// If peer is lagging by more than 1, send Commit.
blockStoreBase := conR.conS.blockStore.Base()
if blockStoreBase > 0 && prs.Height != 0 && rs.Height >= prs.Height+2 && prs.Height >= blockStoreBase {
// Load the block commit for prs.Height,
// which contains precommit signatures for prs.Height.
if commit := conR.conS.blockStore.LoadBlockCommit(prs.Height); commit != nil {
if ps.PickSendVote(commit) {
logger.Debug("Picked Catchup commit to send", "height", prs.Height)
/*
// Special catchup logic.
// If peer is lagging by height 1, send LastCommit.
if prs.Height != 0 && rs.Height == prs.Height+1 {
if ps.PickSendVote(rs.LastCommit) {
logger.Debug("Picked rs.LastCommit to send", "height", prs.Height)
continue OUTER_LOOP
}
}
}
// Catchup logic
// If peer is lagging by more than 1, send Commit.
blockStoreBase := conR.conS.blockStore.Base()
if blockStoreBase > 0 && prs.Height != 0 && rs.Height >= prs.Height+2 && prs.Height >= blockStoreBase {
// Load the block commit for prs.Height,
// which contains precommit signatures for prs.Height.
if commit := conR.conS.blockStore.LoadBlockCommit(prs.Height); commit != nil {
if ps.PickSendVote(commit) {
logger.Debug("Picked Catchup commit to send", "height", prs.Height)
continue OUTER_LOOP
}
}
}
*/
if sleeping == 0 {
// We sent nothing. Sleep...
@@ -793,22 +805,26 @@ func (conR *Reactor) gossipVotesForHeight(
) bool {
// If there are lastCommits to send...
if prs.Step == cstypes.RoundStepNewHeight {
if prs.Step == cstypes.RoundStepNewHeight && prs.Height == rs.Height {
if ps.PickSendVote(rs.LastCommit) {
logger.Debug("Picked rs.LastCommit to send")
return true
}
}
// If there are POL prevotes to send...
if prs.Step <= cstypes.RoundStepPropose && prs.Round != -1 && prs.Round <= rs.Round && prs.ProposalPOLRound != -1 {
if polPrevotes := rs.Votes.Prevotes(prs.ProposalPOLRound); polPrevotes != nil {
if ps.PickSendVote(polPrevotes) {
logger.Debug("Picked rs.Prevotes(prs.ProposalPOLRound) to send",
"round", prs.ProposalPOLRound)
return true
/*
// If there are POL prevotes to send...
if prs.Step <= cstypes.RoundStepPropose && prs.Round != -1 && prs.Round <= rs.Round && prs.ProposalPOLRound != -1 {
if polPrevotes := rs.Votes.Prevotes(prs.ProposalPOLRound); polPrevotes != nil {
if ps.PickSendVote(polPrevotes) {
logger.Debug("Picked rs.Prevotes(prs.ProposalPOLRound) to send",
"round", prs.ProposalPOLRound)
return true
}
}
}
}
*/
// If there are prevotes to send...
if prs.Step <= cstypes.RoundStepPrevoteWait && prs.Round != -1 && prs.Round <= rs.Round {
if ps.PickSendVote(rs.Votes.Prevotes(prs.Round)) {
@@ -823,23 +839,26 @@ func (conR *Reactor) gossipVotesForHeight(
return true
}
}
// If there are prevotes to send...Needed because of validBlock mechanism
if prs.Round != -1 && prs.Round <= rs.Round {
if ps.PickSendVote(rs.Votes.Prevotes(prs.Round)) {
logger.Debug("Picked rs.Prevotes(prs.Round) to send", "round", prs.Round)
return true
}
}
// If there are POLPrevotes to send...
if prs.ProposalPOLRound != -1 {
if polPrevotes := rs.Votes.Prevotes(prs.ProposalPOLRound); polPrevotes != nil {
if ps.PickSendVote(polPrevotes) {
logger.Debug("Picked rs.Prevotes(prs.ProposalPOLRound) to send",
"round", prs.ProposalPOLRound)
return true
/*
// If there are prevotes to send...Needed because of validBlock mechanism
if prs.Round != -1 && prs.Round <= rs.Round {
if ps.PickSendVote(rs.Votes.Prevotes(prs.Round)) {
logger.Debug("Picked rs.Prevotes(prs.Round) to send", "round", prs.Round)
return true
}
}
// If there are POLPrevotes to send...
if prs.ProposalPOLRound != -1 {
if polPrevotes := rs.Votes.Prevotes(prs.ProposalPOLRound); polPrevotes != nil {
if ps.PickSendVote(polPrevotes) {
logger.Debug("Picked rs.Prevotes(prs.ProposalPOLRound) to send",
"round", prs.ProposalPOLRound)
return true
}
}
}
}
*/
return false
}
@@ -1026,8 +1045,9 @@ var (
// NOTE: THIS GETS DUMPED WITH rpc/core/consensus.go.
// Be mindful of what you Expose.
type PeerState struct {
peer p2p.Peer
logger log.Logger
peer p2p.Peer
logger log.Logger
conMetrics *Metrics
mtx sync.Mutex // NOTE: Modify below using setters, never directly.
PRS cstypes.PeerRoundState `json:"round_state"` // Exposed.
@@ -1046,10 +1066,11 @@ func (pss peerStateStats) String() string {
}
// NewPeerState returns a new PeerState for the given Peer
func NewPeerState(peer p2p.Peer) *PeerState {
func NewPeerState(peer p2p.Peer, m *Metrics) *PeerState {
return &PeerState{
peer: peer,
logger: log.NewNopLogger(),
conMetrics: m,
peer: peer,
logger: log.NewNopLogger(),
PRS: cstypes.PeerRoundState{
Round: -1,
ProposalPOLRound: -1,
@@ -1155,7 +1176,15 @@ func (ps *PeerState) PickSendVote(votes types.VoteSetReader) bool {
Vote: vote.ToProto(),
},
}, ps.logger) {
ps.conMetrics.VoteSent.With("peer_id", string(ps.peer.ID()), "vote_type", vote.Type.String()).Add(1)
ps.SetHasVote(vote)
psVotes := ps.getVoteBitArray(votes.GetHeight(), votes.GetRound(), tmproto.SignedMsgType(votes.Type()))
vct := vote.Type.String()
if vote.Type == tmproto.PrecommitType && vote.Height+1 == ps.PRS.Height {
vct = fmt.Sprintf("last_commit_%s", vct)
}
ps.conMetrics.PeerVoteCount.With("peer_id", string(ps.peer.ID()), "vote_count_type", vct).Set(float64(len(psVotes.GetTrueIndices())))
return true
}
return false
@@ -1239,6 +1268,41 @@ func (ps *PeerState) getVoteBitArray(height int64, round int32, votesType tmprot
return nil
}
func (ps *PeerState) getVoteReceivedBitArray(height int64, round int32, votesType tmproto.SignedMsgType) *bits.BitArray {
if !types.IsVoteTypeValid(votesType) {
return nil
}
if ps.PRS.Height == height {
if ps.PRS.Round == round {
switch votesType {
case tmproto.PrevoteType:
return ps.PRS.PrevotesReceived
case tmproto.PrecommitType:
return ps.PRS.PrecommitsReceived
}
}
if ps.PRS.CatchupCommitRound == round {
switch votesType {
case tmproto.PrevoteType:
return nil
case tmproto.PrecommitType:
return ps.PRS.CatchupCommit
}
}
if ps.PRS.ProposalPOLRound == round {
switch votesType {
case tmproto.PrevoteType:
return ps.PRS.ProposalPOL
case tmproto.PrecommitType:
return nil
}
}
return nil
}
return nil
}
// 'round': A round for which we have a +2/3 commit.
func (ps *PeerState) ensureCatchupCommitRound(height int64, round int32, numValidators int) {
if ps.PRS.Height != height {
@@ -1286,6 +1350,12 @@ func (ps *PeerState) ensureVoteBitArrays(height int64, numValidators int) {
if ps.PRS.Precommits == nil {
ps.PRS.Precommits = bits.NewBitArray(numValidators)
}
if ps.PRS.PrevotesReceived == nil {
ps.PRS.PrevotesReceived = bits.NewBitArray(numValidators)
}
if ps.PRS.PrecommitsReceived == nil {
ps.PRS.PrecommitsReceived = bits.NewBitArray(numValidators)
}
if ps.PRS.CatchupCommit == nil {
ps.PRS.CatchupCommit = bits.NewBitArray(numValidators)
}
@@ -1345,6 +1415,13 @@ func (ps *PeerState) SetHasVote(vote *types.Vote) {
ps.setHasVote(vote.Height, vote.Round, vote.Type, vote.ValidatorIndex)
}
func (ps *PeerState) SetReceivedVote(vote *types.Vote) bool {
ps.mtx.Lock()
defer ps.mtx.Unlock()
return ps.setReceivedVote(vote.Height, vote.Round, vote.Type, vote.ValidatorIndex)
}
func (ps *PeerState) setHasVote(height int64, round int32, voteType tmproto.SignedMsgType, index int32) {
ps.logger.Debug("setHasVote",
"peerH/R",
@@ -1359,6 +1436,15 @@ func (ps *PeerState) setHasVote(height int64, round int32, voteType tmproto.Sign
psVotes.SetIndex(int(index), true)
}
}
func (ps *PeerState) setReceivedVote(height int64, round int32, voteType tmproto.SignedMsgType, index int32) bool {
psVotes := ps.getVoteReceivedBitArray(height, round, voteType)
var alreadySet bool
if psVotes != nil {
alreadySet = psVotes.GetIndex(int(index))
psVotes.SetIndex(int(index), true)
}
return alreadySet
}
// ApplyNewRoundStepMessage updates the peer state for the new round.
func (ps *PeerState) ApplyNewRoundStepMessage(msg *NewRoundStepMessage) {
@@ -1375,6 +1461,7 @@ func (ps *PeerState) ApplyNewRoundStepMessage(msg *NewRoundStepMessage) {
psRound := ps.PRS.Round
psCatchupCommitRound := ps.PRS.CatchupCommitRound
psCatchupCommit := ps.PRS.CatchupCommit
lastPrecommits := ps.PRS.Precommits
startTime := tmtime.Now().Add(-1 * time.Duration(msg.SecondsSinceStartTime) * time.Second)
ps.PRS.Height = msg.Height
@@ -1390,6 +1477,8 @@ func (ps *PeerState) ApplyNewRoundStepMessage(msg *NewRoundStepMessage) {
// We'll update the BitArray capacity later.
ps.PRS.Prevotes = nil
ps.PRS.Precommits = nil
ps.PRS.PrevotesReceived = nil
ps.PRS.PrecommitsReceived = nil
}
if psHeight == msg.Height && psRound != msg.Round && msg.Round == psCatchupCommitRound {
// Peer caught up to CatchupCommitRound.
@@ -1402,7 +1491,7 @@ func (ps *PeerState) ApplyNewRoundStepMessage(msg *NewRoundStepMessage) {
// Shift Precommits to LastCommit.
if psHeight+1 == msg.Height && psRound == msg.LastCommitRound {
ps.PRS.LastCommitRound = msg.LastCommitRound
ps.PRS.LastCommit = ps.PRS.Precommits
ps.PRS.LastCommit = lastPrecommits
} else {
ps.PRS.LastCommitRound = msg.LastCommitRound
ps.PRS.LastCommit = nil

View File

@@ -520,6 +520,14 @@ func (cs *State) updateHeight(height int64) {
}
func (cs *State) updateRoundStep(round int32, step cstypes.RoundStepType) {
if !cs.replayMode {
if round != cs.Round || round == 0 && step == cstypes.RoundStepNewRound {
cs.metrics.MarkRound(cs.Round, cs.StartTime)
}
if cs.Step != step {
cs.metrics.MarkStep(cs.Step)
}
}
cs.Round = round
cs.Step = step
}
@@ -1019,9 +1027,6 @@ func (cs *State) enterNewRound(height int64, round int32) {
if err := cs.eventBus.PublishEventNewRound(cs.NewRoundEvent()); err != nil {
cs.Logger.Error("failed publishing new round", "err", err)
}
cs.metrics.Rounds.Set(float64(round))
// Wait for txs to be available in the mempool
// before we enterPropose in round 0. If the last block changed the app hash,
// we may need an empty "proof" block, and enterPropose immediately.
@@ -1853,11 +1858,13 @@ func (cs *State) addProposalBlockPart(msg *BlockPartMessage, peerID p2p.ID) (add
// Blocks might be reused, so round mismatch is OK
if cs.Height != height {
cs.Logger.Debug("received block part from wrong height", "height", height, "round", round)
cs.metrics.BlockGossipPartsReceived.With("matches_current", "false").Add(1)
return false, nil
}
// We're not expecting a block part.
if cs.ProposalBlockParts == nil {
cs.metrics.BlockGossipPartsReceived.With("matches_current", "false").Add(1)
// NOTE: this can happen when we've gone to a higher round and
// then receive parts from the previous round - not necessarily a bad peer.
cs.Logger.Debug(
@@ -1872,8 +1879,14 @@ func (cs *State) addProposalBlockPart(msg *BlockPartMessage, peerID p2p.ID) (add
added, err = cs.ProposalBlockParts.AddPart(part)
if err != nil {
if errors.Is(err, types.ErrPartSetInvalidProof) || errors.Is(err, types.ErrPartSetUnexpectedIndex) {
cs.metrics.BlockGossipPartsReceived.With("matches_current", "false").Add(1)
}
return added, err
}
cs.metrics.BlockGossipPartsReceived.With("matches_current", "true").Add(1)
if cs.ProposalBlockParts.ByteSize() > cs.state.ConsensusParams.Block.MaxBytes {
return added, fmt.Errorf("total size of proposal block parts exceeds maximum block bytes (%d > %d)",
cs.ProposalBlockParts.ByteSize(), cs.state.ConsensusParams.Block.MaxBytes,

View File

@@ -34,6 +34,10 @@ type PeerRoundState struct {
LastCommitRound int32 `json:"last_commit_round"` // Round of commit for last height. -1 if none.
LastCommit *bits.BitArray `json:"last_commit"` // All commit precommits of commit for last height.
// FOR TESTING PURPOSES
PrevotesReceived *bits.BitArray `json:"prevotes_received"` // All votes peer has for this round
PrecommitsReceived *bits.BitArray `json:"precommits_received"` // All precommits peer has for this round
// Round that we have commit for. Not necessarily unique. -1 if none.
CatchupCommitRound int32 `json:"catchup_commit_round"`

View File

@@ -18,40 +18,42 @@ Listen address can be changed in the config file (see
The following metrics are available:
| **Name** | **Type** | **Tags** | **Description** |
|------------------------------------------|-----------|-------------------|------------------------------------------------------------------------|
| `consensus_height` | Gauge | | Height of the chain |
| `consensus_validators` | Gauge | | Number of validators |
| `consensus_validators_power` | Gauge | | Total voting power of all validators |
| `consensus_validator_power` | Gauge | | Voting power of the node if in the validator set |
| `consensus_validator_last_signed_height` | Gauge | | Last height the node signed a block, if the node is a validator |
| `consensus_validator_missed_blocks` | Gauge | | Total amount of blocks missed for the node, if the node is a validator |
| `consensus_missing_validators` | Gauge | | Number of validators who did not sign |
| `consensus_missing_validators_power` | Gauge | | Total voting power of the missing validators |
| `consensus_byzantine_validators` | Gauge | | Number of validators who tried to double sign |
| `consensus_byzantine_validators_power` | Gauge | | Total voting power of the byzantine validators |
| `consensus_block_interval_seconds` | Histogram | | Time between this and last block (Block.Header.Time) in seconds |
| `consensus_rounds` | Gauge | | Number of rounds |
| `consensus_num_txs` | Gauge | | Number of transactions |
| `consensus_total_txs` | Gauge | | Total number of transactions committed |
| `consensus_block_parts` | Counter | `peer_id` | Number of blockparts transmitted by peer |
| `consensus_latest_block_height` | Gauge | | /status sync\_info number |
| `consensus_fast_syncing` | Gauge | | Either 0 (not fast syncing) or 1 (syncing) |
| `consensus_state_syncing` | Gauge | | Either 0 (not state syncing) or 1 (syncing) |
| `consensus_block_size_bytes` | Gauge | | Block size in bytes |
| `p2p_message_send_bytes_total` | Counter | `message_type` | Number of bytes sent to all peers per message type |
| `p2p_message_receive_bytes_total` | Counter | `message_type` | Number of bytes received from all peers per message type |
| `p2p_peers` | Gauge | | Number of peers node's connected to |
| `p2p_peer_receive_bytes_total` | Counter | `peer_id`, `chID` | Number of bytes per channel received from a given peer |
| `p2p_peer_send_bytes_total` | Counter | `peer_id`, `chID` | Number of bytes per channel sent to a given peer |
| `p2p_peer_pending_send_bytes` | Gauge | `peer_id` | Number of pending bytes to be sent to a given peer |
| `p2p_num_txs` | Gauge | `peer_id` | Number of transactions submitted by each peer\_id |
| `p2p_pending_send_bytes` | Gauge | `peer_id` | Amount of data pending to be sent to peer |
| `mempool_size` | Gauge | | Number of uncommitted transactions |
| `mempool_tx_size_bytes` | Histogram | | Transaction sizes in bytes |
| `mempool_failed_txs` | Counter | | Number of failed transactions |
| `mempool_recheck_times` | Counter | | Number of transactions rechecked in the mempool |
| `state_block_processing_time` | Histogram | | Time between BeginBlock and EndBlock in ms |
| **Name** | **Type** | **Tags** | **Description** |
|----------------------------------------|-----------|-----------------|----------------------------------------------------------------------------------------|
| abci_connection_method_timing_seconds | Histogram | method, type | Timings for each of the ABCI methods |
| consensus_height | Gauge | | Height of the chain |
| consensus_validators | Gauge | | Number of validators |
| consensus_validators_power | Gauge | | Total voting power of all validators |
| consensus_validator_power | Gauge | | Voting power of the node if in the validator set |
| consensus_validator_last_signed_height | Gauge | | Last height the node signed a block, if the node is a validator |
| consensus_validator_missed_blocks | Gauge | | Total amount of blocks missed for the node, if the node is a validator |
| consensus_missing_validators | Gauge | | Number of validators who did not sign |
| consensus_missing_validators_power | Gauge | | Total voting power of the missing validators |
| consensus_byzantine_validators | Gauge | | Number of validators who tried to double sign |
| consensus_byzantine_validators_power | Gauge | | Total voting power of the byzantine validators |
| consensus_block_interval_seconds | Histogram | | Time between this and last block (Block.Header.Time) in seconds |
| consensus_rounds | Gauge | | Number of rounds |
| consensus_num_txs | Gauge | | Number of transactions |
| consensus_total_txs | Gauge | | Total number of transactions committed |
| consensus_block_parts | counter | peer_id | number of blockparts transmitted by peer |
| consensus_latest_block_height | gauge | | /status sync_info number |
| consensus_fast_syncing | gauge | | either 0 (not fast syncing) or 1 (syncing) |
| consensus_state_syncing | gauge | | either 0 (not state syncing) or 1 (syncing) |
| consensus_block_size_bytes | Gauge | | Block size in bytes |
| consensus_step_duration | Histogram | step | Histogram of durations for each step in the consensus protocol |
| consensus_round_duration | Histogram | | Histogram of durations for all the rounds that have occurred since the process started |
| consensus_block_gossip_parts_received | Counter | matches_current | Number of block parts received by the node |
| p2p_peers | Gauge | | Number of peers node's connected to |
| p2p_peer_receive_bytes_total | counter | peer_id, chID | number of bytes per channel received from a given peer |
| p2p_peer_send_bytes_total | counter | peer_id, chID | number of bytes per channel sent to a given peer |
| p2p_peer_pending_send_bytes | gauge | peer_id | number of pending bytes to be sent to a given peer |
| p2p_num_txs | gauge | peer_id | number of transactions submitted by each peer_id |
| p2p_pending_send_bytes | gauge | peer_id | amount of data pending to be sent to peer |
| mempool_size | Gauge | | Number of uncommitted transactions |
| mempool_tx_size_bytes | histogram | | transaction sizes in bytes |
| mempool_failed_txs | counter | | number of failed transactions |
| mempool_recheck_times | counter | | number of transactions rechecked in the mempool |
| state_block_processing_time | histogram | | time between BeginBlock and EndBlock in ms |
## Useful queries

View File

@@ -19,6 +19,8 @@ type BitArray struct {
Elems []uint64 `json:"elems"` // NOTE: persisted via reflect, must be exported
}
// New bit array creates a bit array where all of the bits are initially 'false'
//
// NewBitArray returns a new bit array.
// It returns nil if the number of bits is zero.
func NewBitArray(bits int) *BitArray {
@@ -39,6 +41,8 @@ func (bA *BitArray) Size() int {
return bA.Bits
}
// The bit array represents true as 1 and false as 0
//
// GetIndex returns the bit at index i within the bit array.
// The behavior is undefined if i >= bA.Bits
func (bA *BitArray) GetIndex(i int) bool {
@@ -247,7 +251,7 @@ func (bA *BitArray) PickRandom() (int, bool) {
}
bA.mtx.Lock()
trueIndices := bA.getTrueIndices()
trueIndices := bA.GetTrueIndices()
bA.mtx.Unlock()
if len(trueIndices) == 0 { // no bits set to true
@@ -257,7 +261,7 @@ func (bA *BitArray) PickRandom() (int, bool) {
return trueIndices[tmrand.Intn(len(trueIndices))], true
}
func (bA *BitArray) getTrueIndices() []int {
func (bA *BitArray) GetTrueIndices() []int {
trueIndices := make([]int, 0, bA.Bits)
curBit := 0
numElems := len(bA.Elems)

View File

@@ -41,6 +41,8 @@ type Metrics struct {
MessageReceiveBytesTotal metrics.Counter
// Number of bytes of each message type sent.
MessageSendBytesTotal metrics.Counter
MessageSend metrics.Counter
MessageReceive metrics.Counter
}
// PrometheusMetrics returns Metrics build using Prometheus client library.
@@ -94,6 +96,18 @@ func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics {
Name: "message_send_bytes_total",
Help: "Number of bytes of each message type sent.",
}, append(labels, "message_type")).With(labelsAndValues...),
MessageSend: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
Namespace: namespace,
Subsystem: MetricsSubsystem,
Name: "message_send",
Help: "Number of messages of each type sent.",
}, append(labels, "message_type")).With(labelsAndValues...),
MessageReceive: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
Namespace: namespace,
Subsystem: MetricsSubsystem,
Name: "message_receive",
Help: "Number of messages of each type received.",
}, append(labels, "message_type")).With(labelsAndValues...),
}
}

View File

@@ -333,6 +333,7 @@ func (p *peer) SendEnvelope(e Envelope) bool {
res := p.Send(e.ChannelID, msgBytes)
if res {
p.metrics.MessageSendBytesTotal.With("message_type", metricLabelValue).Add(float64(len(msgBytes)))
p.metrics.MessageSend.With("message_type", metricLabelValue).Add(1)
}
return res
}
@@ -382,6 +383,7 @@ func (p *peer) TrySendEnvelope(e Envelope) bool {
res := p.TrySend(e.ChannelID, msgBytes)
if res {
p.metrics.MessageSendBytesTotal.With("message_type", metricLabelValue).Add(float64(len(msgBytes)))
p.metrics.MessageSend.With("message_type", metricLabelValue).Add(1)
}
return res
}
@@ -534,6 +536,7 @@ func createMConnection(
}
p.metrics.PeerReceiveBytesTotal.With(labels...).Add(float64(len(msgBytes)))
p.metrics.MessageReceiveBytesTotal.With("message_type", p.mlc.ValueToMetricLabel(msg)).Add(float64(len(msgBytes)))
p.metrics.MessageReceive.With("message_type", p.mlc.ValueToMetricLabel(msg)).Add(1)
if nr, ok := reactor.(EnvelopeReceiver); ok {
nr.ReceiveEnvelope(Envelope{
ChannelID: chID,