From 51ccaf6fb89a9e203d0d36fdbcc66f04dab89a84 Mon Sep 17 00:00:00 2001 From: Marko Date: Thu, 9 Jan 2020 11:01:01 +0100 Subject: [PATCH] metrics: add metrics for specific validators (#4294) * add metrics * change == to != * update metrics * cs: check cs.privValidator is not nil in recordMetrics Co-authored-by: Anton Kaliaev --- CHANGELOG_PENDING.md | 8 +++++ consensus/metrics.go | 29 ++++++++++++++++ consensus/state.go | 21 ++++++++++-- docs/tendermint-core/metrics.md | 59 +++++++++++++++++---------------- 4 files changed, 86 insertions(+), 31 deletions(-) diff --git a/CHANGELOG_PENDING.md b/CHANGELOG_PENDING.md index 359c952e3..1b5a93450 100644 --- a/CHANGELOG_PENDING.md +++ b/CHANGELOG_PENDING.md @@ -16,10 +16,18 @@ program](https://hackerone.com/tendermint). - Go API ### FEATURES: + - [rpc/lib] [\#4248](https://github.com/tendermint/tendermint/issues/4248) RPC client basic authentication support (@greg-szabo) +- [metrics] \#4263 Add + - `consensus_validator_power`: track your validator's power + - `consensus_validator_last_signed_height`: track at which height the validator last signed + - `consensus_validator_missed_blocks`: total number of blocks missed by a validator + as gauges in Prometheus for validator-specific metrics + ### IMPROVEMENTS: ### BUG FIXES: + - [rpc/lib] [\#4051](https://github.com/tendermint/tendermint/pull/4131) Fix RPC client, which was previously resolving https protocol to http (@yenkhoon) - [cs] \#4069 Don't panic when block meta is not found in store (@gregzaitsev) diff --git a/consensus/metrics.go b/consensus/metrics.go index b5207742c..5fa27118a 100644 --- a/consensus/metrics.go +++ b/consensus/metrics.go @@ -19,6 +19,9 @@ type Metrics struct { // Height of the chain. Height metrics.Gauge + // Last height at which a validator signed. + ValidatorLastSignedHeight metrics.Gauge + // Number of rounds. Rounds metrics.Gauge @@ -26,6 +29,10 @@ type Metrics struct { Validators metrics.Gauge // Total power of all validators. 
ValidatorsPower metrics.Gauge + // Power of a validator. + ValidatorPower metrics.Gauge + // Amount of blocks missed by a validator. + ValidatorMissedBlocks metrics.Gauge // Number of validators who did not sign. MissingValidators metrics.Gauge // Total power of the missing validators. @@ -81,12 +88,30 @@ func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics { Name: "validators", Help: "Number of validators.", }, labels).With(labelsAndValues...), + ValidatorLastSignedHeight: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "validator_last_signed_height", + Help: "Last signed height for a validator", + }, append(labels, "validator_address")).With(labelsAndValues...), + ValidatorMissedBlocks: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "validator_missed_blocks", + Help: "Total missed blocks for a validator", + }, append(labels, "validator_address")).With(labelsAndValues...), ValidatorsPower: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ Namespace: namespace, Subsystem: MetricsSubsystem, Name: "validators_power", Help: "Total power of all validators.", }, labels).With(labelsAndValues...), + ValidatorPower: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "validator_power", + Help: "Power of a validator", + }, append(labels, "validator_address")).With(labelsAndValues...), MissingValidators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ Namespace: namespace, Subsystem: MetricsSubsystem, @@ -163,10 +188,14 @@ func NopMetrics() *Metrics { return &Metrics{ Height: discard.NewGauge(), + ValidatorLastSignedHeight: discard.NewGauge(), + Rounds: discard.NewGauge(), Validators: discard.NewGauge(), ValidatorsPower: discard.NewGauge(), + ValidatorPower: discard.NewGauge(), + ValidatorMissedBlocks: discard.NewGauge(), MissingValidators: discard.NewGauge(), 
MissingValidatorsPower: discard.NewGauge(), ByzantineValidators: discard.NewGauge(), diff --git a/consensus/state.go b/consensus/state.go index 2eedc8994..8fd0ad87a 100644 --- a/consensus/state.go +++ b/consensus/state.go @@ -1459,8 +1459,11 @@ func (cs *ConsensusState) finalizeCommit(height int64) { func (cs *ConsensusState) recordMetrics(height int64, block *types.Block) { cs.metrics.Validators.Set(float64(cs.Validators.Size())) cs.metrics.ValidatorsPower.Set(float64(cs.Validators.TotalVotingPower())) - missingValidators := 0 - missingValidatorsPower := int64(0) + + var ( + missingValidators = 0 + missingValidatorsPower int64 + ) for i, val := range cs.Validators.Validators { var vote *types.CommitSig if i < len(block.LastCommit.Precommits) { @@ -1470,9 +1473,22 @@ func (cs *ConsensusState) recordMetrics(height int64, block *types.Block) { missingValidators++ missingValidatorsPower += val.VotingPower } + + if cs.privValidator != nil && bytes.Equal(val.Address, cs.privValidator.GetPubKey().Address()) { + label := []string{ + "validator_address", val.Address.String(), + } + cs.metrics.ValidatorPower.With(label...).Set(float64(val.VotingPower)) + if vote != nil { + cs.metrics.ValidatorLastSignedHeight.With(label...).Set(float64(height)) + } else { + cs.metrics.ValidatorMissedBlocks.With(label...).Add(float64(1)) + } + } } cs.metrics.MissingValidators.Set(float64(missingValidators)) cs.metrics.MissingValidatorsPower.Set(float64(missingValidatorsPower)) + cs.metrics.ByzantineValidators.Set(float64(len(block.Evidence.Evidence))) byzantineValidatorsPower := int64(0) for _, ev := range block.Evidence.Evidence { @@ -1493,7 +1509,6 @@ func (cs *ConsensusState) recordMetrics(height int64, block *types.Block) { cs.metrics.BlockSizeBytes.Set(float64(block.Size())) cs.metrics.TotalTxs.Set(float64(block.TotalTxs)) cs.metrics.CommittedHeight.Set(float64(block.Height)) - } //----------------------------------------------------------------------------- diff --git 
a/docs/tendermint-core/metrics.md b/docs/tendermint-core/metrics.md index 1426af4ff..c6ac6c3a7 100644 --- a/docs/tendermint-core/metrics.md +++ b/docs/tendermint-core/metrics.md @@ -18,34 +18,37 @@ Listen address can be changed in the config file (see The following metrics are available: -| **Name** | **Type** | **Since** | **Tags** | **Description** | -| --------------------------------------- | --------- | --------- | -------------- | --------------------------------------------------------------- | -| consensus\_height | Gauge | 0.21.0 | | Height of the chain | -| consensus\_validators | Gauge | 0.21.0 | | Number of validators | -| consensus\_validators\_power | Gauge | 0.21.0 | | Total voting power of all validators | -| consensus\_missing\_validators | Gauge | 0.21.0 | | Number of validators who did not sign | -| consensus\_missing\_validators\_power | Gauge | 0.21.0 | | Total voting power of the missing validators | -| consensus\_byzantine\_validators | Gauge | 0.21.0 | | Number of validators who tried to double sign | -| consensus\_byzantine\_validators\_power | Gauge | 0.21.0 | | Total voting power of the byzantine validators | -| consensus\_block\_interval\_seconds | Histogram | 0.21.0 | | Time between this and last block (Block.Header.Time) in seconds | -| consensus\_rounds | Gauge | 0.21.0 | | Number of rounds | -| consensus\_num\_txs | Gauge | 0.21.0 | | Number of transactions | -| consensus\_block\_parts | counter | on dev | peer\_id | number of blockparts transmitted by peer | -| consensus\_latest\_block\_height | gauge | on dev | | /status sync\_info number | -| consensus\_fast\_syncing | gauge | on dev | | either 0 (not fast syncing) or 1 (syncing) | -| consensus\_total\_txs | Gauge | 0.21.0 | | Total number of transactions committed | -| consensus\_block\_size\_bytes | Gauge | 0.21.0 | | Block size in bytes | -| p2p\_peers | Gauge | 0.21.0 | | Number of peers node's connected to | -| p2p\_peer\_receive\_bytes\_total | counter | on dev | peer\_id, 
chID | number of bytes per channel received from a given peer | -| p2p\_peer\_send\_bytes\_total | counter | on dev | peer\_id, chID | number of bytes per channel sent to a given peer | -| p2p\_peer\_pending\_send\_bytes | gauge | on dev | peer\_id | number of pending bytes to be sent to a given peer | -| p2p\_num\_txs | gauge | on dev | peer\_id | number of transactions submitted by each peer\_id | -| p2p\_pending\_send\_bytes | gauge | on dev | peer\_id | amount of data pending to be sent to peer | -| mempool\_size | Gauge | 0.21.0 | | Number of uncommitted transactions | -| mempool\_tx\_size\_bytes | histogram | on dev | | transaction sizes in bytes | -| mempool\_failed\_txs | counter | on dev | | number of failed transactions | -| mempool\_recheck\_times | counter | on dev | | number of transactions rechecked in the mempool | -| state\_block\_processing\_time | histogram | on dev | | time between BeginBlock and EndBlock in ms | +| **Name** | **Type** | **Since** | **Tags** | **Description** | +| -------------------------------------- | --------- | --------- | ------------- | ---------------------------------------------------------------------- | +| consensus_height | Gauge | 0.21.0 | | Height of the chain | +| consensus_validators | Gauge | 0.21.0 | | Number of validators | +| consensus_validators_power | Gauge | 0.21.0 | | Total voting power of all validators | +| consensus_validator_power | Gauge | 0.33.0 | validator_address | Voting power of the node if in the validator set | +| consensus_validator_last_signed_height | Gauge | 0.33.0 | validator_address | Last height at which the node signed a block, if the node is a validator | +| consensus_validator_missed_blocks | Gauge | 0.33.0 | validator_address | Total number of blocks missed by the node, if the node is a validator | +| consensus_missing_validators | Gauge | 0.21.0 | | Number of validators who did not sign | +| consensus_missing_validators_power | Gauge | 0.21.0 | | Total voting power of the missing validators | +| consensus_byzantine_validators | Gauge | 
0.21.0 | | Number of validators who tried to double sign | +| consensus_byzantine_validators_power | Gauge | 0.21.0 | | Total voting power of the byzantine validators | +| consensus_block_interval_seconds | Histogram | 0.21.0 | | Time between this and last block (Block.Header.Time) in seconds | +| consensus_rounds | Gauge | 0.21.0 | | Number of rounds | +| consensus_num_txs | Gauge | 0.21.0 | | Number of transactions | +| consensus_total_txs | Gauge | 0.21.0 | | Total number of transactions committed | +| consensus_block_parts | counter | on dev | peer_id | number of blockparts transmitted by peer | +| consensus_latest_block_height | gauge | on dev | | /status sync_info number | +| consensus_fast_syncing | gauge | on dev | | either 0 (not fast syncing) or 1 (syncing) | +| consensus_block_size_bytes | Gauge | 0.21.0 | | Block size in bytes | +| p2p_peers | Gauge | 0.21.0 | | Number of peers node's connected to | +| p2p_peer_receive_bytes_total | counter | on dev | peer_id, chID | number of bytes per channel received from a given peer | +| p2p_peer_send_bytes_total | counter | on dev | peer_id, chID | number of bytes per channel sent to a given peer | +| p2p_peer_pending_send_bytes | gauge | on dev | peer_id | number of pending bytes to be sent to a given peer | +| p2p_num_txs | gauge | on dev | peer_id | number of transactions submitted by each peer_id | +| p2p_pending_send_bytes | gauge | on dev | peer_id | amount of data pending to be sent to peer | +| mempool_size | Gauge | 0.21.0 | | Number of uncommitted transactions | +| mempool_tx_size_bytes | histogram | on dev | | transaction sizes in bytes | +| mempool_failed_txs | counter | on dev | | number of failed transactions | +| mempool_recheck_times | counter | on dev | | number of transactions rechecked in the mempool | +| state_block_processing_time | histogram | on dev | | time between BeginBlock and EndBlock in ms | ## Useful queries