Merge branch 'main' into rpc-header

samricotta authored on 2022-08-08 13:38:03 +02:00, committed by GitHub.
23 changed files with 418 additions and 133 deletions


@@ -1,21 +1,20 @@
# TODO(thane): Re-enable once we've pulled in the ADRs and RFCs from master.
# Verify that important design docs have ToC entries.
#name: Check documentation ToC
#on:
# pull_request:
# push:
# branches:
# - main
#
#jobs:
# check:
# runs-on: ubuntu-latest
# steps:
# - uses: actions/checkout@v3
# - uses: technote-space/get-diff-action@v6
# with:
# PATTERNS: |
# docs/architecture/**
# docs/rfc/**
# - run: ./docs/presubmit.sh
# if: env.GIT_DIFF
name: Check documentation ToC
on:
pull_request:
push:
branches:
- main
jobs:
check:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: technote-space/get-diff-action@v6
with:
PATTERNS: |
docs/architecture/**
docs/rfc/**
- run: make check-docs-toc
if: env.GIT_DIFF


@@ -27,6 +27,12 @@ jobs:
with:
ref: 'v0.34.x'
- name: Capture git repo info
id: git-info
run: |
echo "::set-output name=branch::`git branch --show-current`"
echo "::set-output name=commit::`git rev-parse HEAD`"
- name: Build
working-directory: test/e2e
# Run make jobs in parallel, since we can't run steps in parallel.
@@ -41,18 +47,58 @@ jobs:
working-directory: test/e2e
run: ./run-multiple.sh networks/nightly/*-group${{ matrix.group }}-*.toml
outputs:
git-branch: ${{ steps.git-info.outputs.branch }}
git-commit: ${{ steps.git-info.outputs.commit }}
e2e-nightly-fail:
needs: e2e-nightly-test
if: ${{ failure() }}
runs-on: ubuntu-latest
steps:
- name: Notify Slack on failure
uses: rtCamp/action-slack-notify@12e36fc18b0689399306c2e0b3e0f2978b7f1ee7
uses: slackapi/slack-github-action@v1.21.0
env:
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
SLACK_CHANNEL: tendermint-internal
SLACK_USERNAME: Nightly E2E Tests
SLACK_ICON_EMOJI: ':skull:'
SLACK_COLOR: danger
SLACK_MESSAGE: Nightly E2E tests failed on v0.34.x
SLACK_FOOTER: ''
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK
BRANCH: ${{ needs.e2e-nightly-test.outputs.git-branch }}
RUN_URL: "${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
COMMIT_URL: "${{ github.server_url }}/${{ github.repository }}/commit/${{ needs.e2e-nightly-test.outputs.git-commit }}"
with:
payload: |
{
"blocks": [
{
"type": "section",
"text": {
"type": "mrkdwn",
"text": ":skull: Nightly E2E tests for `${{ env.BRANCH }}` failed. See the <${{ env.RUN_URL }}|run details> and the <${{ env.COMMIT_URL }}|commit> that caused the failure."
}
}
]
}
e2e-nightly-success: # we may turn this off once the tests pass consistently
needs: e2e-nightly-test
if: ${{ success() }}
runs-on: ubuntu-latest
steps:
- name: Notify Slack on success
uses: slackapi/slack-github-action@v1.21.0
env:
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK
BRANCH: ${{ needs.e2e-nightly-test.outputs.git-branch }}
with:
payload: |
{
"blocks": [
{
"type": "section",
"text": {
"type": "mrkdwn",
"text": ":white_check_mark: Nightly E2E tests for `${{ env.BRANCH }}` passed."
}
}
]
}


@@ -46,15 +46,26 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Notify Slack on failure
uses: rtCamp/action-slack-notify@12e36fc18b0689399306c2e0b3e0f2978b7f1ee7
uses: slackapi/slack-github-action@v1.21.0
env:
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
SLACK_CHANNEL: tendermint-internal
SLACK_USERNAME: Nightly E2E Tests
SLACK_ICON_EMOJI: ':skull:'
SLACK_COLOR: danger
SLACK_MESSAGE: Nightly E2E tests failed on main
SLACK_FOOTER: ''
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK
BRANCH: ${{ github.ref_name }}
RUN_URL: "${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
COMMIT_URL: "${{ github.server_url }}/${{ github.repository }}/commit/${{ github.sha }}"
with:
payload: |
{
"blocks": [
{
"type": "section",
"text": {
"type": "mrkdwn",
"text": ":skull: Nightly E2E tests for `${{ env.BRANCH }}` failed. See the <${{ env.RUN_URL }}|run details> and the <${{ env.COMMIT_URL }}|commit> that caused the failure."
}
}
]
}
e2e-nightly-success: # we may turn this off once the tests pass consistently
needs: e2e-nightly-test
@@ -62,12 +73,21 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Notify Slack on success
uses: rtCamp/action-slack-notify@12e36fc18b0689399306c2e0b3e0f2978b7f1ee7
uses: slackapi/slack-github-action@v1.21.0
env:
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
SLACK_CHANNEL: tendermint-internal
SLACK_USERNAME: Nightly E2E Tests
SLACK_ICON_EMOJI: ':white_check_mark:'
SLACK_COLOR: good
SLACK_MESSAGE: Nightly E2E tests passed on main
SLACK_FOOTER: ''
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK
BRANCH: ${{ github.ref_name }}
with:
payload: |
{
"blocks": [
{
"type": "section",
"text": {
"type": "mrkdwn",
"text": ":white_check_mark: Nightly E2E tests for `${{ env.BRANCH }}` passed."
}
}
]
}


@@ -75,13 +75,24 @@ jobs:
if: ${{ needs.fuzz-nightly-test.outputs.crashers-count != 0 }}
runs-on: ubuntu-latest
steps:
- name: Notify Slack if any crashers
uses: rtCamp/action-slack-notify@12e36fc18b0689399306c2e0b3e0f2978b7f1ee7
- name: Notify Slack on failure
uses: slackapi/slack-github-action@v1.21.0
env:
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
SLACK_CHANNEL: tendermint-internal
SLACK_USERNAME: Nightly Fuzz Tests
SLACK_ICON_EMOJI: ':firecracker:'
SLACK_COLOR: danger
SLACK_MESSAGE: Crashers found in Nightly Fuzz tests
SLACK_FOOTER: ''
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK
BRANCH: ${{ github.ref_name }}
CRASHERS: ${{ needs.fuzz-nightly-test.outputs.crashers-count }}
RUN_URL: "${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
with:
payload: |
{
"blocks": [
{
"type": "section",
"text": {
"type": "mrkdwn",
"text": ":skull: Nightly fuzz tests for `${{ env.BRANCH }}` failed with ${{ env.CRASHERS }} crasher(s). See the <${{ env.RUN_URL }}|run details>."
}
}
]
}


@@ -289,6 +289,11 @@ sync-docs:
aws cloudfront create-invalidation --distribution-id ${CF_DISTRIBUTION_ID} --profile terraform --path "/*" ;
.PHONY: sync-docs
# Verify that important design docs have ToC entries.
check-docs-toc:
@./docs/presubmit.sh
.PHONY: check-docs-toc
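# Note: this target runs the same ToC check that CI invokes via the docs-toc.yml workflow above.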
###############################################################################
### Docker image ###
###############################################################################


@@ -62,7 +62,7 @@ func newReactor(
app := &testApp{}
cc := proxy.NewLocalClientCreator(app)
proxyApp := proxy.NewAppConns(cc)
proxyApp := proxy.NewAppConns(cc, proxy.NopMetrics())
err := proxyApp.Start()
if err != nil {
panic(fmt.Errorf("error start app: %w", err))


@@ -1,8 +1,13 @@
package consensus
import (
"strings"
"time"
"github.com/go-kit/kit/metrics"
"github.com/go-kit/kit/metrics/discard"
cstypes "github.com/tendermint/tendermint/consensus/types"
"github.com/tendermint/tendermint/types"
prometheus "github.com/go-kit/kit/metrics/prometheus"
stdprometheus "github.com/prometheus/client_golang/prometheus"
@@ -25,6 +30,9 @@ type Metrics struct {
// Number of rounds.
Rounds metrics.Gauge
// Histogram of round duration.
RoundDuration metrics.Histogram
// Number of validators.
Validators metrics.Gauge
// Total power of all validators.
@@ -61,6 +69,14 @@ type Metrics struct {
// Number of blockparts transmitted by peer.
BlockParts metrics.Counter
// Histogram of step duration.
StepDuration metrics.Histogram
stepStart time.Time
// Number of block parts received by the node, separated by whether the part
// was relevant to the block the node is trying to gather or not.
BlockGossipPartsReceived metrics.Counter
// QuorumPrevoteMessageDelay is the interval in seconds between the proposal
// timestamp and the timestamp of the earliest prevote that achieved a quorum
// during the prevote step.
@@ -99,7 +115,13 @@ func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics {
Name: "rounds",
Help: "Number of rounds.",
}, labels).With(labelsAndValues...),
RoundDuration: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{
Namespace: namespace,
Subsystem: MetricsSubsystem,
Name: "round_duration_seconds",
Help: "Time spent in a round.",
Buckets: stdprometheus.ExponentialBucketsRange(0.1, 100, 8),
}, labels).With(labelsAndValues...),
Validators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Namespace: namespace,
Subsystem: MetricsSubsystem,
@@ -202,6 +224,20 @@ func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics {
Name: "block_parts",
Help: "Number of blockparts transmitted by peer.",
}, append(labels, "peer_id")).With(labelsAndValues...),
BlockGossipPartsReceived: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
Namespace: namespace,
Subsystem: MetricsSubsystem,
Name: "block_gossip_parts_received",
Help: "Number of block parts received by the node, labeled by whether the " +
"part was relevant to the block the node was currently gathering or not.",
}, append(labels, "matches_current")).With(labelsAndValues...),
StepDuration: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{
Namespace: namespace,
Subsystem: MetricsSubsystem,
Name: "step_duration_seconds",
Help: "Time spent per step.",
Buckets: stdprometheus.ExponentialBucketsRange(0.1, 100, 8),
}, append(labels, "step")).With(labelsAndValues...),
QuorumPrevoteMessageDelay: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Namespace: namespace,
Subsystem: MetricsSubsystem,
@@ -226,7 +262,9 @@ func NopMetrics() *Metrics {
ValidatorLastSignedHeight: discard.NewGauge(),
Rounds: discard.NewGauge(),
Rounds: discard.NewGauge(),
RoundDuration: discard.NewHistogram(),
StepDuration: discard.NewHistogram(),
Validators: discard.NewGauge(),
ValidatorsPower: discard.NewGauge(),
@@ -246,7 +284,31 @@ func NopMetrics() *Metrics {
FastSyncing: discard.NewGauge(),
StateSyncing: discard.NewGauge(),
BlockParts: discard.NewCounter(),
BlockGossipPartsReceived: discard.NewCounter(),
QuorumPrevoteMessageDelay: discard.NewGauge(),
FullPrevoteMessageDelay: discard.NewGauge(),
}
}
// RecordConsMetrics records the block-related metrics during fast-sync.
func (m *Metrics) RecordConsMetrics(block *types.Block) {
m.NumTxs.Set(float64(len(block.Data.Txs)))
m.TotalTxs.Add(float64(len(block.Data.Txs)))
m.BlockSizeBytes.Set(float64(block.Size()))
m.CommittedHeight.Set(float64(block.Height))
}
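// MarkRound sets the Rounds gauge to r and observes, as the round duration,
// the seconds elapsed since st, the start time of the round that just ended.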
func (m *Metrics) MarkRound(r int32, st time.Time) {
m.Rounds.Set(float64(r))
roundTime := time.Since(st).Seconds()
m.RoundDuration.Observe(roundTime)
}
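// MarkStep observes how long the step s that just ended lasted, labeled with
// the step name (minus the "RoundStep" prefix), and restarts the step timer.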
func (m *Metrics) MarkStep(s cstypes.RoundStepType) {
if !m.stepStart.IsZero() {
stepTime := time.Since(m.stepStart).Seconds()
stepName := strings.TrimPrefix(s.String(), "RoundStep")
m.StepDuration.With("step", stepName).Observe(stepTime)
}
m.stepStart = time.Now()
}


@@ -309,7 +309,7 @@ func newConsensusStateForReplay(config cfg.BaseConfig, csConfig *cfg.ConsensusCo
// Create proxyAppConn connection (consensus, mempool, query)
clientCreator := proxy.DefaultClientCreator(config.ProxyApp, config.ABCI, config.DBDir())
proxyApp := proxy.NewAppConns(clientCreator)
proxyApp := proxy.NewAppConns(clientCreator, proxy.NopMetrics())
err = proxyApp.Start()
if err != nil {
tmos.Exit(fmt.Sprintf("Error starting proxy app conns: %v", err))


@@ -66,7 +66,7 @@ func newMockProxyApp(appHash []byte, abciResponses *tmstate.ABCIResponses) proxy
if err != nil {
panic(err)
}
return proxy.NewAppConnConsensus(cli)
return proxy.NewAppConnConsensus(cli, proxy.NopMetrics())
}
type mockProxyApp struct {


@@ -710,7 +710,7 @@ func testHandshakeReplay(t *testing.T, config *cfg.Config, nBlocks int, mode uin
if nBlocks > 0 {
// run nBlocks against a new client to build up the app state.
// use a throwaway tendermint state
proxyApp := proxy.NewAppConns(clientCreator2)
proxyApp := proxy.NewAppConns(clientCreator2, proxy.NopMetrics())
stateDB1 := dbm.NewMemDB()
stateStore := sm.NewStore(stateDB1)
err := stateStore.Save(genesisState)
@@ -730,7 +730,7 @@ func testHandshakeReplay(t *testing.T, config *cfg.Config, nBlocks int, mode uin
// now start the app using the handshake - it should sync
genDoc, _ := sm.MakeGenesisDocFromFile(config.GenesisFile())
handshaker := NewHandshaker(stateStore, state, store, genDoc)
proxyApp := proxy.NewAppConns(clientCreator2)
proxyApp := proxy.NewAppConns(clientCreator2, proxy.NopMetrics())
if err := proxyApp.Start(); err != nil {
t.Fatalf("Error starting proxy app connections: %v", err)
}
@@ -839,7 +839,7 @@ func buildTMStateFromChain(
clientCreator := proxy.NewLocalClientCreator(
kvstore.NewPersistentKVStoreApplication(
filepath.Join(config.DBDir(), fmt.Sprintf("replay_test_%d_%d_t", nBlocks, mode))))
proxyApp := proxy.NewAppConns(clientCreator)
proxyApp := proxy.NewAppConns(clientCreator, proxy.NopMetrics())
if err := proxyApp.Start(); err != nil {
panic(err)
}
@@ -905,7 +905,7 @@ func TestHandshakePanicsIfAppReturnsWrongAppHash(t *testing.T) {
{
app := &badApp{numBlocks: 3, allHashesAreWrong: true}
clientCreator := proxy.NewLocalClientCreator(app)
proxyApp := proxy.NewAppConns(clientCreator)
proxyApp := proxy.NewAppConns(clientCreator, proxy.NopMetrics())
err := proxyApp.Start()
require.NoError(t, err)
t.Cleanup(func() {
@@ -929,7 +929,7 @@ func TestHandshakePanicsIfAppReturnsWrongAppHash(t *testing.T) {
{
app := &badApp{numBlocks: 3, onlyLastHashIsWrong: true}
clientCreator := proxy.NewLocalClientCreator(app)
proxyApp := proxy.NewAppConns(clientCreator)
proxyApp := proxy.NewAppConns(clientCreator, proxy.NopMetrics())
err := proxyApp.Start()
require.NoError(t, err)
t.Cleanup(func() {
@@ -1233,7 +1233,7 @@ func TestHandshakeUpdatesValidators(t *testing.T) {
// now start the app using the handshake - it should sync
genDoc, _ := sm.MakeGenesisDocFromFile(config.GenesisFile())
handshaker := NewHandshaker(stateStore, state, store, genDoc)
proxyApp := proxy.NewAppConns(clientCreator)
proxyApp := proxy.NewAppConns(clientCreator, proxy.NopMetrics())
if err := proxyApp.Start(); err != nil {
t.Fatalf("Error starting proxy app connections: %v", err)
}


@@ -4,7 +4,7 @@ import (
"bytes"
"errors"
"fmt"
"io/ioutil"
"io"
"os"
"runtime/debug"
"sort"
@@ -523,6 +523,14 @@ func (cs *State) updateHeight(height int64) {
}
func (cs *State) updateRoundStep(round int32, step cstypes.RoundStepType) {
if !cs.replayMode {
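// The previous round ended either because the round number changed or
// because a new height re-entered round 0 at RoundStepNewRound.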
if round != cs.Round || round == 0 && step == cstypes.RoundStepNewRound {
cs.metrics.MarkRound(cs.Round, cs.StartTime)
}
if cs.Step != step {
cs.metrics.MarkStep(cs.Step)
}
}
cs.Round = round
cs.Step = step
}
@@ -1021,9 +1029,6 @@ func (cs *State) enterNewRound(height int64, round int32) {
if err := cs.eventBus.PublishEventNewRound(cs.NewRoundEvent()); err != nil {
cs.Logger.Error("failed publishing new round", "err", err)
}
cs.metrics.Rounds.Set(float64(round))
// Wait for txs to be available in the mempool
// before we enterPropose in round 0. If the last block changed the app hash,
// we may need an empty "proof" block, and enterPropose immediately.
@@ -1854,11 +1859,13 @@ func (cs *State) addProposalBlockPart(msg *BlockPartMessage, peerID p2p.ID) (add
// Blocks might be reused, so round mismatch is OK
if cs.Height != height {
cs.Logger.Debug("received block part from wrong height", "height", height, "round", round)
cs.metrics.BlockGossipPartsReceived.With("matches_current", "false").Add(1)
return false, nil
}
// We're not expecting a block part.
if cs.ProposalBlockParts == nil {
cs.metrics.BlockGossipPartsReceived.With("matches_current", "false").Add(1)
// NOTE: this can happen when we've gone to a higher round and
// then receive parts from the previous round - not necessarily a bad peer.
cs.Logger.Debug(
@@ -1873,15 +1880,21 @@ func (cs *State) addProposalBlockPart(msg *BlockPartMessage, peerID p2p.ID) (add
added, err = cs.ProposalBlockParts.AddPart(part)
if err != nil {
if errors.Is(err, types.ErrPartSetInvalidProof) || errors.Is(err, types.ErrPartSetUnexpectedIndex) {
cs.metrics.BlockGossipPartsReceived.With("matches_current", "false").Add(1)
}
return added, err
}
cs.metrics.BlockGossipPartsReceived.With("matches_current", "true").Add(1)
if cs.ProposalBlockParts.ByteSize() > cs.state.ConsensusParams.Block.MaxBytes {
return added, fmt.Errorf("total size of proposal block parts exceeds maximum block bytes (%d > %d)",
cs.ProposalBlockParts.ByteSize(), cs.state.ConsensusParams.Block.MaxBytes,
)
}
if added && cs.ProposalBlockParts.IsComplete() {
bz, err := ioutil.ReadAll(cs.ProposalBlockParts.GetReader())
bz, err := io.ReadAll(cs.ProposalBlockParts.GetReader())
if err != nil {
return added, err
}


@@ -59,7 +59,7 @@ func WALGenerateNBlocks(t *testing.T, wr io.Writer, numBlocks int) (err error) {
blockStore := store.NewBlockStore(blockStoreDB)
proxyApp := proxy.NewAppConns(proxy.NewLocalClientCreator(app))
proxyApp := proxy.NewAppConns(proxy.NewLocalClientCreator(app), proxy.NopMetrics())
proxyApp.SetLogger(logger.With("module", "proxy"))
if err := proxyApp.Start(); err != nil {
return fmt.Errorf("failed to start proxy app connections: %w", err)


@@ -77,7 +77,7 @@ Note the context/background should be written in the present tense.
- [ADR-006: Trust-Metric](./adr-006-trust-metric.md)
- [ADR-024: Sign-Bytes](./adr-024-sign-bytes.md)
- [ADR-035: Documentation](./adr-035-documentation.md)
- [ADR-039: Peer-Behaviour](./adr-039-peer-behavior.md)
- [ADR-039: Peer-Behaviour](./adr-039-peer-behaviour.md)
- [ADR-060: Go-API-Stability](./adr-060-go-api-stability.md)
- [ADR-061: P2P-Refactor-Scope](./adr-061-p2p-refactor-scope.md)
- [ADR-065: Custom Event Indexing](./adr-065-custom-event-indexing.md)

docs/presubmit.sh (new executable file)

@@ -0,0 +1,39 @@
#!/bin/bash
#
# This script verifies that each document in the architecture and rfc
# directories has a corresponding table-of-contents entry in its README file.
#
# This can be run manually from the command line.
# It is also run in CI via the docs-toc.yml workflow.
#
set -euo pipefail
readonly base="$(dirname $0)"
cd "$base"
readonly workdir="$(mktemp -d)"
trap "rm -fr -- '$workdir'" EXIT
checktoc() {
local dir="$1"
local tag="$2"'-*-*'
local out="$workdir/${dir}.out.txt"
(
cd "$dir" >/dev/null
find . -maxdepth 1 -type f -name "$tag" -not -exec grep -q "({})" README.md ';' -print
) > "$out"
if [[ -s "$out" ]] ; then
echo "-- The following files in $dir lack a ToC entry:
"
cat "$out"
return 1
fi
}
err=0
# Verify that each RFC and ADR has a ToC entry in its README file.
checktoc architecture adr || ((err++))
checktoc rfc rfc || ((err++))
exit $err


@@ -18,38 +18,42 @@ Listen address can be changed in the config file (see
The following metrics are available:
| **Name** | **Type** | **Tags** | **Description** |
| -------------------------------------- | --------- | ------------- | ---------------------------------------------------------------------- |
| consensus_height | Gauge | | Height of the chain |
| consensus_validators | Gauge | | Number of validators |
| consensus_validators_power | Gauge | | Total voting power of all validators |
| consensus_validator_power | Gauge | | Voting power of the node if in the validator set |
| consensus_validator_last_signed_height | Gauge | | Last height the node signed a block, if the node is a validator |
| consensus_validator_missed_blocks | Gauge | | Total amount of blocks missed for the node, if the node is a validator |
| consensus_missing_validators | Gauge | | Number of validators who did not sign |
| consensus_missing_validators_power | Gauge | | Total voting power of the missing validators |
| consensus_byzantine_validators | Gauge | | Number of validators who tried to double sign |
| consensus_byzantine_validators_power | Gauge | | Total voting power of the byzantine validators |
| consensus_block_interval_seconds | Histogram | | Time between this and last block (Block.Header.Time) in seconds |
| consensus_rounds | Gauge | | Number of rounds |
| consensus_num_txs | Gauge | | Number of transactions |
| consensus_total_txs | Gauge | | Total number of transactions committed |
| consensus_block_parts | counter | peer_id | number of blockparts transmitted by peer |
| consensus_latest_block_height | gauge | | /status sync_info number |
| consensus_fast_syncing | gauge | | either 0 (not fast syncing) or 1 (syncing) |
| consensus_state_syncing | gauge | | either 0 (not state syncing) or 1 (syncing) |
| consensus_block_size_bytes | Gauge | | Block size in bytes |
| p2p_peers | Gauge | | Number of peers node's connected to |
| p2p_peer_receive_bytes_total | counter | peer_id, chID | number of bytes per channel received from a given peer |
| p2p_peer_send_bytes_total | counter | peer_id, chID | number of bytes per channel sent to a given peer |
| p2p_peer_pending_send_bytes | gauge | peer_id | number of pending bytes to be sent to a given peer |
| p2p_num_txs | gauge | peer_id | number of transactions submitted by each peer_id |
| p2p_pending_send_bytes | gauge | peer_id | amount of data pending to be sent to peer |
| mempool_size | Gauge | | Number of uncommitted transactions |
| mempool_tx_size_bytes | histogram | | transaction sizes in bytes |
| mempool_failed_txs | counter | | number of failed transactions |
| mempool_recheck_times | counter | | number of transactions rechecked in the mempool |
| state_block_processing_time | histogram | | time between BeginBlock and EndBlock in ms |
| **Name** | **Type** | **Tags** | **Description** |
|----------------------------------------|-----------|-----------------|--------------------------------------------------------------------------------------------------------------------------------------------|
| abci_connection_method_timing_seconds | Histogram | method, type | Timings for each of the ABCI methods |
| consensus_height | Gauge | | Height of the chain |
| consensus_validators | Gauge | | Number of validators |
| consensus_validators_power | Gauge | | Total voting power of all validators |
| consensus_validator_power | Gauge | | Voting power of the node if in the validator set |
| consensus_validator_last_signed_height | Gauge | | Last height the node signed a block, if the node is a validator |
| consensus_validator_missed_blocks | Gauge | | Total amount of blocks missed for the node, if the node is a validator |
| consensus_missing_validators | Gauge | | Number of validators who did not sign |
| consensus_missing_validators_power | Gauge | | Total voting power of the missing validators |
| consensus_byzantine_validators | Gauge | | Number of validators who tried to double sign |
| consensus_byzantine_validators_power | Gauge | | Total voting power of the byzantine validators |
| consensus_block_interval_seconds | Histogram | | Time between this and last block (Block.Header.Time) in seconds |
| consensus_rounds | Gauge | | Number of rounds |
| consensus_num_txs | Gauge | | Number of transactions |
| consensus_total_txs | Gauge | | Total number of transactions committed |
| consensus_block_parts | counter | peer_id | number of blockparts transmitted by peer |
| consensus_latest_block_height | gauge | | /status sync_info number |
| consensus_fast_syncing | gauge | | either 0 (not fast syncing) or 1 (syncing) |
| consensus_state_syncing | gauge | | either 0 (not state syncing) or 1 (syncing) |
| consensus_block_size_bytes | Gauge | | Block size in bytes |
| consensus_step_duration | Histogram | step | Histogram of durations for each step in the consensus protocol |
| consensus_round_duration | Histogram | | Histogram of durations for all the rounds that have occurred since the process started |
| consensus_block_gossip_parts_received | Counter | matches_current | Number of block parts received by the node |
| p2p_peers | Gauge | | Number of peers node's connected to |
| p2p_peer_receive_bytes_total | counter | peer_id, chID | number of bytes per channel received from a given peer |
| p2p_peer_send_bytes_total | counter | peer_id, chID | number of bytes per channel sent to a given peer |
| p2p_peer_pending_send_bytes | gauge | peer_id | number of pending bytes to be sent to a given peer |
| p2p_num_txs | gauge | peer_id | number of transactions submitted by each peer_id |
| p2p_pending_send_bytes | gauge | peer_id | amount of data pending to be sent to peer |
| mempool_size | Gauge | | Number of uncommitted transactions |
| mempool_tx_size_bytes | histogram | | transaction sizes in bytes |
| mempool_failed_txs | counter | | number of failed transactions |
| mempool_recheck_times | counter | | number of transactions rechecked in the mempool |
| state_block_processing_time | histogram | | time between BeginBlock and EndBlock in ms |
## Useful queries


@@ -113,19 +113,20 @@ func DefaultNewNode(config *cfg.Config, logger log.Logger) (*Node, error) {
}
// MetricsProvider returns consensus, p2p, mempool, state, and proxy (ABCI) Metrics.
type MetricsProvider func(chainID string) (*cs.Metrics, *p2p.Metrics, *mempl.Metrics, *sm.Metrics)
type MetricsProvider func(chainID string) (*cs.Metrics, *p2p.Metrics, *mempl.Metrics, *sm.Metrics, *proxy.Metrics)
// DefaultMetricsProvider returns Metrics built using the Prometheus client library
// if Prometheus is enabled. Otherwise, it returns no-op Metrics.
func DefaultMetricsProvider(config *cfg.InstrumentationConfig) MetricsProvider {
return func(chainID string) (*cs.Metrics, *p2p.Metrics, *mempl.Metrics, *sm.Metrics) {
return func(chainID string) (*cs.Metrics, *p2p.Metrics, *mempl.Metrics, *sm.Metrics, *proxy.Metrics) {
if config.Prometheus {
return cs.PrometheusMetrics(config.Namespace, "chain_id", chainID),
p2p.PrometheusMetrics(config.Namespace, "chain_id", chainID),
mempl.PrometheusMetrics(config.Namespace, "chain_id", chainID),
sm.PrometheusMetrics(config.Namespace, "chain_id", chainID)
sm.PrometheusMetrics(config.Namespace, "chain_id", chainID),
proxy.PrometheusMetrics(config.Namespace, "chain_id", chainID)
}
return cs.NopMetrics(), p2p.NopMetrics(), mempl.NopMetrics(), sm.NopMetrics()
return cs.NopMetrics(), p2p.NopMetrics(), mempl.NopMetrics(), sm.NopMetrics(), proxy.NopMetrics()
}
}
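As a rough, self-contained sketch of how the widened provider gets consumed (building it from a default config and the chain ID "test-chain-1" are illustrative assumptions, not code from this commit):

    package main

    import (
        cfg "github.com/tendermint/tendermint/config"
        "github.com/tendermint/tendermint/node"
    )

    func main() {
        conf := cfg.DefaultConfig()
        provider := node.DefaultMetricsProvider(conf.Instrumentation)
        // The provider now returns five values; the new fifth value
        // (*proxy.Metrics) carries the ABCI connection timings.
        csM, p2pM, memplM, smM, abciM := provider("test-chain-1")
        _, _, _, _, _ = csM, p2pM, memplM, smM, abciM // wired into reactors in NewNode
    }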
@@ -246,8 +247,8 @@ func initDBs(config *cfg.Config, dbProvider DBProvider) (blockStore *store.Block
return
}
func createAndStartProxyAppConns(clientCreator proxy.ClientCreator, logger log.Logger) (proxy.AppConns, error) {
proxyApp := proxy.NewAppConns(clientCreator)
func createAndStartProxyAppConns(clientCreator proxy.ClientCreator, logger log.Logger, metrics *proxy.Metrics) (proxy.AppConns, error) {
proxyApp := proxy.NewAppConns(clientCreator, metrics)
proxyApp.SetLogger(logger.With("module", "proxy"))
if err := proxyApp.Start(); err != nil {
return nil, fmt.Errorf("error starting proxy app connections: %v", err)
@@ -720,8 +721,10 @@ func NewNode(config *cfg.Config,
return nil, err
}
csMetrics, p2pMetrics, memplMetrics, smMetrics, abciMetrics := metricsProvider(genDoc.ChainID)
// Create the proxyApp and establish connections to the ABCI app (consensus, mempool, query).
proxyApp, err := createAndStartProxyAppConns(clientCreator, logger)
proxyApp, err := createAndStartProxyAppConns(clientCreator, logger, abciMetrics)
if err != nil {
return nil, err
}
@@ -786,8 +789,6 @@ func NewNode(config *cfg.Config,
logNodeStartupInfo(state, pubKey, logger, consensusLogger)
csMetrics, p2pMetrics, memplMetrics, smMetrics := metricsProvider(genDoc.ChainID)
// Make MempoolReactor
mempool, mempoolReactor := createMempoolAndMempoolReactor(config, proxyApp, state, memplMetrics, logger)


@@ -226,7 +226,7 @@ func TestCreateProposalBlock(t *testing.T) {
config := cfg.ResetTestRoot("node_create_proposal")
defer os.RemoveAll(config.RootDir)
cc := proxy.NewLocalClientCreator(kvstore.NewApplication())
proxyApp := proxy.NewAppConns(cc)
proxyApp := proxy.NewAppConns(cc, proxy.NopMetrics())
err := proxyApp.Start()
require.Nil(t, err)
defer proxyApp.Stop() //nolint:errcheck // ignore for tests
@@ -331,7 +331,7 @@ func TestMaxProposalBlockSize(t *testing.T) {
config := cfg.ResetTestRoot("node_create_proposal")
defer os.RemoveAll(config.RootDir)
cc := proxy.NewLocalClientCreator(kvstore.NewApplication())
proxyApp := proxy.NewAppConns(cc)
proxyApp := proxy.NewAppConns(cc, proxy.NopMetrics())
err := proxyApp.Start()
require.Nil(t, err)
defer proxyApp.Stop() //nolint:errcheck // ignore for tests


@@ -1,6 +1,9 @@
package proxy
import (
"time"
"github.com/go-kit/kit/metrics"
abcicli "github.com/tendermint/tendermint/abci/client"
"github.com/tendermint/tendermint/abci/types"
)
@@ -56,11 +59,13 @@ type AppConnSnapshot interface {
// Implements AppConnConsensus (subset of abcicli.Client)
type appConnConsensus struct {
metrics *Metrics
appConn abcicli.Client
}
func NewAppConnConsensus(appConn abcicli.Client) AppConnConsensus {
func NewAppConnConsensus(appConn abcicli.Client, metrics *Metrics) AppConnConsensus {
return &appConnConsensus{
metrics: metrics,
appConn: appConn,
}
}
@@ -74,22 +79,27 @@ func (app *appConnConsensus) Error() error {
}
func (app *appConnConsensus) InitChainSync(req types.RequestInitChain) (*types.ResponseInitChain, error) {
defer addTimeSample(app.metrics.MethodTiming.With("method", "init_chain", "type", "sync"))()
return app.appConn.InitChainSync(req)
}
func (app *appConnConsensus) BeginBlockSync(req types.RequestBeginBlock) (*types.ResponseBeginBlock, error) {
defer addTimeSample(app.metrics.MethodTiming.With("method", "begin_block", "type", "sync"))()
return app.appConn.BeginBlockSync(req)
}
func (app *appConnConsensus) DeliverTxAsync(req types.RequestDeliverTx) *abcicli.ReqRes {
defer addTimeSample(app.metrics.MethodTiming.With("method", "deliver_tx", "type", "async"))()
return app.appConn.DeliverTxAsync(req)
}
func (app *appConnConsensus) EndBlockSync(req types.RequestEndBlock) (*types.ResponseEndBlock, error) {
defer addTimeSample(app.metrics.MethodTiming.With("method", "end_block", "type", "sync"))()
return app.appConn.EndBlockSync(req)
}
func (app *appConnConsensus) CommitSync() (*types.ResponseCommit, error) {
defer addTimeSample(app.metrics.MethodTiming.With("method", "commit", "type", "sync"))()
return app.appConn.CommitSync()
}
@@ -97,11 +107,13 @@ func (app *appConnConsensus) CommitSync() (*types.ResponseCommit, error) {
// Implements AppConnMempool (subset of abcicli.Client)
type appConnMempool struct {
metrics *Metrics
appConn abcicli.Client
}
func NewAppConnMempool(appConn abcicli.Client) AppConnMempool {
func NewAppConnMempool(appConn abcicli.Client, metrics *Metrics) AppConnMempool {
return &appConnMempool{
metrics: metrics,
appConn: appConn,
}
}
@@ -115,18 +127,22 @@ func (app *appConnMempool) Error() error {
}
func (app *appConnMempool) FlushAsync() *abcicli.ReqRes {
defer addTimeSample(app.metrics.MethodTiming.With("method", "flush", "type", "async"))()
return app.appConn.FlushAsync()
}
func (app *appConnMempool) FlushSync() error {
defer addTimeSample(app.metrics.MethodTiming.With("method", "flush", "type", "sync"))()
return app.appConn.FlushSync()
}
func (app *appConnMempool) CheckTxAsync(req types.RequestCheckTx) *abcicli.ReqRes {
defer addTimeSample(app.metrics.MethodTiming.With("method", "check_tx", "type", "async"))()
return app.appConn.CheckTxAsync(req)
}
func (app *appConnMempool) CheckTxSync(req types.RequestCheckTx) (*types.ResponseCheckTx, error) {
defer addTimeSample(app.metrics.MethodTiming.With("method", "check_tx", "type", "sync"))()
return app.appConn.CheckTxSync(req)
}
@@ -134,11 +150,13 @@ func (app *appConnMempool) CheckTxSync(req types.RequestCheckTx) (*types.Respons
// Implements AppConnQuery (subset of abcicli.Client)
type appConnQuery struct {
metrics *Metrics
appConn abcicli.Client
}
func NewAppConnQuery(appConn abcicli.Client) AppConnQuery {
func NewAppConnQuery(appConn abcicli.Client, metrics *Metrics) AppConnQuery {
return &appConnQuery{
metrics: metrics,
appConn: appConn,
}
}
@@ -148,14 +166,17 @@ func (app *appConnQuery) Error() error {
}
func (app *appConnQuery) EchoSync(msg string) (*types.ResponseEcho, error) {
defer addTimeSample(app.metrics.MethodTiming.With("method", "echo", "type", "sync"))()
return app.appConn.EchoSync(msg)
}
func (app *appConnQuery) InfoSync(req types.RequestInfo) (*types.ResponseInfo, error) {
defer addTimeSample(app.metrics.MethodTiming.With("method", "info", "type", "sync"))()
return app.appConn.InfoSync(req)
}
func (app *appConnQuery) QuerySync(reqQuery types.RequestQuery) (*types.ResponseQuery, error) {
defer addTimeSample(app.metrics.MethodTiming.With("method", "query", "type", "sync"))()
return app.appConn.QuerySync(reqQuery)
}
@@ -163,11 +184,13 @@ func (app *appConnQuery) QuerySync(reqQuery types.RequestQuery) (*types.Response
// Implements AppConnSnapshot (subset of abcicli.Client)
type appConnSnapshot struct {
metrics *Metrics
appConn abcicli.Client
}
func NewAppConnSnapshot(appConn abcicli.Client) AppConnSnapshot {
func NewAppConnSnapshot(appConn abcicli.Client, metrics *Metrics) AppConnSnapshot {
return &appConnSnapshot{
metrics: metrics,
appConn: appConn,
}
}
@@ -177,19 +200,32 @@ func (app *appConnSnapshot) Error() error {
}
func (app *appConnSnapshot) ListSnapshotsSync(req types.RequestListSnapshots) (*types.ResponseListSnapshots, error) {
defer addTimeSample(app.metrics.MethodTiming.With("method", "list_snapshots", "type", "sync"))()
return app.appConn.ListSnapshotsSync(req)
}
func (app *appConnSnapshot) OfferSnapshotSync(req types.RequestOfferSnapshot) (*types.ResponseOfferSnapshot, error) {
defer addTimeSample(app.metrics.MethodTiming.With("method", "offer_snapshot", "type", "sync"))()
return app.appConn.OfferSnapshotSync(req)
}
func (app *appConnSnapshot) LoadSnapshotChunkSync(
req types.RequestLoadSnapshotChunk) (*types.ResponseLoadSnapshotChunk, error) {
defer addTimeSample(app.metrics.MethodTiming.With("method", "load_snapshot_chunk", "type", "sync"))()
return app.appConn.LoadSnapshotChunkSync(req)
}
func (app *appConnSnapshot) ApplySnapshotChunkSync(
req types.RequestApplySnapshotChunk) (*types.ResponseApplySnapshotChunk, error) {
defer addTimeSample(app.metrics.MethodTiming.With("method", "apply_snapshot_chunk", "type", "sync"))()
return app.appConn.ApplySnapshotChunkSync(req)
}
// addTimeSample returns a function that, when called, adds an observation to m.
// The observation added to m is the number of seconds elapsed since addTimeSample
// was initially called. addTimeSample is meant to be called in a defer to calculate
// the amount of time a function takes to complete.
func addTimeSample(m metrics.Histogram) func() {
start := time.Now()
return func() { m.Observe(time.Since(start).Seconds()) }
}
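To make the defer idiom concrete, here is a minimal runnable sketch; go-kit's discard histogram stands in for MethodTiming, and the timed work is simulated:

    package main

    import (
        "time"

        "github.com/go-kit/kit/metrics"
        "github.com/go-kit/kit/metrics/discard"
    )

    // Reproduction of addTimeSample above: the trailing () on the defer line runs
    // addTimeSample immediately, capturing start; the returned closure fires on
    // function exit and observes the elapsed seconds.
    func addTimeSample(m metrics.Histogram) func() {
        start := time.Now()
        return func() { m.Observe(time.Since(start).Seconds()) }
    }

    func main() {
        h := discard.NewHistogram() // real callers pass Metrics.MethodTiming.With(...)
        defer addTimeSample(h)()
        time.Sleep(20 * time.Millisecond) // simulated work being timed
    }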

proxy/metrics.go (new file)

@@ -0,0 +1,47 @@
package proxy
import (
"github.com/go-kit/kit/metrics"
"github.com/go-kit/kit/metrics/discard"
"github.com/go-kit/kit/metrics/prometheus"
stdprometheus "github.com/prometheus/client_golang/prometheus"
)
const (
// MetricsSubsystem is a subsystem shared by all metrics exposed by this
// package.
MetricsSubsystem = "abci_connection"
)
// Metrics contains the prometheus metrics exposed by the proxy package.
type Metrics struct {
MethodTiming metrics.Histogram
}
// PrometheusMetrics constructs a Metrics instance that collects metrics samples.
// The resulting metrics will be prefixed with namespace and labeled with the
// defaultLabelsAndValues. defaultLabelsAndValues must be a list of string pairs
// where the first of each pair is the label and the second is the value.
func PrometheusMetrics(namespace string, defaultLabelsAndValues ...string) *Metrics {
defaultLabels := []string{}
for i := 0; i < len(defaultLabelsAndValues); i += 2 {
defaultLabels = append(defaultLabels, defaultLabelsAndValues[i])
}
return &Metrics{
MethodTiming: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{
Namespace: namespace,
Subsystem: MetricsSubsystem,
Name: "method_timing_seconds",
Help: "ABCI Method Timing",
Buckets: []float64{.0001, .0004, .002, .009, .02, .1, .65, 2, 6, 25},
}, append(defaultLabels, []string{"method", "type"}...)).With(defaultLabelsAndValues...),
}
}
// NopMetrics constructs a Metrics instance that discards all samples and is suitable
// for testing.
func NopMetrics() *Metrics {
return &Metrics{
MethodTiming: discard.NewHistogram(),
}
}
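A short usage sketch of the label-pair convention described above (the namespace, chain ID, and observed value are illustrative):

    package main

    import "github.com/tendermint/tendermint/proxy"

    func main() {
        // "chain_id" is a label name and "test-chain-1" its value, following the
        // pairs convention documented on PrometheusMetrics.
        m := proxy.PrometheusMetrics("tendermint", "chain_id", "test-chain-1")
        // Observations are further labeled by ABCI method and call type.
        m.MethodTiming.With("method", "info", "type", "sync").Observe(0.0042)
    }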


@@ -32,8 +32,8 @@ type AppConns interface {
}
// NewAppConns calls NewMultiAppConn.
func NewAppConns(clientCreator ClientCreator) AppConns {
return NewMultiAppConn(clientCreator)
func NewAppConns(clientCreator ClientCreator, metrics *Metrics) AppConns {
return NewMultiAppConn(clientCreator, metrics)
}
// multiAppConn implements AppConns.
@@ -44,6 +44,7 @@ func NewAppConns(clientCreator ClientCreator) AppConns {
type multiAppConn struct {
service.BaseService
metrics *Metrics
consensusConn AppConnConsensus
mempoolConn AppConnMempool
queryConn AppConnQuery
@@ -58,8 +59,9 @@ type multiAppConn struct {
}
// NewMultiAppConn makes all necessary abci connections to the application.
func NewMultiAppConn(clientCreator ClientCreator) AppConns {
func NewMultiAppConn(clientCreator ClientCreator, metrics *Metrics) AppConns {
multiAppConn := &multiAppConn{
metrics: metrics,
clientCreator: clientCreator,
}
multiAppConn.BaseService = *service.NewBaseService(nil, "multiAppConn", multiAppConn)
@@ -88,7 +90,7 @@ func (app *multiAppConn) OnStart() error {
return err
}
app.queryConnClient = c
app.queryConn = NewAppConnQuery(c)
app.queryConn = NewAppConnQuery(c, app.metrics)
c, err = app.abciClientFor(connSnapshot)
if err != nil {
@@ -96,7 +98,7 @@ func (app *multiAppConn) OnStart() error {
return err
}
app.snapshotConnClient = c
app.snapshotConn = NewAppConnSnapshot(c)
app.snapshotConn = NewAppConnSnapshot(c, app.metrics)
c, err = app.abciClientFor(connMempool)
if err != nil {
@@ -104,7 +106,7 @@ func (app *multiAppConn) OnStart() error {
return err
}
app.mempoolConnClient = c
app.mempoolConn = NewAppConnMempool(c)
app.mempoolConn = NewAppConnMempool(c, app.metrics)
c, err = app.abciClientFor(connConsensus)
if err != nil {
@@ -112,7 +114,7 @@ func (app *multiAppConn) OnStart() error {
return err
}
app.consensusConnClient = c
app.consensusConn = NewAppConnConsensus(c)
app.consensusConn = NewAppConnConsensus(c, app.metrics)
// Kill Tendermint if the ABCI application crashes.
go app.killTMOnClientError()


@@ -28,7 +28,7 @@ func TestAppConns_Start_Stop(t *testing.T) {
clientCreatorMock.On("NewABCIClient").Return(clientMock, nil).Times(4)
appConns := NewAppConns(clientCreatorMock)
appConns := NewAppConns(clientCreatorMock, NopMetrics())
err := appConns.Start()
require.NoError(t, err)
@@ -68,7 +68,7 @@ func TestAppConns_Failure(t *testing.T) {
clientCreatorMock.On("NewABCIClient").Return(clientMock, nil)
appConns := NewAppConns(clientCreatorMock)
appConns := NewAppConns(clientCreatorMock, NopMetrics())
err := appConns.Start()
require.NoError(t, err)


@@ -35,7 +35,7 @@ var (
func TestApplyBlock(t *testing.T) {
app := &testApp{}
cc := proxy.NewLocalClientCreator(app)
proxyApp := proxy.NewAppConns(cc)
proxyApp := proxy.NewAppConns(cc, proxy.NopMetrics())
err := proxyApp.Start()
require.Nil(t, err)
defer proxyApp.Stop() //nolint:errcheck // ignore for tests
@@ -61,7 +61,7 @@ func TestApplyBlock(t *testing.T) {
func TestBeginBlockValidators(t *testing.T) {
app := &testApp{}
cc := proxy.NewLocalClientCreator(app)
proxyApp := proxy.NewAppConns(cc)
proxyApp := proxy.NewAppConns(cc, proxy.NopMetrics())
err := proxyApp.Start()
require.Nil(t, err)
defer proxyApp.Stop() //nolint:errcheck // no need to check error again
@@ -124,7 +124,7 @@ func TestBeginBlockValidators(t *testing.T) {
func TestBeginBlockByzantineValidators(t *testing.T) {
app := &testApp{}
cc := proxy.NewLocalClientCreator(app)
proxyApp := proxy.NewAppConns(cc)
proxyApp := proxy.NewAppConns(cc, proxy.NopMetrics())
err := proxyApp.Start()
require.Nil(t, err)
defer proxyApp.Stop() //nolint:errcheck // ignore for tests
@@ -348,7 +348,7 @@ func TestUpdateValidators(t *testing.T) {
func TestEndBlockValidatorUpdates(t *testing.T) {
app := &testApp{}
cc := proxy.NewLocalClientCreator(app)
proxyApp := proxy.NewAppConns(cc)
proxyApp := proxy.NewAppConns(cc, proxy.NopMetrics())
err := proxyApp.Start()
require.Nil(t, err)
defer proxyApp.Stop() //nolint:errcheck // ignore for tests
@@ -419,7 +419,7 @@ func TestEndBlockValidatorUpdates(t *testing.T) {
func TestEndBlockValidatorUpdatesResultingInEmptySet(t *testing.T) {
app := &testApp{}
cc := proxy.NewLocalClientCreator(app)
proxyApp := proxy.NewAppConns(cc)
proxyApp := proxy.NewAppConns(cc, proxy.NopMetrics())
err := proxyApp.Start()
require.Nil(t, err)
defer proxyApp.Stop() //nolint:errcheck // ignore for tests


@@ -27,7 +27,7 @@ type paramsChangeTestCase struct {
func newTestApp() proxy.AppConns {
app := &testApp{}
cc := proxy.NewLocalClientCreator(app)
return proxy.NewAppConns(cc)
return proxy.NewAppConns(cc, proxy.NopMetrics())
}
func makeAndCommitGoodBlock(