proxy: add initial set of abci metrics (port of #7115) (#9169)

* internal/proxy: add initial set of abci metrics (#7115)

This PR adds an initial set of metrics for use ABCI. The initial metrics enable the calculation of timing histograms and call counts for each of the ABCI methods. The metrics are also labeled as either 'sync' or 'async' to determine if the method call was performed using ABCI's `*Async` methods.

An example of these metrics is included here for reference:
```
tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.0001"} 0
tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.0004"} 5
tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.002"} 12
tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.009"} 13
tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.02"} 13
tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.1"} 13
tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.65"} 13
tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="2"} 13
tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="6"} 13
tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="25"} 13
tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="+Inf"} 13
tendermint_abci_connection_method_timing_sum{chain_id="ci",method="commit",type="sync"} 0.007802058000000001
tendermint_abci_connection_method_timing_count{chain_id="ci",method="commit",type="sync"} 13
```

These metrics can easily be graphed using prometheus's `histogram_quantile(...)` method to pick out a particular quantile to graph or examine. I chose buckets that were somewhat of an estimate of expected range of times for ABCI operations. They start at .0001 seconds and range to 25 seconds. The hope is that this range captures enough possible times to be useful for us and operators.

* fixup

* fixups

* docs: add abci timing metrics to the metrics docs (#7311)

* format table
This commit is contained in:
William Banfield
2022-08-05 13:29:00 -04:00
committed by GitHub
parent 6b499aeb31
commit b92a19b2ce
14 changed files with 128 additions and 41 deletions

View File

@@ -113,19 +113,20 @@ func DefaultNewNode(config *cfg.Config, logger log.Logger) (*Node, error) {
}
// MetricsProvider returns a consensus, p2p and mempool Metrics.
type MetricsProvider func(chainID string) (*cs.Metrics, *p2p.Metrics, *mempl.Metrics, *sm.Metrics)
type MetricsProvider func(chainID string) (*cs.Metrics, *p2p.Metrics, *mempl.Metrics, *sm.Metrics, *proxy.Metrics)
// DefaultMetricsProvider returns Metrics build using Prometheus client library
// if Prometheus is enabled. Otherwise, it returns no-op Metrics.
func DefaultMetricsProvider(config *cfg.InstrumentationConfig) MetricsProvider {
return func(chainID string) (*cs.Metrics, *p2p.Metrics, *mempl.Metrics, *sm.Metrics) {
return func(chainID string) (*cs.Metrics, *p2p.Metrics, *mempl.Metrics, *sm.Metrics, *proxy.Metrics) {
if config.Prometheus {
return cs.PrometheusMetrics(config.Namespace, "chain_id", chainID),
p2p.PrometheusMetrics(config.Namespace, "chain_id", chainID),
mempl.PrometheusMetrics(config.Namespace, "chain_id", chainID),
sm.PrometheusMetrics(config.Namespace, "chain_id", chainID)
sm.PrometheusMetrics(config.Namespace, "chain_id", chainID),
proxy.PrometheusMetrics(config.Namespace, "chain_id", chainID)
}
return cs.NopMetrics(), p2p.NopMetrics(), mempl.NopMetrics(), sm.NopMetrics()
return cs.NopMetrics(), p2p.NopMetrics(), mempl.NopMetrics(), sm.NopMetrics(), proxy.NopMetrics()
}
}
@@ -246,8 +247,8 @@ func initDBs(config *cfg.Config, dbProvider DBProvider) (blockStore *store.Block
return
}
func createAndStartProxyAppConns(clientCreator proxy.ClientCreator, logger log.Logger) (proxy.AppConns, error) {
proxyApp := proxy.NewAppConns(clientCreator)
func createAndStartProxyAppConns(clientCreator proxy.ClientCreator, logger log.Logger, metrics *proxy.Metrics) (proxy.AppConns, error) {
proxyApp := proxy.NewAppConns(clientCreator, metrics)
proxyApp.SetLogger(logger.With("module", "proxy"))
if err := proxyApp.Start(); err != nil {
return nil, fmt.Errorf("error starting proxy app connections: %v", err)
@@ -720,8 +721,10 @@ func NewNode(config *cfg.Config,
return nil, err
}
csMetrics, p2pMetrics, memplMetrics, smMetrics, abciMetrics := metricsProvider(genDoc.ChainID)
// Create the proxyApp and establish connections to the ABCI app (consensus, mempool, query).
proxyApp, err := createAndStartProxyAppConns(clientCreator, logger)
proxyApp, err := createAndStartProxyAppConns(clientCreator, logger, abciMetrics)
if err != nil {
return nil, err
}
@@ -786,8 +789,6 @@ func NewNode(config *cfg.Config,
logNodeStartupInfo(state, pubKey, logger, consensusLogger)
csMetrics, p2pMetrics, memplMetrics, smMetrics := metricsProvider(genDoc.ChainID)
// Make MempoolReactor
mempool, mempoolReactor := createMempoolAndMempoolReactor(config, proxyApp, state, memplMetrics, logger)

View File

@@ -226,7 +226,7 @@ func TestCreateProposalBlock(t *testing.T) {
config := cfg.ResetTestRoot("node_create_proposal")
defer os.RemoveAll(config.RootDir)
cc := proxy.NewLocalClientCreator(kvstore.NewApplication())
proxyApp := proxy.NewAppConns(cc)
proxyApp := proxy.NewAppConns(cc, proxy.NopMetrics())
err := proxyApp.Start()
require.Nil(t, err)
defer proxyApp.Stop() //nolint:errcheck // ignore for tests
@@ -331,7 +331,7 @@ func TestMaxProposalBlockSize(t *testing.T) {
config := cfg.ResetTestRoot("node_create_proposal")
defer os.RemoveAll(config.RootDir)
cc := proxy.NewLocalClientCreator(kvstore.NewApplication())
proxyApp := proxy.NewAppConns(cc)
proxyApp := proxy.NewAppConns(cc, proxy.NopMetrics())
err := proxyApp.Start()
require.Nil(t, err)
defer proxyApp.Stop() //nolint:errcheck // ignore for tests