Compare commits
45 Commits
sam/abci++
...
wb/issue-9
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
06050e4324 | ||
|
|
4fbfea79ad | ||
|
|
241be64da5 | ||
|
|
2c40ca52c1 | ||
|
|
3136b7a084 | ||
|
|
c755d03611 | ||
|
|
7a2e44184b | ||
|
|
77e7318613 | ||
|
|
caa75ae791 | ||
|
|
af2981a2f7 | ||
|
|
f9307cac51 | ||
|
|
3bd2153136 | ||
|
|
301211c2cb | ||
|
|
58ee42ca52 | ||
|
|
6e38fff9ed | ||
|
|
93ab364abc | ||
|
|
1c60efc0bc | ||
|
|
6768b98568 | ||
|
|
3cdfbda2eb | ||
|
|
4552cfc271 | ||
|
|
91fba07e49 | ||
|
|
38d1b2f873 | ||
|
|
e4fb662c8d | ||
|
|
810b9c613b | ||
|
|
341cabec0e | ||
|
|
5b98095ac3 | ||
|
|
59b28e71a0 | ||
|
|
071d787a45 | ||
|
|
a54a424478 | ||
|
|
c961fb58eb | ||
|
|
c7140bf817 | ||
|
|
5df9c410ff | ||
|
|
c8f203293d | ||
|
|
b06e1cea54 | ||
|
|
6ea968d576 | ||
|
|
b42c439776 | ||
|
|
387bf6795a | ||
|
|
4f3e87b2e4 | ||
|
|
a371b1e3a8 | ||
|
|
9dd99e9294 | ||
|
|
4fd19a275e | ||
|
|
8d26460f9d | ||
|
|
c0bdb2423a | ||
|
|
cdd3479f20 | ||
|
|
b1dc5a6def |
6
.github/workflows/docker.yml
vendored
@@ -41,17 +41,17 @@ jobs:
|
||||
platforms: all
|
||||
|
||||
- name: Set up Docker Build
|
||||
uses: docker/setup-buildx-action@v2.0.0
|
||||
uses: docker/setup-buildx-action@v2.1.0
|
||||
|
||||
- name: Login to DockerHub
|
||||
if: ${{ github.event_name != 'pull_request' }}
|
||||
uses: docker/login-action@v2.0.0
|
||||
uses: docker/login-action@v2.1.0
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
|
||||
- name: Publish to Docker Hub
|
||||
uses: docker/build-push-action@v3.1.1
|
||||
uses: docker/build-push-action@v3.2.0
|
||||
with:
|
||||
context: .
|
||||
file: ./DOCKER/Dockerfile
|
||||
|
||||
4
.github/workflows/e2e-nightly-34x.yml
vendored
@@ -57,7 +57,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Notify Slack on failure
|
||||
uses: slackapi/slack-github-action@v1.22.0
|
||||
uses: slackapi/slack-github-action@v1.23.0
|
||||
env:
|
||||
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
|
||||
SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK
|
||||
@@ -72,7 +72,7 @@ jobs:
|
||||
"type": "section",
|
||||
"text": {
|
||||
"type": "mrkdwn",
|
||||
"text": ":skull: Nightly E2E tests for `${{ env.BRANCH }}` failed. See the <${{ env.RUN_URL }}|run details> and the <${{ env.COMMIT_URL }}|commit> that caused the failure."
|
||||
"text": ":skull: Nightly E2E tests for `${{ env.BRANCH }}` failed. See the <${{ env.RUN_URL }}|run details> and the <${{ env.COMMIT_URL }}|commit> related to the failure."
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
4
.github/workflows/e2e-nightly-37x.yml
vendored
@@ -57,7 +57,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Notify Slack on failure
|
||||
uses: slackapi/slack-github-action@v1.22.0
|
||||
uses: slackapi/slack-github-action@v1.23.0
|
||||
env:
|
||||
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
|
||||
SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK
|
||||
@@ -72,7 +72,7 @@ jobs:
|
||||
"type": "section",
|
||||
"text": {
|
||||
"type": "mrkdwn",
|
||||
"text": ":skull: Nightly E2E tests for `${{ env.BRANCH }}` failed. See the <${{ env.RUN_URL }}|run details> and the <${{ env.COMMIT_URL }}|commit> that caused the failure."
|
||||
"text": ":skull: Nightly E2E tests for `${{ env.BRANCH }}` failed. See the <${{ env.RUN_URL }}|run details> and the <${{ env.COMMIT_URL }}|commit> related to the failure."
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
4
.github/workflows/e2e-nightly-main.yml
vendored
@@ -46,7 +46,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Notify Slack on failure
|
||||
uses: slackapi/slack-github-action@v1.22.0
|
||||
uses: slackapi/slack-github-action@v1.23.0
|
||||
env:
|
||||
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
|
||||
SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK
|
||||
@@ -61,7 +61,7 @@ jobs:
|
||||
"type": "section",
|
||||
"text": {
|
||||
"type": "mrkdwn",
|
||||
"text": ":skull: Nightly E2E tests for `${{ env.BRANCH }}` failed. See the <${{ env.RUN_URL }}|run details> and the <${{ env.COMMIT_URL }}|commit> that caused the failure."
|
||||
"text": ":skull: Nightly E2E tests for `${{ env.BRANCH }}` failed. See the <${{ env.RUN_URL }}|run details> and the <${{ env.COMMIT_URL }}|commit> related to the failure."
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
2
.github/workflows/fuzz-nightly.yml
vendored
@@ -76,7 +76,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Notify Slack on failure
|
||||
uses: slackapi/slack-github-action@v1.22.0
|
||||
uses: slackapi/slack-github-action@v1.23.0
|
||||
env:
|
||||
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
|
||||
SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK
|
||||
|
||||
2
.github/workflows/janitor.yml
vendored
@@ -10,7 +10,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 3
|
||||
steps:
|
||||
- uses: styfle/cancel-workflow-action@0.10.1
|
||||
- uses: styfle/cancel-workflow-action@0.11.0
|
||||
with:
|
||||
workflow_id: 1041851,1401230,2837803
|
||||
access_token: ${{ github.token }}
|
||||
|
||||
2
.gitignore
vendored
@@ -55,3 +55,5 @@ proto/spec/**/*.pb.go
|
||||
*.pdf
|
||||
*.gz
|
||||
*.dvi
|
||||
# Python virtual environments
|
||||
.venv
|
||||
|
||||
30
CHANGELOG.md
@@ -2,6 +2,36 @@
|
||||
|
||||
Friendly reminder, we have a [bug bounty program](https://hackerone.com/cosmos).
|
||||
|
||||
## v0.34.22
|
||||
|
||||
This release includes several bug fixes, [one of
|
||||
which](https://github.com/tendermint/tendermint/pull/9518) we discovered while
|
||||
building up a baseline for v0.34 against which to compare our upcoming v0.37
|
||||
release during our [QA process](./docs/qa/).
|
||||
|
||||
Special thanks to external contributors on this release: @RiccardoM
|
||||
|
||||
### FEATURES
|
||||
|
||||
- [rpc] [\#9423](https://github.com/tendermint/tendermint/pull/9423) Support
|
||||
HTTPS URLs from the WebSocket client (@RiccardoM, @cmwaters)
|
||||
|
||||
### BUG FIXES
|
||||
|
||||
- [config] [\#9483](https://github.com/tendermint/tendermint/issues/9483)
|
||||
Calling `tendermint init` would incorrectly leave out the new `[storage]`
|
||||
section delimiter in the generated configuration file - this has now been
|
||||
fixed
|
||||
- [p2p] [\#9500](https://github.com/tendermint/tendermint/issues/9500) Prevent
|
||||
peers who have errored being added to the peer set (@jmalicevic)
|
||||
- [indexer] [\#9473](https://github.com/tendermint/tendermint/issues/9473) Fix
|
||||
bug that caused the psql indexer to index empty blocks whenever one of the
|
||||
transactions returned a non zero code. The relevant deduplication logic has
|
||||
been moved within the kv indexer only (@cmwaters)
|
||||
- [blocksync] [\#9518](https://github.com/tendermint/tendermint/issues/9518) A
|
||||
block sync stall was observed during our QA process whereby the node was
|
||||
unable to make progress. Retrying block requests after a timeout fixes this.
|
||||
|
||||
## v0.34.21
|
||||
|
||||
Release highlights include:
|
||||
|
||||
@@ -96,3 +96,4 @@ Friendly reminder, we have a [bug bounty program](https://hackerone.com/tendermi
|
||||
|
||||
- [consensus] \#9229 fix round number of `enterPropose` when handling `RoundStepNewRound` timeout. (@fatcat22)
|
||||
- [docker] \#9073 enable cross platform build using docker buildx
|
||||
- [blocksync] \#9518 handle the case when the sending queue is full: retry block request after a timeout
|
||||
@@ -12,7 +12,7 @@ and hence to Tendermint.
|
||||
* We are committed to providing a friendly, safe and welcoming environment for
|
||||
all, regardless of level of experience, gender, gender identity and
|
||||
expression, sexual orientation, disability, personal appearance, body size,
|
||||
race, ethnicity, age, religion, nationality, or other similar characteristic.
|
||||
race, ethnicity, age, religion, nationality, or other similar characteristics.
|
||||
|
||||
* On Slack, please avoid using overtly sexual nicknames or other nicknames that
|
||||
might detract from a friendly, safe and welcoming environment for all.
|
||||
|
||||
@@ -12,7 +12,7 @@ landing changes in `main`.
|
||||
All work on the code base should be motivated by a [Github
|
||||
Issue](https://github.com/tendermint/tendermint/issues).
|
||||
[Search](https://github.com/tendermint/tendermint/issues?q=is%3Aopen+is%3Aissue+label%3A%22help+wanted%22)
|
||||
is a good place start when looking for places to contribute. If you
|
||||
is a good place to start when looking for places to contribute. If you
|
||||
would like to work on an issue which already exists, please indicate so
|
||||
by leaving a comment.
|
||||
|
||||
@@ -213,7 +213,7 @@ Changes with multiple classifications should be doubly included (eg. a bug fix
|
||||
that is also a breaking change should be recorded under both).
|
||||
|
||||
Breaking changes are further subdivided according to the APIs/users they impact.
|
||||
Any change that effects multiple APIs/users should be recorded multiply - for
|
||||
Any change that affects multiple APIs/users should be recorded multiply - for
|
||||
instance, a change to the `Blockchain Protocol` that removes a field from the
|
||||
header should also be recorded under `CLI/RPC/Config` since the field will be
|
||||
removed from the header in RPC responses as well.
|
||||
@@ -247,7 +247,7 @@ To begin contributing, create a development branch either on `github.com/tenderm
|
||||
Make changes, and before submitting a pull request, update the `CHANGELOG_PENDING.md` to record your change. Also, run either `git rebase` or `git merge` on top of the latest `main`. (Since pull requests are squash-merged, either is fine!)
|
||||
|
||||
Update the `UPGRADING.md` if the change you've made is breaking and the
|
||||
instructions should be in place for a user on how he/she can upgrade it's
|
||||
instructions should be in place for a user on how he/she can upgrade its
|
||||
software (ABCI application, Tendermint-based blockchain, light client, wallet).
|
||||
|
||||
Once you have submitted a pull request label the pull request with either `R:minor`, if the change should be included in the next minor release, or `R:major`, if the change is meant for a major release.
|
||||
|
||||
2
Makefile
@@ -271,7 +271,7 @@ format:
|
||||
|
||||
lint:
|
||||
@echo "--> Running linter"
|
||||
@golangci-lint run
|
||||
@go run github.com/golangci/golangci-lint/cmd/golangci-lint run
|
||||
.PHONY: lint
|
||||
|
||||
DESTINATION = ./index.html.md
|
||||
|
||||
@@ -80,7 +80,7 @@ func TestBcStatusResponseMessageValidateBasic(t *testing.T) {
|
||||
}
|
||||
|
||||
//nolint:lll // ignore line length in tests
|
||||
func TestBlockchainMessageVectors(t *testing.T) {
|
||||
func TestBlocksyncMessageVectors(t *testing.T) {
|
||||
block := types.MakeBlock(int64(3), []types.Tx{types.Tx("Hello World")}, nil, nil)
|
||||
block.Version.Block = 11 // overwrite updated protocol version
|
||||
|
||||
|
||||
@@ -32,6 +32,7 @@ const (
|
||||
maxTotalRequesters = 600
|
||||
maxPendingRequests = maxTotalRequesters
|
||||
maxPendingRequestsPerPeer = 20
|
||||
requestRetrySeconds = 30
|
||||
|
||||
// Minimum recv rate to ensure we're receiving blocks from a peer fast
|
||||
// enough. If a peer is not sending us data at at least that rate, we
|
||||
@@ -602,7 +603,7 @@ OUTER_LOOP:
|
||||
}
|
||||
peer = bpr.pool.pickIncrAvailablePeer(bpr.height)
|
||||
if peer == nil {
|
||||
// log.Info("No peers available", "height", height)
|
||||
bpr.Logger.Debug("No peers currently available; will retry shortly", "height", bpr.height)
|
||||
time.Sleep(requestIntervalMS * time.Millisecond)
|
||||
continue PICK_PEER_LOOP
|
||||
}
|
||||
@@ -612,6 +613,7 @@ OUTER_LOOP:
|
||||
bpr.peerID = peer.id
|
||||
bpr.mtx.Unlock()
|
||||
|
||||
to := time.NewTimer(requestRetrySeconds * time.Second)
|
||||
// Send request and wait.
|
||||
bpr.pool.sendRequest(bpr.height, peer.id)
|
||||
WAIT_LOOP:
|
||||
@@ -624,6 +626,11 @@ OUTER_LOOP:
|
||||
return
|
||||
case <-bpr.Quit():
|
||||
return
|
||||
case <-to.C:
|
||||
bpr.Logger.Debug("Retrying block request after timeout", "height", bpr.height, "peer", bpr.peerID)
|
||||
// Simulate a redo
|
||||
bpr.reset()
|
||||
continue OUTER_LOOP
|
||||
case peerID := <-bpr.redoCh:
|
||||
if peerID == bpr.peerID {
|
||||
bpr.reset()
|
||||
|
||||
@@ -30,7 +30,7 @@ const (
|
||||
)
|
||||
|
||||
type consensusReactor interface {
|
||||
// for when we switch from blockchain reactor and block sync to
|
||||
// for when we switch from blocksync reactor and block sync to
|
||||
// the consensus machine
|
||||
SwitchToConsensus(state sm.State, skipWAL bool)
|
||||
}
|
||||
|
||||
@@ -146,13 +146,13 @@ func newReactor(
|
||||
}
|
||||
|
||||
bcReactor := NewReactor(state.Copy(), blockExec, blockStore, fastSync)
|
||||
bcReactor.SetLogger(logger.With("module", "blockchain"))
|
||||
bcReactor.SetLogger(logger.With("module", "blocksync"))
|
||||
|
||||
return ReactorPair{bcReactor, proxyApp}
|
||||
}
|
||||
|
||||
func TestNoBlockResponse(t *testing.T) {
|
||||
config = test.ResetTestRoot("blockchain_reactor_test")
|
||||
config = test.ResetTestRoot("blocksync_reactor_test")
|
||||
defer os.RemoveAll(config.RootDir)
|
||||
genDoc, privVals := randGenesisDoc(1, false, 30)
|
||||
|
||||
@@ -164,7 +164,7 @@ func TestNoBlockResponse(t *testing.T) {
|
||||
reactorPairs[1] = newReactor(t, log.TestingLogger(), genDoc, privVals, 0)
|
||||
|
||||
p2p.MakeConnectedSwitches(config.P2P, 2, func(i int, s *p2p.Switch) *p2p.Switch {
|
||||
s.AddReactor("BLOCKCHAIN", reactorPairs[i].reactor)
|
||||
s.AddReactor("BLOCKSYNC", reactorPairs[i].reactor)
|
||||
return s
|
||||
|
||||
}, p2p.Connect2Switches)
|
||||
@@ -214,7 +214,7 @@ func TestNoBlockResponse(t *testing.T) {
|
||||
// Alternatively we could actually dial a TCP conn but
|
||||
// that seems extreme.
|
||||
func TestBadBlockStopsPeer(t *testing.T) {
|
||||
config = test.ResetTestRoot("blockchain_reactor_test")
|
||||
config = test.ResetTestRoot("blocksync_reactor_test")
|
||||
defer os.RemoveAll(config.RootDir)
|
||||
genDoc, privVals := randGenesisDoc(1, false, 30)
|
||||
|
||||
@@ -239,7 +239,7 @@ func TestBadBlockStopsPeer(t *testing.T) {
|
||||
reactorPairs[3] = newReactor(t, log.TestingLogger(), genDoc, privVals, 0)
|
||||
|
||||
switches := p2p.MakeConnectedSwitches(config.P2P, 4, func(i int, s *p2p.Switch) *p2p.Switch {
|
||||
s.AddReactor("BLOCKCHAIN", reactorPairs[i].reactor)
|
||||
s.AddReactor("BLOCKSYNC", reactorPairs[i].reactor)
|
||||
return s
|
||||
|
||||
}, p2p.Connect2Switches)
|
||||
@@ -278,7 +278,7 @@ func TestBadBlockStopsPeer(t *testing.T) {
|
||||
reactorPairs = append(reactorPairs, lastReactorPair)
|
||||
|
||||
switches = append(switches, p2p.MakeConnectedSwitches(config.P2P, 1, func(i int, s *p2p.Switch) *p2p.Switch {
|
||||
s.AddReactor("BLOCKCHAIN", reactorPairs[len(reactorPairs)-1].reactor)
|
||||
s.AddReactor("BLOCKSYNC", reactorPairs[len(reactorPairs)-1].reactor)
|
||||
return s
|
||||
|
||||
}, p2p.Connect2Switches)...)
|
||||
|
||||
@@ -703,9 +703,6 @@ type MempoolConfig struct {
|
||||
// Mempool version to use:
|
||||
// 1) "v0" - (default) FIFO mempool.
|
||||
// 2) "v1" - prioritized mempool.
|
||||
// WARNING: There's a known memory leak with the prioritized mempool
|
||||
// that the team are working on. Read more here:
|
||||
// https://github.com/tendermint/tendermint/issues/8775
|
||||
Version string `mapstructure:"version"`
|
||||
RootDir string `mapstructure:"home"`
|
||||
Recheck bool `mapstructure:"recheck"`
|
||||
|
||||
@@ -99,4 +99,4 @@ configuration file that we can update with PRs.
|
||||
Because the build processes are identical (as is the information contained
|
||||
herein), this file should be kept in sync as much as possible with its
|
||||
[counterpart in the Cosmos SDK
|
||||
repo](https://github.com/cosmos/cosmos-sdk/blob/master/docs/DOCS_README.md).
|
||||
repo](https://github.com/cosmos/cosmos-sdk/blob/main/docs/README.md).
|
||||
|
||||
@@ -61,7 +61,7 @@ The following protocols and application features require a reliable source of ti
|
||||
* Tendermint Light Clients [rely on correspondence between their known time](https://github.com/tendermint/tendermint/blob/main/spec/light-client/verification/README.md#definitions-1) and the block time for block verification.
|
||||
* Tendermint Evidence validity is determined [either in terms of heights or in terms of time](https://github.com/tendermint/tendermint/blob/8029cf7a0fcc89a5004e173ec065aa48ad5ba3c8/spec/consensus/evidence.md#verification).
|
||||
* Unbonding of staked assets in the Cosmos Hub [occurs after a period of 21 days](https://github.com/cosmos/governance/blob/ce75de4019b0129f6efcbb0e752cd2cc9e6136d3/params-change/Staking.md#unbondingtime).
|
||||
* IBC packets can use either a [timestamp or a height to timeout packet delivery](https://docs.cosmos.network/v0.44/ibc/overview.html#acknowledgements)
|
||||
* IBC packets can use either a [timestamp or a height to timeout packet delivery](https://docs.cosmos.network/v0.45/ibc/overview.html#acknowledgements)
|
||||
|
||||
Finally, inflation distribution in the Cosmos Hub uses an approximation of time to calculate an annual percentage rate.
|
||||
This approximation of time is calculated using [block heights with an estimated number of blocks produced in a year](https://github.com/cosmos/governance/blob/master/params-change/Mint.md#blocksperyear).
|
||||
|
||||
23
docs/qa/README.md
Normal file
@@ -0,0 +1,23 @@
|
||||
---
|
||||
order: 1
|
||||
parent:
|
||||
title: Tendermint Quality Assurance
|
||||
description: This is a report on the process followed and results obtained when running v0.34.x on testnets
|
||||
order: 2
|
||||
---
|
||||
|
||||
# Tendermint Quality Assurance
|
||||
|
||||
This directory keeps track of the process followed by the Tendermint Core team
|
||||
for Quality Assurance before cutting a release.
|
||||
This directory is to live in multiple branches. On each release branch,
|
||||
the contents of this directory reflect the status of the process
|
||||
at the time the Quality Assurance process was applied for that release.
|
||||
|
||||
File [method](./method.md) keeps track of the process followed to obtain the results
|
||||
used to decide if a release is passing the Quality Assurance process.
|
||||
The results obtained in each release are stored in their own directory.
|
||||
The following releases have undergone the Quality Assurance process:
|
||||
|
||||
* [v0.34.x](./v034/), which was tested just before releasing v0.34.22
|
||||
* [v0.37.x](./v037/), with v0.34.x acting as a baseline
|
||||
214
docs/qa/method.md
Normal file
@@ -0,0 +1,214 @@
|
||||
---
|
||||
order: 1
|
||||
title: Method
|
||||
---
|
||||
|
||||
# Method
|
||||
|
||||
This document provides a detailed description of the QA process.
|
||||
It is intended to be used by engineers reproducing the experimental setup for future tests of Tendermint.
|
||||
|
||||
The (first iteration of the) QA process as described [in the RELEASES.md document][releases]
|
||||
was applied to version v0.34.x in order to have a set of results acting as a benchmarking baseline.
|
||||
This baseline is then compared with results obtained in later versions.
|
||||
|
||||
Out of the testnet-based test cases described in [the releases document][releases] we focused on two of them:
|
||||
_200 Node Test_, and _Rotating Nodes Test_.
|
||||
|
||||
[releases]: https://github.com/tendermint/tendermint/blob/v0.37.x/RELEASES.md#large-scale-testnets
|
||||
|
||||
## Software Dependencies
|
||||
|
||||
### Infrastructure Requirements to Run the Tests
|
||||
|
||||
* An account at Digital Ocean (DO), with a high droplet limit (>202)
|
||||
* The machine to orchestrate the tests should have the following installed:
|
||||
* A clone of the [testnet repository][testnet-repo]
|
||||
* This repository contains all the scripts mentioned in the remainder of this section
|
||||
* [Digital Ocean CLI][doctl]
|
||||
* [Terraform CLI][Terraform]
|
||||
* [Ansible CLI][Ansible]
|
||||
|
||||
[testnet-repo]: https://github.com/interchainio/tendermint-testnet
|
||||
[Ansible]: https://docs.ansible.com/ansible/latest/index.html
|
||||
[Terraform]: https://www.terraform.io/docs
|
||||
[doctl]: https://docs.digitalocean.com/reference/doctl/how-to/install/
|
||||
|
||||
### Requirements for Result Extraction
|
||||
|
||||
* Matlab or Octave
|
||||
* [Prometheus][prometheus] server installed
|
||||
* blockstore DB of one of the full nodes in the testnet
|
||||
* Prometheus DB
|
||||
|
||||
[prometheus]: https://prometheus.io/
|
||||
|
||||
## 200 Node Testnet
|
||||
|
||||
### Running the test
|
||||
|
||||
This section explains how the tests were carried out for reproducibility purposes.
|
||||
|
||||
1. [If you haven't done it before]
|
||||
Follow steps 1-4 of the `README.md` at the top of the testnet repository to configure Terraform, and `doctl`.
|
||||
2. Copy file `testnets/testnet200.toml` onto `testnet.toml` (do NOT commit this change)
|
||||
3. Set the variable `VERSION_TAG` in the `Makefile` to the git hash that is to be tested.
|
||||
4. Follow steps 5-10 of the `README.md` to configure and start the 200 node testnet
|
||||
* WARNING: Do NOT forget to run `make terraform-destroy` as soon as you are done with the tests (see step 9)
|
||||
5. As a sanity check, connect to the Prometheus node's web interface and check the graph for the `tendermint_consensus_height` metric.
|
||||
All nodes should be increasing their heights.
|
||||
6. `ssh` into the `testnet-load-runner`, then copy script `script/200-node-loadscript.sh` and run it from the load runner node.
|
||||
* Before running it, you need to edit the script to provide the IP address of a full node.
|
||||
This node will receive all transactions from the load runner node.
|
||||
* This script will take about 40 mins to run
|
||||
* It is running 90-seconds-long experiments in a loop with different loads
|
||||
7. Run `make retrieve-data` to gather all relevant data from the testnet into the orchestrating machine
|
||||
8. Verify that the data was collected without errors
|
||||
* at least one blockstore DB for a Tendermint validator
|
||||
* the Prometheus database from the Prometheus node
|
||||
* for extra care, you can run `zip -T` on the `prometheus.zip` file and (one of) the `blockstore.db.zip` file(s)
|
||||
9. **Run `make terraform-destroy`**
|
||||
* Don't forget to type `yes`! Otherwise you're in trouble.
|
||||
|
||||
### Result Extraction
|
||||
|
||||
The method for extracting the results described here is highly manual (and exploratory) at this stage.
|
||||
The Core team should improve it at every iteration to increase the amount of automation.
|
||||
|
||||
#### Steps
|
||||
|
||||
1. Unzip the blockstore into a directory
|
||||
2. Extract the latency report and the raw latencies for all the experiments. Run these commands from the directory containing the blockstore
|
||||
* `go run github.com/tendermint/tendermint/test/loadtime/cmd/report@3ec6e424d --database-type goleveldb --data-dir ./ > results/report.txt`
|
||||
* `go run github.com/tendermint/tendermint/test/loadtime/cmd/report@3ec6e424d --database-type goleveldb --data-dir ./ --csv results/raw.csv`
|
||||
3. File `report.txt` contains an unordered list of experiments with varying concurrent connections and transaction rate
|
||||
* Create files `report01.txt`, `report02.txt`, `report04.txt` and, for each experiment in file `report.txt`,
|
||||
copy its related lines to the filename that matches the number of connections.
|
||||
* Sort the experiments in `report01.txt` in ascending tx rate order. Likewise for `report02.txt` and `report04.txt`.
|
||||
4. Generate file `report_tabbed.txt` by showing the contents of `report01.txt`, `report02.txt`, `report04.txt` side by side
|
||||
* This effectively creates a table where rows are a particular tx rate and columns are a particular number of websocket connections.
|
||||
5. Extract the raw latencies from file `raw.csv` using the following bash loop. This creates a `.csv` file and a `.dat` file per experiment.
|
||||
The format of the `.dat` files is amenable to loading them as matrices in Octave
|
||||
|
||||
```bash
|
||||
uuids=($(cat report01.txt report02.txt report04.txt | grep '^Experiment ID: ' | awk '{ print $3 }'))
|
||||
c=1
|
||||
for i in 01 02 04; do
|
||||
for j in 0025 0050 0100 0200; do
|
||||
echo $i $j $c "${uuids[$c]}"
|
||||
filename=c${i}_r${j}
|
||||
grep ${uuids[$c]} raw.csv > ${filename}.csv
|
||||
cat ${filename}.csv | tr , ' ' | awk '{ print $2, $3 }' > ${filename}.dat
|
||||
c=$(expr $c + 1)
|
||||
done
|
||||
done
|
||||
```
|
||||
|
||||
6. Enter Octave
|
||||
7. Load all `.dat` files generated in step 5 into matrices using this Octave code snippet
|
||||
|
||||
```octave
|
||||
conns = { "01"; "02"; "04" };
|
||||
rates = { "0025"; "0050"; "0100"; "0200" };
|
||||
for i = 1:length(conns)
|
||||
for j = 1:length(rates)
|
||||
filename = strcat("c", conns{i}, "_r", rates{j}, ".dat");
|
||||
load("-ascii", filename);
|
||||
endfor
|
||||
endfor
|
||||
```
|
||||
|
||||
8. Set variable release to the current release undergoing QA
|
||||
|
||||
```octave
|
||||
release = "v0.34.x";
|
||||
```
|
||||
|
||||
9. Generate a plot with all (or some) experiments, where the X axis is the experiment time,
|
||||
and the Y axis is the latency of transactions.
|
||||
The following snippet plots all experiments.
|
||||
|
||||
```octave
|
||||
legends = {};
|
||||
hold off;
|
||||
for i = 1:length(conns)
|
||||
for j = 1:length(rates)
|
||||
data_name = strcat("c", conns{i}, "_r", rates{j});
|
||||
l = strcat("c=", conns{i}, " r=", rates{j});
|
||||
m = eval(data_name); plot((m(:,1) - min(m(:,1))) / 1e+9, m(:,2) / 1e+9, ".");
|
||||
hold on;
|
||||
legends(1, end+1) = l;
|
||||
endfor
|
||||
endfor
|
||||
legend(legends, "location", "northeastoutside");
|
||||
xlabel("experiment time (s)");
|
||||
ylabel("latency (s)");
|
||||
t = sprintf("200-node testnet - %s", release);
|
||||
title(t);
|
||||
```
|
||||
|
||||
10. Consider adjusting the axis, in case you want to compare your results to the baseline, for instance
|
||||
|
||||
```octave
|
||||
axis([0, 100, 0, 30], "tic");
|
||||
```
|
||||
|
||||
11. Use Octave's GUI menu to save the plot (e.g. as `.png`)
|
||||
|
||||
12. Repeat steps 9 and 10 to obtain as many plots as deemed necessary.
|
||||
|
||||
13. To generate a latency vs throughput plot, using the raw CSV file generated
|
||||
in step 2, follow the instructions for the [`latency_throughput.py`] script.
|
||||
|
||||
[`latency_throughput.py`]: ../../scripts/qa/reporting/README.md
|
||||
|
||||
#### Extracting Prometheus Metrics
|
||||
|
||||
1. Stop the prometheus server if it is running as a service (e.g. a `systemd` unit).
|
||||
2. Unzip the prometheus database retrieved from the testnet, and move it to replace the
|
||||
local prometheus database.
|
||||
3. Start the prometheus server and make sure no error logs appear at start up.
|
||||
4. Introduce the metrics you want to gather or plot.
|
||||
|
||||
## Rotating Node Testnet
|
||||
|
||||
### Running the test
|
||||
|
||||
This section explains how the tests were carried out for reproducibility purposes.
|
||||
|
||||
1. [If you haven't done it before]
|
||||
Follow steps 1-4 of the `README.md` at the top of the testnet repository to configure Terraform, and `doctl`.
|
||||
2. Copy file `testnet_rotating.toml` onto `testnet.toml` (do NOT commit this change)
|
||||
3. Set variable `VERSION_TAG` to the git hash that is to be tested.
|
||||
4. Run `make terraform-apply EPHEMERAL_SIZE=25`
|
||||
* WARNING: Do NOT forget to run `make terraform-destroy` as soon as you are done with the tests
|
||||
5. Follow steps 6-10 of the `README.md` to configure and start the "stable" part of the rotating node testnet
|
||||
6. As a sanity check, connect to the Prometheus node's web interface and check the graph for the `tendermint_consensus_height` metric.
|
||||
All nodes should be increasing their heights.
|
||||
7. On a different shell,
|
||||
* run `make runload ROTATE_CONNECTIONS=X ROTATE_TX_RATE=Y`
|
||||
* `X` and `Y` should reflect a load below the saturation point (see, e.g.,
|
||||
[this paragraph](./v034/README.md#finding-the-saturation-point) for further info)
|
||||
8. Run `make rotate` to start the script that creates the ephemeral nodes, and kills them when they are caught up.
|
||||
* WARNING: If you run this command from your laptop, the laptop needs to be up and connected for the full length
|
||||
of the experiment.
|
||||
9. When the height of the chain reaches 3000, stop the `make runload` script
|
||||
10. When the rotate script has made two iterations (i.e., all ephemeral nodes have caught up twice)
|
||||
after height 3000 was reached, stop `make rotate`
|
||||
11. Run `make retrieve-data` to gather all relevant data from the testnet into the orchestrating machine
|
||||
12. Verify that the data was collected without errors
|
||||
* at least one blockstore DB for a Tendermint validator
|
||||
* the Prometheus database from the Prometheus node
|
||||
* for extra care, you can run `zip -T` on the `prometheus.zip` file and (one of) the `blockstore.db.zip` file(s)
|
||||
13. **Run `make terraform-destroy`**
|
||||
|
||||
Steps 8 to 10 are highly manual at the moment and will be improved in future iterations.
|
||||
|
||||
### Result Extraction
|
||||
|
||||
In order to obtain a latency plot, follow the instructions above for the 200 node experiment, but:
|
||||
|
||||
* The `report.txt` file contains only one experiment
|
||||
* Therefore, no need for any `for` loops
|
||||
|
||||
As for prometheus, the same method as for the 200 node experiment can be applied.
|
||||
278
docs/qa/v034/README.md
Normal file
@@ -0,0 +1,278 @@
|
||||
---
|
||||
order: 1
|
||||
parent:
|
||||
title: Tendermint Quality Assurance Results for v0.34.x
|
||||
description: This is a report on the results obtained when running v0.34.x on testnets
|
||||
order: 2
|
||||
---
|
||||
|
||||
# v0.34.x
|
||||
|
||||
## 200 Node Testnet
|
||||
|
||||
### Finding the Saturation Point
|
||||
|
||||
The first goal when examining the results of the tests is identifying the saturation point.
|
||||
The saturation point is a setup with a transaction load big enough to prevent the testnet
|
||||
from being stable: the load runner tries to produce slightly more transactions than can
|
||||
be processed by the testnet.
|
||||
|
||||
The following table summarizes the results for v0.34.x, for the different experiments
|
||||
(extracted from file [`v034_report_tabbed.txt`](./img/v034_report_tabbed.txt)).
|
||||
|
||||
The X axis of this table is `c`, the number of connections created by the load runner process to the target node.
|
||||
The Y axis of this table is `r`, the rate or number of transactions issued per second.
|
||||
|
||||
| | c=1 | c=2 | c=4 |
|
||||
| :--- | ----: | ----: | ----: |
|
||||
| r=25 | 2225 | 4450 | 8900 |
|
||||
| r=50 | 4450 | 8900 | 17800 |
|
||||
| r=100 | 8900 | 17800 | 35600 |
|
||||
| r=200 | 17800 | 35600 | 38660 |
|
||||
|
||||
The table shows the number of 1024-byte-long transactions that were produced by the load runner,
|
||||
and processed by Tendermint, during the 90 seconds of the experiment's duration.
|
||||
Each cell in the table refers to an experiment with a particular number of websocket connections (`c`)
|
||||
to a chosen validator, and the number of transactions per second that the load runner
|
||||
tries to produce (`r`). Note that the overall load that the tool attempts to generate is $c \cdot r$.
|
||||
|
||||
We can see that the saturation point is beyond the diagonal that spans cells
|
||||
|
||||
* `r=200,c=2`
|
||||
* `r=100,c=4`
|
||||
|
||||
given that the total transactions should be close to the product of the rate, the number of connections,
|
||||
and the experiment time (89 seconds, since the last batch never gets sent).
|
||||
|
||||
All experiments below the saturation diagonal (`r=200,c=4`) have in common that the total
|
||||
number of transactions processed is noticeably less than the product $c \cdot r \cdot 89$,
|
||||
which is the expected number of transactions when the system is able to deal well with the
|
||||
load.
|
||||
With `r=200,c=4`, we obtained 38660 whereas the theoretical number of transactions should
|
||||
have been $200 \cdot 4 \cdot 89 = 71200$.
|
||||
|
||||
At this point, we chose an experiment at the limit of the saturation diagonal,
|
||||
in order to further study the performance of this release.
|
||||
**The chosen experiment is `r=200,c=2`**.
|
||||
|
||||
This is a plot of the CPU load (average over 1 minute, as output by `top`) of the load runner for `r=200,c=2`,
|
||||
where we can see that the load stays close to 0 most of the time.
|
||||
|
||||

|
||||
|
||||
### Examining latencies
|
||||
|
||||
The method described [here](../method.md) allows us to plot the latencies of transactions
|
||||
for all experiments.
|
||||
|
||||

|
||||
|
||||
As we can see, even the experiments beyond the saturation diagonal managed to keep
|
||||
transaction latency stable (i.e. not constantly increasing).
|
||||
Our interpretation for this is that contention within Tendermint was propagated,
|
||||
via the websockets, to the load runner,
|
||||
hence the load runner could not produce the target load, but a fraction of it.
|
||||
|
||||
Further examination of the Prometheus data (see below) showed that the mempool contained many transactions
|
||||
at steady state, but did not grow much without quickly returning to this steady state. This demonstrates
|
||||
that the transactions were able to be processed by the Tendermint network at least as quickly as they
|
||||
were submitted to the mempool. Finally, the test script made sure that, at the end of an experiment, the
|
||||
mempool was empty so that all transactions submitted to the chain were processed.
|
||||
|
||||
Finally, the number of points present in the plot appears to be much less than expected given the
|
||||
number of transactions in each experiment, particularly close to or above the saturation diagonal.
|
||||
This is a visual effect of the plot; what appear to be points in the plot are actually potentially huge
|
||||
clusters of points. To corroborate this, we have zoomed in on the plot above by setting (carefully chosen)
|
||||
tiny axis intervals. The cluster shown below looks like a single point in the plot above.
|
||||
|
||||

|
||||
|
||||
The plot of latencies can be used as a baseline to compare with other releases.
|
||||
|
||||
The following plot summarizes average latencies versus overall throughputs
|
||||
across different numbers of WebSocket connections to the node into which
|
||||
transactions are being loaded.
|
||||
|
||||

|
||||
|
||||
### Prometheus Metrics on the Chosen Experiment
|
||||
|
||||
As mentioned [above](#finding-the-saturation-point), the chosen experiment is `r=200,c=2`.
|
||||
This section further examines key metrics for this experiment extracted from Prometheus data.
|
||||
|
||||
#### Mempool Size
|
||||
|
||||
The mempool size, a count of the number of transactions in the mempool, was shown to be stable and homogeneous
|
||||
at all full nodes. It did not exhibit any unconstrained growth.
|
||||
The plot below shows the evolution over time of the cumulative number of transactions inside all full nodes' mempools
|
||||
at a given time.
|
||||
The two spikes that can be observed correspond to a period where consensus instances proceeded beyond the initial round
|
||||
at some nodes.
|
||||
|
||||

|
||||
|
||||
The plot below shows the evolution of the average over all full nodes, which oscillates between 1500 and 2000
|
||||
outstanding transactions.
|
||||
|
||||

|
||||
|
||||
The peaks observed coincide with the moments when some nodes proceeded beyond the initial round of consensus (see below).
|
||||
|
||||
#### Peers
|
||||
|
||||
The number of peers was stable at all nodes.
|
||||
It was higher for the seed nodes (around 140) than for the rest (between 21 and 74).
|
||||
The fact that non-seed nodes reach more than 50 peers is due to #9548.
|
||||
|
||||

|
||||
|
||||
#### Consensus Rounds per Height
|
||||
|
||||
Most heights took just one round, but some nodes needed to advance to round 1 at some point.
|
||||
|
||||

|
||||
|
||||
#### Blocks Produced per Minute, Transactions Processed per Minute
|
||||
|
||||
The blocks produced per minute are the slope of this plot.
|
||||
|
||||

|
||||
|
||||
Over a period of 2 minutes, the height goes from 530 to 569.
|
||||
This results in an average of 19.5 blocks produced per minute.
|
||||
|
||||
The transactions processed per minute are the slope of this plot.
|
||||
|
||||

|
||||
|
||||
Over a period of 2 minutes, the total goes from 64525 to 100125 transactions,
|
||||
resulting in 17800 transactions per minute. However, we can see in the plot that
|
||||
all transactions in the load are processed long before the two minutes.
|
||||
If we adjust the time window when transactions are processed (approx. 105 seconds),
|
||||
we obtain 20343 transactions per minute.
|
||||
|
||||
#### Memory Resident Set Size
|
||||
|
||||
Resident Set Size of all monitored processes is plotted below.
|
||||
|
||||

|
||||
|
||||
The average over all processes oscillates around 1.2 GiB and does not demonstrate unconstrained growth.
|
||||
|
||||

|
||||
|
||||
#### CPU utilization
|
||||
|
||||
The best metric from Prometheus to gauge CPU utilization in a Unix machine is `load1`,
|
||||
as it usually appears in the
|
||||
[output of `top`](https://www.digitalocean.com/community/tutorials/load-average-in-linux).
|
||||
|
||||

|
||||
|
||||
It is contained in most cases below 5, which is generally considered acceptable load.
|
||||
|
||||
### Test Result
|
||||
|
||||
**Result: N/A** (v0.34.x is the baseline)
|
||||
|
||||
Date: 2022-10-14
|
||||
|
||||
Version: 3ec6e424d6ae4c96867c2dcf8310572156068bb6
|
||||
|
||||
## Rotating Node Testnet
|
||||
|
||||
For this testnet, we will use a load that can safely be considered below the saturation
|
||||
point for the size of this testnet (between 13 and 38 full nodes): `c=4,r=800`.
|
||||
|
||||
N.B.: The version of Tendermint used for these tests is affected by #9539.
|
||||
However, the reduced load that reaches the mempools is orthogonal to functionality
|
||||
we are focusing on here.
|
||||
|
||||
### Latencies
|
||||
|
||||
All latencies can be seen in the following plot.
|
||||
|
||||

|
||||
|
||||
We can observe there are some very high latencies, towards the end of the test.
|
||||
Upon suspicion that they are duplicate transactions, we examined the latencies
|
||||
raw file and discovered there are more than 100K duplicate transactions.
|
||||
|
||||
The following plot shows the latencies file where all duplicate transactions have
|
||||
been removed, i.e., only the first occurrence of a duplicate transaction is kept.
|
||||
|
||||

|
||||
|
||||
This problem, existing in `v0.34.x`, will need to be addressed, perhaps in the same way
|
||||
we addressed it when running the 200 node test with high loads: increasing the `cache_size`
|
||||
configuration parameter.
|
||||
|
||||
### Prometheus Metrics
|
||||
|
||||
The set of metrics shown here is smaller than for the 200 node experiment.
|
||||
We are only interested in those for which the catch-up process (blocksync) may have an impact.
|
||||
|
||||
#### Blocks and Transactions per minute
|
||||
|
||||
Just as shown for the 200 node test, the blocks produced per minute are the gradient of this plot.
|
||||
|
||||

|
||||
|
||||
Over a period of 5229 seconds, the height goes from 2 to 3638.
|
||||
This results in an average of 41 blocks produced per minute.
|
||||
|
||||
The following plot shows only the heights reported by ephemeral nodes
|
||||
(which are also included in the plot above). Note that the _height_ metric
|
||||
is only shown _once the node has switched to consensus_, hence the gaps
|
||||
when nodes are killed, wiped out, started from scratch, and catching up.
|
||||
|
||||

|
||||
|
||||
The transactions processed per minute are the gradient of this plot.
|
||||
|
||||

|
||||
|
||||
The small lines we see periodically close to `y=0` are the transactions that
|
||||
ephemeral nodes start processing when they are caught up.
|
||||
|
||||
Over a period of 5229 seconds, the total goes from 0 to 387697 transactions,
|
||||
resulting in 4449 transactions per minute. We can see some abrupt changes in
|
||||
the plot's gradient. This will need to be investigated.
|
||||
|
||||
#### Peers
|
||||
|
||||
The plot below shows the evolution in peers throughout the experiment.
|
||||
The periodic changes observed are due to the ephemeral nodes being stopped,
|
||||
wiped out, and recreated.
|
||||
|
||||

|
||||
|
||||
The validators' plots are concentrated at the higher part of the graph, whereas the ephemeral nodes
|
||||
are mostly at the lower part.
|
||||
|
||||
#### Memory Resident Set Size
|
||||
|
||||
The average Resident Set Size (RSS) over all processes seems stable, and slightly growing toward the end.
|
||||
This might be related to the increase in transaction load observed above.
|
||||
|
||||

|
||||
|
||||
The memory taken by the validators and the ephemeral nodes (when they are up) is comparable.
|
||||
|
||||
#### CPU utilization
|
||||
|
||||
The plot shows metric `load1` for all nodes.
|
||||
|
||||

|
||||
|
||||
It is contained under 5 most of the time, which is considered normal load.
|
||||
The purple line, which follows a different pattern, is the validator receiving all
|
||||
transactions, via RPC, from the load runner process.
|
||||
|
||||
### Test Result
|
||||
|
||||
**Result: N/A**
|
||||
|
||||
Date: 2022-10-10
|
||||
|
||||
Version: a28c987f5a604ff66b515dd415270063e6fb069d
|
||||
BIN
docs/qa/v034/img/v034_200node_latencies.png
Normal file
|
After Width: | Height: | Size: 42 KiB |
BIN
docs/qa/v034/img/v034_200node_latencies_zoomed.png
Normal file
|
After Width: | Height: | Size: 34 KiB |
BIN
docs/qa/v034/img/v034_latency_throughput.png
Normal file
|
After Width: | Height: | Size: 35 KiB |
BIN
docs/qa/v034/img/v034_r200c2_heights.png
Normal file
|
After Width: | Height: | Size: 378 KiB |
BIN
docs/qa/v034/img/v034_r200c2_load-runner.png
Normal file
|
After Width: | Height: | Size: 150 KiB |
BIN
docs/qa/v034/img/v034_r200c2_load1.png
Normal file
|
After Width: | Height: | Size: 759 KiB |
BIN
docs/qa/v034/img/v034_r200c2_mempool_size.png
Normal file
|
After Width: | Height: | Size: 2.4 MiB |
BIN
docs/qa/v034/img/v034_r200c2_mempool_size_avg.png
Normal file
|
After Width: | Height: | Size: 192 KiB |
BIN
docs/qa/v034/img/v034_r200c2_peers.png
Normal file
|
After Width: | Height: | Size: 130 KiB |
BIN
docs/qa/v034/img/v034_r200c2_rounds.png
Normal file
|
After Width: | Height: | Size: 1.0 MiB |
BIN
docs/qa/v034/img/v034_r200c2_rss.png
Normal file
|
After Width: | Height: | Size: 926 KiB |
BIN
docs/qa/v034/img/v034_r200c2_rss_avg.png
Normal file
|
After Width: | Height: | Size: 157 KiB |
BIN
docs/qa/v034/img/v034_r200c2_total-txs.png
Normal file
|
After Width: | Height: | Size: 534 KiB |
52
docs/qa/v034/img/v034_report_tabbed.txt
Normal file
@@ -0,0 +1,52 @@
|
||||
Experiment ID: 3d5cf4ef-1a1a-4b46-aa2d-da5643d2e81e │Experiment ID: 80e472ec-13a1-4772-a827-3b0c907fb51d │Experiment ID: 07aca6cf-c5a4-4696-988f-e3270fc6333b
|
||||
│ │
|
||||
Connections: 1 │ Connections: 2 │ Connections: 4
|
||||
Rate: 25 │ Rate: 25 │ Rate: 25
|
||||
Size: 1024 │ Size: 1024 │ Size: 1024
|
||||
│ │
|
||||
Total Valid Tx: 2225 │ Total Valid Tx: 4450 │ Total Valid Tx: 8900
|
||||
Total Negative Latencies: 0 │ Total Negative Latencies: 0 │ Total Negative Latencies: 0
|
||||
Minimum Latency: 599.404362ms │ Minimum Latency: 448.145181ms │ Minimum Latency: 412.485729ms
|
||||
Maximum Latency: 3.539686885s │ Maximum Latency: 3.237392049s │ Maximum Latency: 12.026665368s
|
||||
Average Latency: 1.441485349s │ Average Latency: 1.441267946s │ Average Latency: 2.150192457s
|
||||
Standard Deviation: 541.049869ms │ Standard Deviation: 525.040007ms │ Standard Deviation: 2.233852478s
|
||||
│ │
|
||||
Experiment ID: 953dc544-dd40-40e8-8712-20c34c3ce45e │Experiment ID: d31fc258-16e7-45cd-9dc8-13ab87bc0b0a │Experiment ID: 15d90a7e-b941-42f4-b411-2f15f857739e
|
||||
│ │
|
||||
Connections: 1 │ Connections: 2 │ Connections: 4
|
||||
Rate: 50 │ Rate: 50 │ Rate: 50
|
||||
Size: 1024 │ Size: 1024 │ Size: 1024
|
||||
│ │
|
||||
Total Valid Tx: 4450 │ Total Valid Tx: 8900 │ Total Valid Tx: 17800
|
||||
Total Negative Latencies: 0 │ Total Negative Latencies: 0 │ Total Negative Latencies: 0
|
||||
Minimum Latency: 482.046942ms │ Minimum Latency: 435.458913ms │ Minimum Latency: 510.746448ms
|
||||
Maximum Latency: 3.761483455s │ Maximum Latency: 7.175583584s │ Maximum Latency: 6.551497882s
|
||||
Average Latency: 1.450408183s │ Average Latency: 1.681673116s │ Average Latency: 1.738083875s
|
||||
Standard Deviation: 587.560056ms │ Standard Deviation: 1.147902047s │ Standard Deviation: 943.46522ms
|
||||
│ │
|
||||
Experiment ID: 9a0b9980-9ce6-4db5-a80a-65ca70294b87 │Experiment ID: df8fa4f4-80af-4ded-8a28-356d15018b43 │Experiment ID: d0e41c2c-89c0-4f38-8e34-ca07adae593a
|
||||
│ │
|
||||
Connections: 1 │ Connections: 2 │ Connections: 4
|
||||
Rate: 100 │ Rate: 100 │ Rate: 100
|
||||
Size: 1024 │ Size: 1024 │ Size: 1024
|
||||
│ │
|
||||
Total Valid Tx: 8900 │ Total Valid Tx: 17800 │ Total Valid Tx: 35600
|
||||
Total Negative Latencies: 0 │ Total Negative Latencies: 0 │ Total Negative Latencies: 0
|
||||
Minimum Latency: 477.417219ms │ Minimum Latency: 564.29247ms │ Minimum Latency: 840.71089ms
|
||||
Maximum Latency: 6.63744785s │ Maximum Latency: 6.988553219s │ Maximum Latency: 9.555312398s
|
||||
Average Latency: 1.561216103s │ Average Latency: 1.76419063s │ Average Latency: 3.200941683s
|
||||
Standard Deviation: 1.011333552s │ Standard Deviation: 1.068459423s │ Standard Deviation: 1.732346601s
|
||||
│ │
|
||||
Experiment ID: 493df3ee-4a36-4bce-80f8-6d65da66beda │Experiment ID: 13060525-f04f-46f6-8ade-286684b2fe50 │Experiment ID: 1777cbd2-8c96-42e4-9ec7-9b21f2225e4d
|
||||
│ │
|
||||
Connections: 1 │ Connections: 2 │ Connections: 4
|
||||
Rate: 200 │ Rate: 200 │ Rate: 200
|
||||
Size: 1024 │ Size: 1024 │ Size: 1024
|
||||
│ │
|
||||
Total Valid Tx: 17800 │ Total Valid Tx: 35600 │ Total Valid Tx: 38660
|
||||
Total Negative Latencies: 0 │ Total Negative Latencies: 0 │ Total Negative Latencies: 0
|
||||
Minimum Latency: 493.705261ms │ Minimum Latency: 955.090573ms │ Minimum Latency: 1.9485821s
|
||||
Maximum Latency: 7.440921872s │ Maximum Latency: 10.086673491s │ Maximum Latency: 17.73103976s
|
||||
Average Latency: 1.875510582s │ Average Latency: 3.438130099s │ Average Latency: 8.143862237s
|
||||
Standard Deviation: 1.304336995s │ Standard Deviation: 1.966391574s │ Standard Deviation: 3.943140002s
|
||||
|
||||
BIN
docs/qa/v034/img/v034_rotating_heights.png
Normal file
|
After Width: | Height: | Size: 157 KiB |
BIN
docs/qa/v034/img/v034_rotating_heights_ephe.png
Normal file
|
After Width: | Height: | Size: 140 KiB |
BIN
docs/qa/v034/img/v034_rotating_latencies.png
Normal file
|
After Width: | Height: | Size: 22 KiB |
BIN
docs/qa/v034/img/v034_rotating_latencies_uniq.png
Normal file
|
After Width: | Height: | Size: 22 KiB |
BIN
docs/qa/v034/img/v034_rotating_load1.png
Normal file
|
After Width: | Height: | Size: 1.5 MiB |
BIN
docs/qa/v034/img/v034_rotating_peers.png
Normal file
|
After Width: | Height: | Size: 486 KiB |
BIN
docs/qa/v034/img/v034_rotating_rss_avg.png
Normal file
|
After Width: | Height: | Size: 193 KiB |
BIN
docs/qa/v034/img/v034_rotating_total-txs.png
Normal file
|
After Width: | Height: | Size: 197 KiB |
326
docs/qa/v037/README.md
Normal file
@@ -0,0 +1,326 @@
|
||||
---
|
||||
order: 1
|
||||
parent:
|
||||
title: Tendermint Quality Assurance Results for v0.37.x
|
||||
description: This is a report on the results obtained when running v0.37.x on testnets
|
||||
order: 2
|
||||
---
|
||||
|
||||
# v0.37.x
|
||||
|
||||
## Issues discovered
|
||||
|
||||
During this iteration of the QA process, the following issues were found:
|
||||
|
||||
* (critical, fixed) [\#9533] - This bug caused full nodes to sometimes get stuck
|
||||
when blocksyncing, requiring a manual restart to unblock them. Importantly,
|
||||
this bug was also present in v0.34.x and the fix was also backported in
|
||||
[\#9534].
|
||||
* (critical, fixed) [\#9539] - `loadtime` is very likely to include more than
|
||||
one "=" character in transactions, which is rejected by the e2e application.
|
||||
* (critical, fixed) [\#9581] - Absent prometheus label makes Tendermint crash
|
||||
when enabling Prometheus metric collection
|
||||
* (non-critical, not fixed) [\#9548] - Full nodes can go over 50 connected
|
||||
peers, which is not intended by the default configuration.
|
||||
* (non-critical, not fixed) [\#9537] - With the default mempool cache setting,
|
||||
duplicated transactions are not rejected when gossiped and eventually flood
|
||||
all mempools. The 200 node testnets were thus run with a value of 200000 (as
|
||||
opposed to the default 10000)
|
||||
|
||||
## 200 Node Testnet
|
||||
|
||||
### Finding the Saturation Point
|
||||
|
||||
The first goal is to identify the saturation point and compare it with the baseline (v0.34.x).
|
||||
For further details, see [this paragraph](../v034/README.md#finding-the-saturation-point)
|
||||
in the baseline version.
|
||||
|
||||
The following table summarizes the results for v0.37.x, for the different experiments
|
||||
(extracted from file [`v037_report_tabbed.txt`](./img/v037_report_tabbed.txt)).
|
||||
|
||||
The X axis of this table is `c`, the number of connections created by the load runner process to the target node.
|
||||
The Y axis of this table is `r`, the rate or number of transactions issued per second.
|
||||
|
||||
| | c=1 | c=2 | c=4 |
|
||||
| :--- | ----: | ----: | ----: |
|
||||
| r=25 | 2225 | 4450 | 8900 |
|
||||
| r=50 | 4450 | 8900 | 17800 |
|
||||
| r=100 | 8900 | 17800 | 35600 |
|
||||
| r=200 | 17800 | 35600 | 38660 |
|
||||
|
||||
For comparison, this is the table with the baseline version.
|
||||
|
||||
| | c=1 | c=2 | c=4 |
|
||||
| :--- | ----: | ----: | ----: |
|
||||
| r=25 | 2225 | 4450 | 8900 |
|
||||
| r=50 | 4450 | 8900 | 17800 |
|
||||
| r=100 | 8900 | 17800 | 35400 |
|
||||
| r=200 | 17800 | 35600 | 37358 |
|
||||
|
||||
The saturation point is beyond the diagonal:
|
||||
|
||||
* `r=200,c=2`
|
||||
* `r=100,c=4`
|
||||
|
||||
which is at the same place as the baseline. For more details on the saturation point, see
|
||||
[this paragraph](../v034/README.md#finding-the-saturation-point) in the baseline version.
|
||||
|
||||
The experiment chosen to examine Prometheus metrics is the same as in the baseline:
|
||||
**`r=200,c=2`**.
|
||||
|
||||
The load runner's CPU load was negligible (near 0) when running `r=200,c=2`.
|
||||
|
||||
### Examining latencies
|
||||
|
||||
The method described [here](../method.md) allows us to plot the latencies of transactions
|
||||
for all experiments.
|
||||
|
||||

|
||||
|
||||
The data seen in the plot is similar to that of the baseline.
|
||||
|
||||

|
||||
|
||||
Therefore, for further details on these plots,
|
||||
see [this paragraph](../v034/README.md#examining-latencies) in the baseline version.
|
||||
|
||||
The following plot summarizes average latencies versus overall throughputs
|
||||
across different numbers of WebSocket connections to the node into which
|
||||
transactions are being loaded.
|
||||
|
||||

|
||||
|
||||
This is similar to that of the baseline plot:
|
||||
|
||||

|
||||
|
||||
### Prometheus Metrics on the Chosen Experiment
|
||||
|
||||
As mentioned [above](#finding-the-saturation-point), the chosen experiment is `r=200,c=2`.
|
||||
This section further examines key metrics for this experiment extracted from Prometheus data.
|
||||
|
||||
#### Mempool Size
|
||||
|
||||
The mempool size, a count of the number of transactions in the mempool, was shown to be stable and homogeneous
|
||||
at all full nodes. It did not exhibit any unconstrained growth.
|
||||
The plot below shows the evolution over time of the cumulative number of transactions inside all full nodes' mempools
|
||||
at a given time.
|
||||
|
||||

|
||||
|
||||
The plot below shows the evolution of the average over all full nodes, which oscillates between 1500 and 2000 outstanding transactions.
|
||||
|
||||

|
||||
|
||||
The peaks observed coincide with the moments when some nodes reached round 1 of consensus (see below).
|
||||
|
||||
**These plots yield similar results to the baseline**:
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
#### Peers
|
||||
|
||||
The number of peers was stable at all nodes.
|
||||
It was higher for the seed nodes (around 140) than for the rest (between 16 and 78).
|
||||
|
||||

|
||||
|
||||
Just as in the baseline, the fact that non-seed nodes reach more than 50 peers is due to #9548.
|
||||
|
||||
**This plot yields similar results to the baseline**:
|
||||
|
||||

|
||||
|
||||
#### Consensus Rounds per Height
|
||||
|
||||
Most heights took just one round, but some nodes needed to advance to round 1 at some point.
|
||||
|
||||

|
||||
|
||||
**This plot yields slightly better results than the baseline**:
|
||||
|
||||

|
||||
|
||||
#### Blocks Produced per Minute, Transactions Processed per Minute
|
||||
|
||||
The blocks produced per minute are the gradient of this plot.
|
||||
|
||||

|
||||
|
||||
Over a period of 2 minutes, the height goes from 477 to 524.
|
||||
This results in an average of 23.5 blocks produced per minute.
|
||||
|
||||
The transactions processed per minute are the gradient of this plot.
|
||||
|
||||

|
||||
|
||||
Over a period of 2 minutes, the total goes from 64525 to 100125 transactions,
|
||||
resulting in 17800 transactions per minute. However, we can see in the plot that
|
||||
all transactions in the load are processed long before the two minutes.
|
||||
If we adjust the time window when transactions are processed (approx. 90 seconds),
|
||||
we obtain 23733 transactions per minute.
|
||||
|
||||
**These plots yield similar results to the baseline**:
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
#### Memory Resident Set Size
|
||||
|
||||
Resident Set Size of all monitored processes is plotted below.
|
||||
|
||||

|
||||
|
||||
The average over all processes oscillates around 380 MiB and does not demonstrate unconstrained growth.
|
||||
|
||||

|
||||
|
||||
**These plots yield similar results to the baseline**:
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
#### CPU utilization
|
||||
|
||||
The best metric from Prometheus to gauge CPU utilization in a Unix machine is `load1`,
|
||||
as it usually appears in the
|
||||
[output of `top`](https://www.digitalocean.com/community/tutorials/load-average-in-linux).
|
||||
|
||||

|
||||
|
||||
It is contained below 5 on most nodes.
|
||||
|
||||
**This plot yields similar results to the baseline**:
|
||||
|
||||

|
||||
|
||||
### Test Result
|
||||
|
||||
**Result: PASS**
|
||||
|
||||
Date: 2022-10-14
|
||||
|
||||
Version: 1cf9d8e276afe8595cba960b51cd056514965fd1
|
||||
|
||||
## Rotating Node Testnet
|
||||
|
||||
We use the same load as in the baseline: `c=4,r=800`.
|
||||
|
||||
Just as in the baseline tests, the version of Tendermint used for these tests is affected by #9539.
|
||||
See this paragraph in the [baseline report](../v034/README.md#rotating-node-testnet) for further details.
|
||||
Finally, note that this setup allows for a fairer comparison between this version and the baseline.
|
||||
|
||||
### Latencies
|
||||
|
||||
The plot of all latencies can be seen here.
|
||||
|
||||

|
||||
|
||||
This is similar to the baseline.
|
||||
|
||||

|
||||
|
||||
Note that we are comparing against the baseline plot with _unique_
|
||||
transactions. This is because the problem with duplicate transactions
|
||||
detected during the baseline experiment did not show up for `v0.37`,
|
||||
which is _not_ proof that the problem is not present in `v0.37`.
|
||||
|
||||
### Prometheus Metrics
|
||||
|
||||
The set of metrics shown here matches those shown on the baseline (`v0.34`) for the same experiment.
|
||||
We also show the baseline results for comparison.
|
||||
|
||||
#### Blocks and Transactions per minute
|
||||
|
||||
The blocks produced per minute are the gradient of this plot.
|
||||
|
||||

|
||||
|
||||
Over a period of 4446 seconds, the height goes from 5 to 3323.
|
||||
This results in an average of 45 blocks produced per minute,
|
||||
which is similar to the baseline, shown below.
|
||||
|
||||

|
||||
|
||||
The following two plots show only the heights reported by ephemeral nodes.
|
||||
The second plot is the baseline plot for comparison.
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
By the length of the segments, we can see that ephemeral nodes in `v0.37`
|
||||
catch up slightly faster.
|
||||
|
||||
The transactions processed per minute are the gradient of this plot.
|
||||
|
||||

|
||||
|
||||
Over a period of 3852 seconds, the total goes from 597 to 267298 transactions in one of the validators,
|
||||
resulting in 4154 transactions per minute, which is slightly lower than the baseline,
|
||||
although the baseline had to deal with duplicate transactions.
|
||||
|
||||
For comparison, this is the baseline plot.
|
||||
|
||||

|
||||
|
||||
#### Peers
|
||||
|
||||
The plot below shows the evolution of the number of peers throughout the experiment.
|
||||
|
||||

|
||||
|
||||
This is the baseline plot, for comparison.
|
||||
|
||||

|
||||
|
||||
The plotted values and their evolution are comparable in both plots.
|
||||
|
||||
For further details on these plots, see the baseline report.
|
||||
|
||||
#### Memory Resident Set Size
|
||||
|
||||
The average Resident Set Size (RSS) over all processes looks slightly more stable
|
||||
on `v0.37` (first plot) than on the baseline (second plot).
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
The memory taken by the validators and the ephemeral nodes when they are up is comparable (not shown in the plots),
|
||||
just as observed in the baseline.
|
||||
|
||||
#### CPU utilization
|
||||
|
||||
The plot shows metric `load1` for all nodes.
|
||||
|
||||

|
||||
|
||||
This is the baseline plot.
|
||||
|
||||

|
||||
|
||||
In both cases, it is contained under 5 most of the time, which is considered normal load.
|
||||
The green line in the `v0.37` plot and the purple line in the baseline plot (`v0.34`)
|
||||
correspond to the validators receiving all transactions, via RPC, from the load runner process.
|
||||
In both cases, they oscillate around 5 (normal load). The main difference is that other
|
||||
nodes are generally less loaded in `v0.37`.
|
||||
|
||||
### Test Result
|
||||
|
||||
**Result: PASS**
|
||||
|
||||
Date: 2022-10-10
|
||||
|
||||
Version: 155110007b9d8b83997a799016c1d0844c8efbaf
|
||||
|
||||
[\#9533]: https://github.com/tendermint/tendermint/pull/9533
|
||||
[\#9534]: https://github.com/tendermint/tendermint/pull/9534
|
||||
[\#9539]: https://github.com/tendermint/tendermint/issues/9539
|
||||
[\#9548]: https://github.com/tendermint/tendermint/issues/9548
|
||||
[\#9537]: https://github.com/tendermint/tendermint/issues/9537
|
||||
[\#9581]: https://github.com/tendermint/tendermint/issues/9581
|
||||
BIN
docs/qa/v037/img/v037_200node_latencies.png
Normal file
|
After Width: | Height: | Size: 42 KiB |
BIN
docs/qa/v037/img/v037_latency_throughput.png
Normal file
|
After Width: | Height: | Size: 35 KiB |
BIN
docs/qa/v037/img/v037_r200c2_heights.png
Normal file
|
After Width: | Height: | Size: 411 KiB |
BIN
docs/qa/v037/img/v037_r200c2_load1.png
Normal file
|
After Width: | Height: | Size: 887 KiB |
BIN
docs/qa/v037/img/v037_r200c2_mempool_size.png
Normal file
|
After Width: | Height: | Size: 2.3 MiB |
BIN
docs/qa/v037/img/v037_r200c2_mempool_size_avg.png
Normal file
|
After Width: | Height: | Size: 183 KiB |
BIN
docs/qa/v037/img/v037_r200c2_peers.png
Normal file
|
After Width: | Height: | Size: 133 KiB |
BIN
docs/qa/v037/img/v037_r200c2_rounds.png
Normal file
|
After Width: | Height: | Size: 589 KiB |
BIN
docs/qa/v037/img/v037_r200c2_rss.png
Normal file
|
After Width: | Height: | Size: 816 KiB |
BIN
docs/qa/v037/img/v037_r200c2_rss_avg.png
Normal file
|
After Width: | Height: | Size: 154 KiB |
BIN
docs/qa/v037/img/v037_r200c2_total-txs.png
Normal file
|
After Width: | Height: | Size: 538 KiB |
52
docs/qa/v037/img/v037_report_tabbed.txt
Normal file
@@ -0,0 +1,52 @@
|
||||
Experiment ID: af129eae-7039-4c76-8c37-cff9ac636a84 │Experiment ID: 0f88bd33-9bf0-4197-8d1d-9a737c301ec6 │Experiment ID: 88227cad-2ba8-4eb6-b493-041d8120b46f
|
||||
│ │
|
||||
Connections: 1 │ Connections: 2 │ Connections: 4
|
||||
Rate: 25 │ Rate: 25 │ Rate: 25
|
||||
Size: 1024 │ Size: 1024 │ Size: 1024
|
||||
│ │
|
||||
Total Valid Tx: 2225 │ Total Valid Tx: 4450 │ Total Valid Tx: 8900
|
||||
Total Negative Latencies: 0 │ Total Negative Latencies: 0 │ Total Negative Latencies: 0
|
||||
Minimum Latency: 506.248587ms │ Minimum Latency: 469.53452ms │ Minimum Latency: 588.900721ms
|
||||
Maximum Latency: 3.032125789s │ Maximum Latency: 6.548830955s │ Maximum Latency: 6.533739843s
|
||||
Average Latency: 1.427767726s │ Average Latency: 1.448582257s │ Average Latency: 1.717432341s
|
||||
Standard Deviation: 524.11782ms │ Standard Deviation: 768.684133ms │ Standard Deviation: 1.000015768s
|
||||
│ │
|
||||
Experiment ID: f03d39bd-0233-4b3c-b461-543445ae1d4b │Experiment ID: 46674f1c-e591-4e36-bb9b-f375c19fc475 │Experiment ID: 5385c159-8d4d-455b-bced-dcd4a3209988
|
||||
│ │
|
||||
Connections: 1 │ Connections: 2 │ Connections: 4
|
||||
Rate: 50 │ Rate: 50 │ Rate: 50
|
||||
Size: 1024 │ Size: 1024 │ Size: 1024
|
||||
│ │
|
||||
Total Valid Tx: 4450 │ Total Valid Tx: 8900 │ Total Valid Tx: 17800
|
||||
Total Negative Latencies: 0 │ Total Negative Latencies: 0 │ Total Negative Latencies: 0
|
||||
Minimum Latency: 477.46027ms │ Minimum Latency: 455.757111ms │ Minimum Latency: 594.749081ms
|
||||
Maximum Latency: 2.483895394s │ Maximum Latency: 2.904715695s │ Maximum Latency: 9.294950389s
|
||||
Average Latency: 1.407374662s │ Average Latency: 1.397385779s │ Average Latency: 2.621122536s
|
||||
Standard Deviation: 505.150067ms │ Standard Deviation: 551.67603ms │ Standard Deviation: 1.772725794s
|
||||
│ │
|
||||
Experiment ID: 9161b4a7-d75c-455f-b82d-2b5235d533cf │Experiment ID: 993a13a8-9db1-4b2b-9c20-71a5b85e4bbf │Experiment ID: ad1eb9e1-f4d6-41fd-9ba7-0f1f7dde1e3e
|
||||
│ │
|
||||
Connections: 1 │ Connections: 2 │ Connections: 4
|
||||
Rate: 100 │ Rate: 100 │ Rate: 100
|
||||
Size: 1024 │ Size: 1024 │ Size: 1024
|
||||
│ │
|
||||
Total Valid Tx: 8900 │ Total Valid Tx: 17800 │ Total Valid Tx: 35400
|
||||
Total Negative Latencies: 0 │ Total Negative Latencies: 0 │ Total Negative Latencies: 0
|
||||
Minimum Latency: 448.050467ms │ Minimum Latency: 605.436195ms │ Minimum Latency: 1.16816912s
|
||||
Maximum Latency: 3.789711139s │ Maximum Latency: 7.292770222s │ Maximum Latency: 11.378681842s
|
||||
Average Latency: 1.451342158s │ Average Latency: 2.07457999s │ Average Latency: 3.918384209s
|
||||
Standard Deviation: 644.075973ms │ Standard Deviation: 1.230204022s │ Standard Deviation: 2.172400458s
|
||||
│ │
|
||||
Experiment ID: 3cbe9c3d-9c43-4c9f-b5ca-b567d20bbd57 │Experiment ID: af836c5e-d9b6-4d5d-971c-2fc7f07aa2a0 │Experiment ID: 77606397-4989-41d4-b13b-f1f4d1af063f
|
||||
│ │
|
||||
Connections: 1 │ Connections: 2 │ Connections: 4
|
||||
Rate: 200 │ Rate: 200 │ Rate: 200
|
||||
Size: 1024 │ Size: 1024 │ Size: 1024
|
||||
│ │
|
||||
Total Valid Tx: 17800 │ Total Valid Tx: 35600 │ Total Valid Tx: 37358
|
||||
Total Negative Latencies: 0 │ Total Negative Latencies: 0 │ Total Negative Latencies: 0
|
||||
Minimum Latency: 519.984701ms │ Minimum Latency: 820.755087ms │ Minimum Latency: 1.712574804s
|
||||
Maximum Latency: 12.609056712s │ Maximum Latency: 9.260798095s │ Maximum Latency: 25.739223696s
|
||||
Average Latency: 2.717853101s │ Average Latency: 3.477731881s │ Average Latency: 8.547725264s
|
||||
Standard Deviation: 2.390778155s │ Standard Deviation: 1.675000913s │ Standard Deviation: 4.76961569s
|
||||
|
||||
BIN
docs/qa/v037/img/v037_rotating_heights.png
Normal file
|
After Width: | Height: | Size: 167 KiB |
BIN
docs/qa/v037/img/v037_rotating_heights_ephe.png
Normal file
|
After Width: | Height: | Size: 138 KiB |
BIN
docs/qa/v037/img/v037_rotating_latencies.png
Normal file
|
After Width: | Height: | Size: 22 KiB |
BIN
docs/qa/v037/img/v037_rotating_load1.png
Normal file
|
After Width: | Height: | Size: 1.3 MiB |
BIN
docs/qa/v037/img/v037_rotating_peers.png
Normal file
|
After Width: | Height: | Size: 577 KiB |
BIN
docs/qa/v037/img/v037_rotating_rss_avg.png
Normal file
|
After Width: | Height: | Size: 217 KiB |
BIN
docs/qa/v037/img/v037_rotating_total-txs.png
Normal file
|
After Width: | Height: | Size: 181 KiB |
6
go.mod
@@ -3,7 +3,7 @@ module github.com/tendermint/tendermint
|
||||
go 1.18
|
||||
|
||||
require (
|
||||
github.com/BurntSushi/toml v1.2.0
|
||||
github.com/BurntSushi/toml v1.2.1
|
||||
github.com/adlio/schema v1.3.3
|
||||
github.com/cenkalti/backoff v2.2.1+incompatible // indirect
|
||||
github.com/fortytw2/leaktest v1.3.0
|
||||
@@ -28,13 +28,13 @@ require (
|
||||
github.com/rs/cors v1.8.2
|
||||
github.com/sasha-s/go-deadlock v0.3.1
|
||||
github.com/snikch/goodman v0.0.0-20171125024755-10e37e294daa
|
||||
github.com/spf13/cobra v1.5.0
|
||||
github.com/spf13/cobra v1.6.0
|
||||
github.com/spf13/viper v1.13.0
|
||||
github.com/stretchr/testify v1.8.0
|
||||
github.com/tendermint/tm-db v0.6.6
|
||||
golang.org/x/crypto v0.0.0-20220722155217-630584e8d5aa
|
||||
golang.org/x/net v0.0.0-20220812174116-3211cb980234
|
||||
google.golang.org/grpc v1.49.0
|
||||
google.golang.org/grpc v1.50.1
|
||||
)
|
||||
|
||||
require (
|
||||
|
||||
12
go.sum
@@ -53,8 +53,8 @@ github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78/go.mod h1:LmzpDX
|
||||
github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 h1:UQHMgLO+TxOElx5B5HZ4hJQsoJ/PvUvKRhJHDQXO8P8=
|
||||
github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E=
|
||||
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
|
||||
github.com/BurntSushi/toml v1.2.0 h1:Rt8g24XnyGTyglgET/PRUNlrUeu9F5L+7FilkXfZgs0=
|
||||
github.com/BurntSushi/toml v1.2.0/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
|
||||
github.com/BurntSushi/toml v1.2.1 h1:9F2/+DoOYIOksmaJFPw1tGFy1eDnIJXg+UHjuD8lTak=
|
||||
github.com/BurntSushi/toml v1.2.1/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
|
||||
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
|
||||
github.com/ChainSafe/go-schnorrkel v0.0.0-20200405005733-88cbf1b4c40d/go.mod h1:URdX5+vg25ts3aCh8H5IFZybJYKWhJHYMTnf+ULtoC4=
|
||||
github.com/DATA-DOG/go-sqlmock v1.5.0 h1:Shsta01QNfFxHCfpW6YH2STWB0MudeXXEWMr20OEh60=
|
||||
@@ -1042,8 +1042,8 @@ github.com/spf13/cobra v0.0.5/go.mod h1:3K3wKZymM7VvHMDS9+Akkh4K60UwM26emMESw8tL
|
||||
github.com/spf13/cobra v1.0.0/go.mod h1:/6GTrnGXV9HjY+aR4k0oJ5tcvakLuG6EuKReYlHNrgE=
|
||||
github.com/spf13/cobra v1.1.1/go.mod h1:WnodtKOvamDL/PwE2M4iKs8aMDBZ5Q5klgD3qfVJQMI=
|
||||
github.com/spf13/cobra v1.2.1/go.mod h1:ExllRjgxM/piMAM+3tAZvg8fsklGAf3tPfi+i8t68Nk=
|
||||
github.com/spf13/cobra v1.5.0 h1:X+jTBEBqF0bHN+9cSMgmfuvv2VHJ9ezmFNf9Y/XstYU=
|
||||
github.com/spf13/cobra v1.5.0/go.mod h1:dWXEIy2H428czQCjInthrTRUg7yKbok+2Qi/yBIJoUM=
|
||||
github.com/spf13/cobra v1.6.0 h1:42a0n6jwCot1pUmomAp4T7DeMD+20LFv4Q54pxLf2LI=
|
||||
github.com/spf13/cobra v1.6.0/go.mod h1:IOw/AERYS7UzyrGinqmz6HLUo219MORXGxhbaJUqzrY=
|
||||
github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo=
|
||||
github.com/spf13/jwalterweatherman v1.1.0 h1:ue6voC5bR5F8YxI5S67j9i582FU4Qvo2bmqnqMYADFk=
|
||||
github.com/spf13/jwalterweatherman v1.1.0/go.mod h1:aNWZUN0dPAAO/Ljvb5BEdw96iTZ0EXowPYD95IqWIGo=
|
||||
@@ -1696,8 +1696,8 @@ google.golang.org/grpc v1.38.0/go.mod h1:NREThFqKR1f3iQ6oBuvc5LadQuXVGo9rkm5ZGrQ
|
||||
google.golang.org/grpc v1.40.0/go.mod h1:ogyxbiOoUXAkP+4+xa6PZSE9DZgIHtSpzjDTB9KAK34=
|
||||
google.golang.org/grpc v1.41.0/go.mod h1:U3l9uK9J0sini8mHphKoXyaqDA/8VyGnDee1zzIUK6k=
|
||||
google.golang.org/grpc v1.42.0/go.mod h1:k+4IHHFw41K8+bbowsex27ge2rCb65oeWqe4jJ590SU=
|
||||
google.golang.org/grpc v1.49.0 h1:WTLtQzmQori5FUH25Pq4WT22oCsv8USpQ+F6rqtsmxw=
|
||||
google.golang.org/grpc v1.49.0/go.mod h1:ZgQEeidpAuNRZ8iRrlBKXZQP1ghovWIVhdJRyCDK+GI=
|
||||
google.golang.org/grpc v1.50.1 h1:DS/BukOZWp8s6p4Dt/tOaJaTQyPyOoCcrjroHuCeLzY=
|
||||
google.golang.org/grpc v1.50.1/go.mod h1:ZgQEeidpAuNRZ8iRrlBKXZQP1ghovWIVhdJRyCDK+GI=
|
||||
google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
|
||||
google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
|
||||
google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
|
||||
|
||||
@@ -31,7 +31,7 @@ To replace the built-in p2p.Reactor, use the CustomReactors option:
|
||||
dbProvider,
|
||||
metricsProvider,
|
||||
logger,
|
||||
CustomReactors(map[string]p2p.Reactor{"BLOCKCHAIN": customBlockchainReactor}),
|
||||
CustomReactors(map[string]p2p.Reactor{"BLOCKSYNC": customBlocksyncReactor}),
|
||||
)
|
||||
|
||||
The list of existing reactors can be found in CustomReactors documentation.
|
||||
|
||||
20
node/node.go
@@ -146,7 +146,7 @@ type blockSyncReactor interface {
|
||||
// result in replacing it with the custom one.
|
||||
//
|
||||
// - MEMPOOL
|
||||
// - BLOCKCHAIN
|
||||
// - BLOCKSYNC
|
||||
// - CONSENSUS
|
||||
// - EVIDENCE
|
||||
// - PEX
|
||||
@@ -303,7 +303,7 @@ func createAndStartIndexerService(
|
||||
blockIndexer = &blockidxnull.BlockerIndexer{}
|
||||
}
|
||||
|
||||
indexerService := txindex.NewIndexerService(txIndexer, blockIndexer, eventBus)
|
||||
indexerService := txindex.NewIndexerService(txIndexer, blockIndexer, eventBus, false)
|
||||
indexerService.SetLogger(logger.With("module", "txindex"))
|
||||
|
||||
if err := indexerService.Start(); err != nil {
|
||||
@@ -441,7 +441,7 @@ func createEvidenceReactor(config *cfg.Config, dbProvider DBProvider,
|
||||
return evidenceReactor, evidencePool, nil
|
||||
}
|
||||
|
||||
func createBlockchainReactor(config *cfg.Config,
|
||||
func createBlocksyncReactor(config *cfg.Config,
|
||||
state sm.State,
|
||||
blockExec *sm.BlockExecutor,
|
||||
blockStore *store.BlockStore,
|
||||
@@ -457,7 +457,7 @@ func createBlockchainReactor(config *cfg.Config,
|
||||
return nil, fmt.Errorf("unknown fastsync version %s", config.BlockSync.Version)
|
||||
}
|
||||
|
||||
bcReactor.SetLogger(logger.With("module", "blockchain"))
|
||||
bcReactor.SetLogger(logger.With("module", "blocksync"))
|
||||
return bcReactor, nil
|
||||
}
|
||||
|
||||
@@ -584,7 +584,7 @@ func createSwitch(config *cfg.Config,
|
||||
)
|
||||
sw.SetLogger(p2pLogger)
|
||||
sw.AddReactor("MEMPOOL", mempoolReactor)
|
||||
sw.AddReactor("BLOCKCHAIN", bcReactor)
|
||||
sw.AddReactor("BLOCKSYNC", bcReactor)
|
||||
sw.AddReactor("CONSENSUS", consensusReactor)
|
||||
sw.AddReactor("EVIDENCE", evidenceReactor)
|
||||
sw.AddReactor("STATESYNC", stateSyncReactor)
|
||||
@@ -803,7 +803,7 @@ func NewNode(config *cfg.Config,
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// make block executor for consensus and blockchain reactors to execute blocks
|
||||
// make block executor for consensus and blocksync reactors to execute blocks
|
||||
blockExec := sm.NewBlockExecutor(
|
||||
stateStore,
|
||||
logger.With("module", "state"),
|
||||
@@ -814,10 +814,10 @@ func NewNode(config *cfg.Config,
|
||||
sm.BlockExecutorWithMetrics(smMetrics),
|
||||
)
|
||||
|
||||
// Make BlockchainReactor. Don't start block sync if we're doing a state sync first.
|
||||
bcReactor, err := createBlockchainReactor(config, state, blockExec, blockStore, blockSync && !stateSync, logger)
|
||||
// Make BlocksyncReactor. Don't start block sync if we're doing a state sync first.
|
||||
bcReactor, err := createBlocksyncReactor(config, state, blockExec, blockStore, blockSync && !stateSync, logger)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("could not create blockchain reactor: %w", err)
|
||||
return nil, fmt.Errorf("could not create blocksync reactor: %w", err)
|
||||
}
|
||||
|
||||
// Make ConsensusReactor. Don't enable fully if doing a state sync and/or block sync first.
|
||||
@@ -990,7 +990,7 @@ func (n *Node) OnStart() error {
|
||||
if n.stateSync {
|
||||
bcR, ok := n.bcReactor.(blockSyncReactor)
|
||||
if !ok {
|
||||
return fmt.Errorf("this blockchain reactor does not support switching from state sync")
|
||||
return fmt.Errorf("this blocksync reactor does not support switching from state sync")
|
||||
}
|
||||
err := startStateSync(n.stateSyncReactor, bcR, n.consensusReactor, n.stateSyncProvider,
|
||||
n.config.StateSync, n.config.BlockSyncMode, n.stateStore, n.blockStore, n.stateSyncGenesis)
|
||||
|
||||
@@ -427,7 +427,7 @@ func TestNodeNewNodeCustomReactors(t *testing.T) {
|
||||
RecvMessageCapacity: 100,
|
||||
},
|
||||
}
|
||||
customBlockchainReactor := p2pmock.NewReactor()
|
||||
customBlocksyncReactor := p2pmock.NewReactor()
|
||||
|
||||
nodeKey, err := p2p.LoadOrGenNodeKey(config.NodeKeyFile())
|
||||
require.NoError(t, err)
|
||||
@@ -440,7 +440,7 @@ func TestNodeNewNodeCustomReactors(t *testing.T) {
|
||||
DefaultDBProvider,
|
||||
DefaultMetricsProvider(config.Instrumentation),
|
||||
log.TestingLogger(),
|
||||
CustomReactors(map[string]p2p.Reactor{"FOO": cr, "BLOCKCHAIN": customBlockchainReactor}),
|
||||
CustomReactors(map[string]p2p.Reactor{"FOO": cr, "BLOCKSYNC": customBlocksyncReactor}),
|
||||
)
|
||||
require.NoError(t, err)
|
||||
|
||||
@@ -451,8 +451,8 @@ func TestNodeNewNodeCustomReactors(t *testing.T) {
|
||||
assert.True(t, cr.IsRunning())
|
||||
assert.Equal(t, cr, n.Switch().Reactor("FOO"))
|
||||
|
||||
assert.True(t, customBlockchainReactor.IsRunning())
|
||||
assert.Equal(t, customBlockchainReactor, n.Switch().Reactor("BLOCKCHAIN"))
|
||||
assert.True(t, customBlocksyncReactor.IsRunning())
|
||||
assert.Equal(t, customBlocksyncReactor, n.Switch().Reactor("BLOCKSYNC"))
|
||||
|
||||
channels := n.NodeInfo().(p2p.DefaultNodeInfo).Channels
|
||||
assert.Contains(t, channels, mempl.MempoolChannel)
|
||||
|
||||
@@ -145,6 +145,13 @@ func (e ErrTransportClosed) Error() string {
|
||||
return "transport has been closed"
|
||||
}
|
||||
|
||||
// ErrPeerRemoval is the error reported for a peer whose removal was already
// attempted and failed (for example when such a peer is added to a peer set).
type ErrPeerRemoval struct{}

// Error implements the error interface with a fixed description.
func (ErrPeerRemoval) Error() string { return "peer removal failed" }
|
||||
|
||||
//-------------------------------------------------------------------
|
||||
|
||||
type ErrNetAddressNoID struct {
|
||||
|
||||
@@ -68,3 +68,5 @@ func (mp *Peer) RemoteIP() net.IP { return mp.ip }
|
||||
func (mp *Peer) SocketAddr() *p2p.NetAddress { return mp.addr }
|
||||
func (mp *Peer) RemoteAddr() net.Addr { return &net.TCPAddr{IP: mp.ip, Port: 8800} }
|
||||
func (mp *Peer) CloseConn() error { return nil }
|
||||
func (mp *Peer) SetRemovalFailed() {}
|
||||
func (mp *Peer) GetRemovalFailed() bool { return false }
|
||||
|
||||
@@ -53,6 +53,20 @@ func (_m *Peer) Get(_a0 string) interface{} {
|
||||
return r0
|
||||
}
|
||||
|
||||
// GetRemovalFailed provides a mock function with given fields:
|
||||
func (_m *Peer) GetRemovalFailed() bool {
|
||||
ret := _m.Called()
|
||||
|
||||
var r0 bool
|
||||
if rf, ok := ret.Get(0).(func() bool); ok {
|
||||
r0 = rf()
|
||||
} else {
|
||||
r0 = ret.Get(0).(bool)
|
||||
}
|
||||
|
||||
return r0
|
||||
}
|
||||
|
||||
// ID provides a mock function with given fields:
|
||||
func (_m *Peer) ID() p2p.ID {
|
||||
ret := _m.Called()
|
||||
@@ -244,6 +258,11 @@ func (_m *Peer) SetLogger(_a0 log.Logger) {
|
||||
_m.Called(_a0)
|
||||
}
|
||||
|
||||
// SetRemovalFailed provides a mock function with given fields:
|
||||
func (_m *Peer) SetRemovalFailed() {
|
||||
_m.Called()
|
||||
}
|
||||
|
||||
// SocketAddr provides a mock function with given fields:
|
||||
func (_m *Peer) SocketAddr() *p2p.NetAddress {
|
||||
ret := _m.Called()
|
||||
|
||||
14
p2p/peer.go
@@ -39,6 +39,9 @@ type Peer interface {
|
||||
|
||||
Set(string, interface{})
|
||||
Get(string) interface{}
|
||||
|
||||
SetRemovalFailed()
|
||||
GetRemovalFailed() bool
|
||||
}
|
||||
|
||||
//----------------------------------------------------------
|
||||
@@ -117,6 +120,9 @@ type peer struct {
|
||||
|
||||
metrics *Metrics
|
||||
metricsTicker *time.Ticker
|
||||
|
||||
// When removal of a peer fails, we set this flag
|
||||
removalAttemptFailed bool
|
||||
}
|
||||
|
||||
type PeerOption func(*peer)
|
||||
@@ -316,6 +322,14 @@ func (p *peer) CloseConn() error {
|
||||
return p.peerConn.conn.Close()
|
||||
}
|
||||
|
||||
func (p *peer) SetRemovalFailed() {
|
||||
p.removalAttemptFailed = true
|
||||
}
|
||||
|
||||
func (p *peer) GetRemovalFailed() bool {
|
||||
return p.removalAttemptFailed
|
||||
}
|
||||
|
||||
//---------------------------------------------------
|
||||
// methods only used for testing
|
||||
// TODO: can we remove these?
|
||||
|
||||
@@ -47,6 +47,9 @@ func (ps *PeerSet) Add(peer Peer) error {
|
||||
if ps.lookup[peer.ID()] != nil {
|
||||
return ErrSwitchDuplicatePeerID{peer.ID()}
|
||||
}
|
||||
if peer.GetRemovalFailed() {
|
||||
return ErrPeerRemoval{}
|
||||
}
|
||||
|
||||
index := len(ps.list)
|
||||
// Appending is safe even with other goroutines
|
||||
@@ -107,6 +110,12 @@ func (ps *PeerSet) Remove(peer Peer) bool {
|
||||
|
||||
item := ps.lookup[peer.ID()]
|
||||
if item == nil {
|
||||
// Removing the peer has failed so we set a flag to mark that a removal was attempted.
|
||||
// This can happen when the peer add routine from the switch is running in
|
||||
// parallel to the receive routine of MConn.
|
||||
// There is an error within MConn but the switch has not actually added the peer to the peer set yet.
|
||||
// Setting this flag will prevent a peer from being added to a node's peer set afterwards.
|
||||
peer.SetRemovalFailed()
|
||||
return false
|
||||
}
|
||||
|
||||
|
||||
@@ -32,6 +32,8 @@ func (mp *mockPeer) RemoteIP() net.IP { return mp.ip }
|
||||
func (mp *mockPeer) SocketAddr() *NetAddress { return nil }
|
||||
func (mp *mockPeer) RemoteAddr() net.Addr { return &net.TCPAddr{IP: mp.ip, Port: 8800} }
|
||||
func (mp *mockPeer) CloseConn() error { return nil }
|
||||
func (mp *mockPeer) SetRemovalFailed() {}
|
||||
func (mp *mockPeer) GetRemovalFailed() bool { return false }
|
||||
|
||||
// Returns a mock peer
|
||||
func newMockPeer(ip net.IP) *mockPeer {
|
||||
|
||||
@@ -370,6 +370,10 @@ func (sw *Switch) stopAndRemovePeer(peer Peer, reason interface{}) {
|
||||
// https://github.com/tendermint/tendermint/issues/3338
|
||||
if sw.peers.Remove(peer) {
	sw.metrics.Peers.Add(float64(-1))
} else {
	// Removal of the peer has failed. The function above sets a flag within the peer to mark this.
	// We keep this message here as information to the developer.
	// Key/value arguments must come in pairs: a stray "," argument would
	// shift every following key and leave the last one without a value.
	sw.Logger.Debug("error on peer removal", "peer", peer.ID())
}
|
||||
}
|
||||
|
||||
@@ -824,6 +828,12 @@ func (sw *Switch) addPeer(p Peer) error {
|
||||
// so that if Receive errors, we will find the peer and remove it.
|
||||
// Add should not err since we already checked peers.Has().
|
||||
if err := sw.peers.Add(p); err != nil {
|
||||
switch err.(type) {
|
||||
case ErrPeerRemoval:
|
||||
sw.Logger.Error("Error starting peer ",
|
||||
" err ", "Peer has already errored and removal was attempted.",
|
||||
"peer", p.ID())
|
||||
}
|
||||
return err
|
||||
}
|
||||
sw.metrics.Peers.Add(float64(1))
|
||||
|
||||
@@ -836,3 +836,16 @@ func BenchmarkSwitchBroadcast(b *testing.B) {
|
||||
|
||||
b.Logf("success: %v, failure: %v", numSuccess, numFailure)
|
||||
}
|
||||
|
||||
func TestSwitchRemovalErr(t *testing.T) {
|
||||
|
||||
sw1, sw2 := MakeSwitchPair(t, func(i int, sw *Switch) *Switch {
|
||||
return initSwitchFunc(i, sw)
|
||||
})
|
||||
assert.Equal(t, len(sw1.Peers().List()), 1)
|
||||
p := sw1.Peers().List()[0]
|
||||
|
||||
sw2.StopPeerForError(p, fmt.Errorf("peer should error"))
|
||||
|
||||
assert.Equal(t, sw2.peers.Add(p).Error(), ErrPeerRemoval{}.Error())
|
||||
}
|
||||
|
||||
@@ -29,7 +29,7 @@ var _ = time.Kitchen
|
||||
// proto package needs to be updated.
|
||||
const _ = proto.GoGoProtoPackageIsVersion3 // please upgrade the proto package
|
||||
|
||||
// BlockIdFlag indicates which BlcokID the signature is for
|
||||
// BlockIdFlag indicates which BlockID the signature is for
|
||||
type BlockIDFlag int32
|
||||
|
||||
const (
|
||||
|
||||
@@ -9,15 +9,15 @@ import "tendermint/crypto/proof.proto";
|
||||
import "tendermint/version/types.proto";
|
||||
import "tendermint/types/validator.proto";
|
||||
|
||||
// BlockIdFlag indicates which BlcokID the signature is for
|
||||
// BlockIdFlag indicates which BlockID the signature is for
|
||||
enum BlockIDFlag {
|
||||
option (gogoproto.goproto_enum_stringer) = true;
|
||||
option (gogoproto.goproto_enum_prefix) = false;
|
||||
|
||||
BLOCK_ID_FLAG_UNKNOWN = 0 [(gogoproto.enumvalue_customname) = "BlockIDFlagUnknown"];
|
||||
BLOCK_ID_FLAG_ABSENT = 1 [(gogoproto.enumvalue_customname) = "BlockIDFlagAbsent"];
|
||||
BLOCK_ID_FLAG_COMMIT = 2 [(gogoproto.enumvalue_customname) = "BlockIDFlagCommit"];
|
||||
BLOCK_ID_FLAG_NIL = 3 [(gogoproto.enumvalue_customname) = "BlockIDFlagNil"];
|
||||
BLOCK_ID_FLAG_UNKNOWN = 0 [(gogoproto.enumvalue_customname) = "BlockIDFlagUnknown"]; // indicates an error condition
|
||||
BLOCK_ID_FLAG_ABSENT = 1 [(gogoproto.enumvalue_customname) = "BlockIDFlagAbsent"]; // the vote was not received
|
||||
BLOCK_ID_FLAG_COMMIT = 2 [(gogoproto.enumvalue_customname) = "BlockIDFlagCommit"]; // voted for the block that received the majority
|
||||
BLOCK_ID_FLAG_NIL = 3 [(gogoproto.enumvalue_customname) = "BlockIDFlagNil"]; // voted for nil
|
||||
}
|
||||
|
||||
// SignedMsgType is a type of signed message in the consensus.
|
||||
|
||||
48
scripts/qa/reporting/README.md
Normal file
@@ -0,0 +1,48 @@
|
||||
# Reporting Scripts
|
||||
|
||||
This directory contains just one utility script at present that is used in
|
||||
reporting/QA.
|
||||
|
||||
## Latency vs Throughput Plotting
|
||||
|
||||
[`latency_throughput.py`](./latency_throughput.py) is a Python script that uses
|
||||
[matplotlib] to plot a graph of transaction latency vs throughput rate based on
|
||||
the CSV output generated by the [loadtime reporting
|
||||
tool](../../../test/loadtime/cmd/report/).
|
||||
|
||||
### Setup
|
||||
|
||||
Execute the following within this directory (the same directory as the
|
||||
`latency_throughput.py` file).
|
||||
|
||||
```bash
|
||||
# Create a virtual environment into which to install your dependencies
|
||||
python3 -m venv .venv
|
||||
|
||||
# Activate the virtual environment
|
||||
source .venv/bin/activate
|
||||
|
||||
# Install dependencies listed in requirements.txt
|
||||
pip install -r requirements.txt
|
||||
|
||||
# Show usage instructions and parameters
|
||||
./latency_throughput.py --help
|
||||
```
|
||||
|
||||
### Running
|
||||
|
||||
```bash
|
||||
# Do the following while ensuring that the virtual environment is activated (see
|
||||
# the Setup steps).
|
||||
#
|
||||
# This will generate a plot in a PNG file called 'tm034.png' in the current
|
||||
# directory based on the reporting tool CSV output in the "raw.csv" file. The
|
||||
# '-t' flag overrides the default title at the top of the plot.
|
||||
|
||||
./latency_throughput.py \
|
||||
-t 'Tendermint v0.34.x Latency vs Throughput' \
|
||||
./tm034.png \
|
||||
/path/to/csv/files/raw.csv
|
||||
```
|
||||
|
||||
[matplotlib]: https://matplotlib.org/
|
||||
170
scripts/qa/reporting/latency_throughput.py
Executable file
@@ -0,0 +1,170 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
A simple script to parse the CSV output from the loadtime reporting tool (see
|
||||
https://github.com/tendermint/tendermint/tree/main/test/loadtime/cmd/report).
|
||||
|
||||
Produces a plot of average transaction latency vs total transaction throughput
|
||||
according to the number of load testing tool WebSocket connections to the
|
||||
Tendermint node.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import csv
|
||||
import logging
|
||||
import sys
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
|
||||
DEFAULT_TITLE = "Tendermint latency vs throughput"
|
||||
|
||||
|
||||
def main():
    """Command-line entry point.

    Parses the plot title, the output image path and one or more input CSV
    files from the command line, configures logging to stdout and renders
    the latency vs throughput plot.
    """
    arg_parser = argparse.ArgumentParser(
        description="Renders a latency vs throughput diagram "
        "for a set of transactions provided by the loadtime reporting tool",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    arg_parser.add_argument('-t', '--title',
                            default=DEFAULT_TITLE,
                            help='Plot title')
    arg_parser.add_argument('output_image',
                            help='Output image file (in PNG format)')
    arg_parser.add_argument(
        'input_csv_file',
        nargs='+',
        help="CSV input file from which to read transaction data "
        "- must have been generated by the loadtime reporting tool",
    )
    options = arg_parser.parse_args()

    # Log at INFO level to stdout, one tab-separated record per line.
    logging.basicConfig(format='%(levelname)s\t%(message)s',
                        stream=sys.stdout,
                        level=logging.INFO)

    plot_latency_vs_throughput(options.input_csv_file,
                               options.output_image,
                               title=options.title)
|
||||
|
||||
|
||||
def plot_latency_vs_throughput(input_files, output_image, title=DEFAULT_TITLE):
    """Plot average transaction latency against throughput rate.

    One line is drawn per distinct number of connections found in the input.

    Args:
        input_files: Paths of the CSV files to read.
        output_image: Path of the image file to write.
        title: Title shown at the top of the plot.
    """
    avg_latencies, throughput_rates = process_input_files(input_files)

    fig, ax = plt.subplots()
    try:
        for c in sorted(avg_latencies):
            tr = np.array(throughput_rates[c])
            al = np.array(avg_latencies[c])
            label = '%d connection%s' % (c, '' if c == 1 else 's')
            ax.plot(tr, al, 'o-', label=label)

        ax.set_title(title)
        ax.set_xlabel('Throughput rate (tx/s)')
        ax.set_ylabel('Average transaction latency (s)')

        # Work on the figure/axes created above rather than on pyplot's
        # implicit "current" figure.
        ax.legend(loc='upper left')
        fig.savefig(output_image)
    finally:
        # pyplot keeps every figure alive until it is closed explicitly, so
        # release it even if plotting or saving fails.
        plt.close(fig)
|
||||
|
||||
|
||||
def process_input_files(input_files):
    """Read every transaction row from the given CSV files and compute the
    per-connection latency/throughput statistics.

    Args:
        input_files: Iterable of paths to CSV files with a header row.

    Returns:
        Whatever compute_experiments_stats() returns for the experiments
        accumulated across all input files.
    """
    # Experimental data from which we will derive the latency vs throughput
    # statistics
    experiments = {}

    for input_file in input_files:
        logging.info('Reading %s...', input_file)

        # The csv module expects files to be opened with newline='' so that
        # it can handle line endings (including ones embedded in quoted
        # fields) itself.
        with open(input_file, 'rt', newline='') as inf:
            for tx in csv.DictReader(inf):
                experiments = process_tx(experiments, tx)

    return compute_experiments_stats(experiments)
|
||||
|
||||
|
||||
def process_tx(experiments, tx):
    """Fold a single transaction row into the per-experiment accumulators.

    Args:
        experiments: Dict mapping experiment ID to its accumulator dict; it
            is updated in place.
        tx: Mapping with the keys 'experiment_id', 'block_time',
            'duration_ns', 'connections' and 'rate' (values parseable as
            numbers).

    Returns:
        The same ``experiments`` dict that was passed in.

    Raises:
        ValueError: If the row's 'connections' or 'rate' differs from the
            value already recorded for the same experiment.
    """
    exp_id = tx['experiment_id']
    # Block time is nanoseconds from the epoch - convert to seconds
    block_time = float(tx['block_time']) / (10**9)
    # Duration is also in nanoseconds - convert to seconds
    duration = float(tx['duration_ns']) / (10**9)
    connections = int(tx['connections'])
    rate = int(tx['rate'])

    exp = experiments.get(exp_id)
    if exp is None:
        experiments[exp_id] = {
            'connections': connections,
            'rate': rate,
            'block_time_min': block_time,
            # We keep track of the latency associated with the minimum block
            # time to estimate the start time of the experiment
            'block_time_min_duration': duration,
            'block_time_max': block_time,
            'total_latencies': duration,
            'tx_count': 1,
        }
        logging.info('Found experiment %s with rate=%d, connections=%d',
                     exp_id, rate, connections)
        return experiments

    # Validation: all rows of one experiment must agree on these parameters.
    for field, val in (('connections', connections), ('rate', rate)):
        if val != exp[field]:
            raise ValueError(
                'Found multiple distinct values for field '
                '"%s" for the same experiment (%s): %d and %d' %
                (field, exp_id, val, exp[field]))

    if block_time < exp['block_time_min']:
        exp['block_time_min'] = block_time
        exp['block_time_min_duration'] = duration
    if block_time > exp['block_time_max']:
        exp['block_time_max'] = block_time

    exp['total_latencies'] += duration
    exp['tx_count'] += 1

    return experiments
|
||||
|
||||
|
||||
def compute_experiments_stats(experiments):
    """Compute average latency vs throughput rate statistics from the given
    experiments.

    Args:
        experiments: Dict mapping experiment ID to an accumulator dict with
            the keys 'connections', 'tx_count', 'total_latencies',
            'block_time_min', 'block_time_min_duration' and 'block_time_max'.

    Returns:
        A tuple ``(avg_latencies, throughput_rates)``. Both are dicts keyed
        by number of connections whose values are parallel lists, ordered by
        increasing throughput rate. Experiments whose estimated duration is
        not positive are skipped with a warning.
    """
    stats = {}

    # Compute average latency and throughput rate for each experiment
    for exp_id, exp in experiments.items():
        conns = exp['connections']
        avg_latency = exp['total_latencies'] / exp['tx_count']
        # The experiment is estimated to have started when the transaction
        # in the earliest block was submitted.
        exp_start_time = exp['block_time_min'] - exp['block_time_min_duration']
        exp_duration = exp['block_time_max'] - exp_start_time
        if exp_duration <= 0:
            # No meaningful rate can be derived (and a zero duration would
            # divide by zero), so leave this experiment out of the plot.
            logging.warning(
                'Skipping experiment %s: non-positive duration (%.6fs)',
                exp_id, exp_duration)
            continue
        throughput_rate = exp['tx_count'] / exp_duration

        stats.setdefault(conns, []).append({
            'avg_latency': avg_latency,
            'throughput_rate': throughput_rate,
        })

    # Sort stats for each number of connections in order of increasing
    # throughput rate, and then extract average latencies and throughput rates
    # as separate data series.
    avg_latencies = {}
    throughput_rates = {}
    for c in sorted(stats):
        series = sorted(stats[c], key=lambda s: s['throughput_rate'])
        avg_latencies[c] = [s['avg_latency'] for s in series]
        throughput_rates[c] = [s['throughput_rate'] for s in series]
        for s in series:
            logging.info(
                'For %d connection(s): '
                'throughput rate = %.6f tx/s\t'
                'average latency = %.6fs',
                c, s['throughput_rate'], s['avg_latency'])

    return (avg_latencies, throughput_rates)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
11
scripts/qa/reporting/requirements.txt
Normal file
@@ -0,0 +1,11 @@
|
||||
contourpy==1.0.5
|
||||
cycler==0.11.0
|
||||
fonttools==4.37.4
|
||||
kiwisolver==1.4.4
|
||||
matplotlib==3.6.1
|
||||
numpy==1.23.4
|
||||
packaging==21.3
|
||||
Pillow==9.2.0
|
||||
pyparsing==3.0.9
|
||||
python-dateutil==2.8.2
|
||||
six==1.16.0
|
||||
@@ -46,7 +46,7 @@ and a list of evidence of malfeasance (ie. signing conflicting votes).
|
||||
|
||||
| Name | Type | Description | Validation |
|
||||
|--------|-------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------|
|
||||
| Header | [Header](#header) | Header corresponding to the block. This field contains information used throughout consensus and other areas of the protocol. To find out what it contains, visit [header] (#header) | Must adhere to the validation rules of [header](#header) |
|
||||
| Header | [Header](#header) | Header corresponding to the block. This field contains information used throughout consensus and other areas of the protocol. To find out what it contains, visit [header](#header) | Must adhere to the validation rules of [header](#header) |
|
||||
| Data | [Data](#data) | Data contains a list of transactions. The contents of the transactions are unknown to Tendermint. | This field can be empty or populated, but no validation is performed. Applications can perform validation on individual transactions prior to block creation using [checkTx](https://github.com/tendermint/tendermint/blob/main/spec/abci/abci++_methods.md#checktx). |
|
||||
| Evidence | [EvidenceList](#evidencelist) | Evidence contains a list of infractions committed by validators. | Can be empty, but when populated the validation rules from [evidenceList](#evidencelist) apply |
|
||||
| LastCommit | [Commit](#commit) | `LastCommit` includes one vote for every validator. All votes must either be for the previous block, nil or absent. If a vote is for the previous block it must have a valid signature from the corresponding validator. The sum of the voting power of the validators that voted must be greater than 2/3 of the total voting power of the complete validator set. The number of votes in a commit is limited to 10000 (see `types.MaxVotesCount`). | Must be empty for the initial height and must adhere to the validation rules of [commit](#commit). |
|
||||
@@ -202,12 +202,12 @@ Commit is a simple wrapper for a list of signatures, with one for each validator
|
||||
a particular `BlockID` or was absent. It's a part of the `Commit` and can be used
|
||||
to reconstruct the vote set given the validator set.
|
||||
|
||||
| Name | Type | Description | Validation |
|
||||
|------------------|-----------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------|
|
||||
| BlockIDFlag | [BlockIDFlag](#blockidflag) | Represents the validators participation in consensus: Either voted for the block that received the majority, voted for another block, voted nil or did not vote | Must be one of the fields in the [BlockIDFlag](#blockidflag) enum |
|
||||
| ValidatorAddress | [Address](#address) | Address of the validator | Must be of length 20 |
|
||||
| Timestamp | [Time](#time) | This field will vary from `CommitSig` to `CommitSig`. It represents the timestamp of the validator. | [Time](#time) |
|
||||
| Signature | [Signature](#signature) | Signature corresponding to the validators participation in consensus. | The length of the signature must be > 0 and < than 64 |
|
||||
| Name | Type | Description | Validation |
|
||||
|------------------|-----------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------|
|
||||
| BlockIDFlag | [BlockIDFlag](#blockidflag) | Represents the validator's participation in consensus: its vote was not received, it voted for the block that received the majority, or it voted for nil | Must be one of the fields in the [BlockIDFlag](#blockidflag) enum |
|
||||
| ValidatorAddress | [Address](#address) | Address of the validator | Must be of length 20 |
|
||||
| Timestamp | [Time](#time) | This field will vary from `CommitSig` to `CommitSig`. It represents the timestamp of the validator. | [Time](#time) |
|
||||
| Signature | [Signature](#signature) | Signature corresponding to the validator's participation in consensus. | The length of the signature must be > 0 and < 64 |
|
||||
|
||||
NOTE: `ValidatorAddress` and `Timestamp` fields may be removed in the future
|
||||
(see [ADR-25](https://github.com/tendermint/tendermint/blob/main/docs/architecture/adr-025-commit.md)).
|
||||
@@ -218,10 +218,10 @@ BlockIDFlag represents which BlockID the [signature](#commitsig) is for.
|
||||
|
||||
```go
|
||||
enum BlockIDFlag {
|
||||
BLOCK_ID_FLAG_UNKNOWN = 0;
|
||||
BLOCK_ID_FLAG_ABSENT = 1; // signatures for other blocks are also considered absent
|
||||
BLOCK_ID_FLAG_COMMIT = 2;
|
||||
BLOCK_ID_FLAG_NIL = 3;
|
||||
BLOCK_ID_FLAG_UNKNOWN = 0; // indicates an error condition
|
||||
BLOCK_ID_FLAG_ABSENT = 1; // the vote was not received
|
||||
BLOCK_ID_FLAG_COMMIT = 2; // voted for the block that received the majority
|
||||
BLOCK_ID_FLAG_NIL = 3; // voted for nil
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
@@ -131,13 +131,14 @@ object nset = { # the type of node sets
|
||||
object classic_bft = {
|
||||
relation quorum_intersection
|
||||
private {
|
||||
definition [quorum_intersection_def] quorum_intersection = forall Q1,Q2. exists N. well_behaved(N) & nset.member(N, Q1) & nset.member(N, Q2) # every two quorums have a well-behaved node in common
|
||||
definition [quorum_intersection_def] quorum_intersection = forall Q1,Q2. nset.is_quorum(Q1) & nset.is_quorum(Q2)
|
||||
-> exists N. well_behaved(N) & nset.member(N, Q1) & nset.member(N, Q2) # every two quorums have a well-behaved node in common
|
||||
}
|
||||
}
|
||||
|
||||
trusted isolate accountable_bft = {
|
||||
# this is our baseline assumption about quorums:
|
||||
private {
|
||||
property [max_2f_byzantine] exists N . well_behaved(N) & nset.member(N,Q) # every quorum has a well-behaved member
|
||||
property [max_2f_byzantine] nset.is_quorum(Q) -> exists N . well_behaved(N) & nset.member(N,Q) # every quorum has a well-behaved member
|
||||
}
|
||||
}
|
||||
|
||||
@@ -19,6 +19,7 @@ import (
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
abci "github.com/tendermint/tendermint/abci/types"
|
||||
"github.com/tendermint/tendermint/state/txindex"
|
||||
"github.com/tendermint/tendermint/types"
|
||||
|
||||
// Register the Postgres database driver.
|
||||
@@ -196,6 +197,55 @@ func TestIndexing(t *testing.T) {
|
||||
err = indexer.IndexTxEvents([]*abci.TxResult{txResult})
|
||||
require.NoError(t, err)
|
||||
})
|
||||
|
||||
t.Run("IndexerService", func(t *testing.T) {
|
||||
indexer := &EventSink{store: testDB(), chainID: chainID}
|
||||
|
||||
// event bus
|
||||
eventBus := types.NewEventBus()
|
||||
err := eventBus.Start()
|
||||
require.NoError(t, err)
|
||||
t.Cleanup(func() {
|
||||
if err := eventBus.Stop(); err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
})
|
||||
|
||||
service := txindex.NewIndexerService(indexer.TxIndexer(), indexer.BlockIndexer(), eventBus, true)
|
||||
err = service.Start()
|
||||
require.NoError(t, err)
|
||||
t.Cleanup(func() {
|
||||
if err := service.Stop(); err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
})
|
||||
|
||||
// publish block with txs
|
||||
err = eventBus.PublishEventNewBlockHeader(types.EventDataNewBlockHeader{
|
||||
Header: types.Header{Height: 1},
|
||||
NumTxs: int64(2),
|
||||
})
|
||||
require.NoError(t, err)
|
||||
txResult1 := &abci.TxResult{
|
||||
Height: 1,
|
||||
Index: uint32(0),
|
||||
Tx: types.Tx("foo"),
|
||||
Result: abci.ResponseDeliverTx{Code: 0},
|
||||
}
|
||||
err = eventBus.PublishEventTx(types.EventDataTx{TxResult: *txResult1})
|
||||
require.NoError(t, err)
|
||||
txResult2 := &abci.TxResult{
|
||||
Height: 1,
|
||||
Index: uint32(1),
|
||||
Tx: types.Tx("bar"),
|
||||
Result: abci.ResponseDeliverTx{Code: 1},
|
||||
}
|
||||
err = eventBus.PublishEventTx(types.EventDataTx{TxResult: *txResult2})
|
||||
require.NoError(t, err)
|
||||
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
require.True(t, service.IsRunning())
|
||||
})
|
||||
}
|
||||
|
||||
func TestStop(t *testing.T) {
|
||||
|
||||
@@ -3,7 +3,6 @@ package txindex
|
||||
import (
|
||||
"context"
|
||||
|
||||
abci "github.com/tendermint/tendermint/abci/types"
|
||||
"github.com/tendermint/tendermint/libs/service"
|
||||
"github.com/tendermint/tendermint/state/indexer"
|
||||
"github.com/tendermint/tendermint/types"
|
||||
@@ -20,9 +19,10 @@ const (
|
||||
type IndexerService struct {
|
||||
service.BaseService
|
||||
|
||||
txIdxr TxIndexer
|
||||
blockIdxr indexer.BlockIndexer
|
||||
eventBus *types.EventBus
|
||||
txIdxr TxIndexer
|
||||
blockIdxr indexer.BlockIndexer
|
||||
eventBus *types.EventBus
|
||||
terminateOnError bool
|
||||
}
|
||||
|
||||
// NewIndexerService returns a new service instance.
|
||||
@@ -30,9 +30,10 @@ func NewIndexerService(
|
||||
txIdxr TxIndexer,
|
||||
blockIdxr indexer.BlockIndexer,
|
||||
eventBus *types.EventBus,
|
||||
terminateOnError bool,
|
||||
) *IndexerService {
|
||||
|
||||
is := &IndexerService{txIdxr: txIdxr, blockIdxr: blockIdxr, eventBus: eventBus}
|
||||
is := &IndexerService{txIdxr: txIdxr, blockIdxr: blockIdxr, eventBus: eventBus, terminateOnError: terminateOnError}
|
||||
is.BaseService = *service.NewBaseService(nil, "IndexerService", is)
|
||||
return is
|
||||
}
|
||||
@@ -74,24 +75,38 @@ func (is *IndexerService) OnStart() error {
|
||||
"index", txResult.Index,
|
||||
"err", err,
|
||||
)
|
||||
|
||||
if is.terminateOnError {
|
||||
if err := is.Stop(); err != nil {
|
||||
is.Logger.Error("failed to stop", "err", err)
|
||||
}
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if err := is.blockIdxr.Index(eventDataHeader); err != nil {
|
||||
is.Logger.Error("failed to index block", "height", height, "err", err)
|
||||
if is.terminateOnError {
|
||||
if err := is.Stop(); err != nil {
|
||||
is.Logger.Error("failed to stop", "err", err)
|
||||
}
|
||||
return
|
||||
}
|
||||
} else {
|
||||
is.Logger.Info("indexed block", "height", height)
|
||||
}
|
||||
|
||||
batch.Ops, err = DeduplicateBatch(batch.Ops, is.txIdxr)
|
||||
if err != nil {
|
||||
is.Logger.Error("deduplicate batch", "height", height)
|
||||
is.Logger.Info("indexed block exents", "height", height)
|
||||
}
|
||||
|
||||
if err = is.txIdxr.AddBatch(batch); err != nil {
|
||||
is.Logger.Error("failed to index block txs", "height", height, "err", err)
|
||||
if is.terminateOnError {
|
||||
if err := is.Stop(); err != nil {
|
||||
is.Logger.Error("failed to stop", "err", err)
|
||||
}
|
||||
return
|
||||
}
|
||||
} else {
|
||||
is.Logger.Debug("indexed block txs", "height", height, "num_txs", eventDataHeader.NumTxs)
|
||||
is.Logger.Debug("indexed transactions", "height", height, "num_txs", eventDataHeader.NumTxs)
|
||||
}
|
||||
}
|
||||
}()
|
||||
@@ -104,45 +119,3 @@ func (is *IndexerService) OnStop() {
|
||||
_ = is.eventBus.UnsubscribeAll(context.Background(), subscriber)
|
||||
}
|
||||
}
|
||||
|
||||
// DeduplicateBatch consider the case of duplicate txs.
|
||||
// if the current one under investigation is NOT OK, then we need to check
|
||||
// whether there's a previously indexed tx.
|
||||
// SKIP the current tx if the previously indexed record is found and successful.
|
||||
func DeduplicateBatch(ops []*abci.TxResult, txIdxr TxIndexer) ([]*abci.TxResult, error) {
|
||||
result := make([]*abci.TxResult, 0, len(ops))
|
||||
|
||||
// keep track of successful txs in this block in order to suppress latter ones being indexed.
|
||||
var successfulTxsInThisBlock = make(map[string]struct{})
|
||||
|
||||
for _, txResult := range ops {
|
||||
hash := types.Tx(txResult.Tx).Hash()
|
||||
|
||||
if txResult.Result.IsOK() {
|
||||
successfulTxsInThisBlock[string(hash)] = struct{}{}
|
||||
} else {
|
||||
// if it already appeared in current block and was successful, skip.
|
||||
if _, found := successfulTxsInThisBlock[string(hash)]; found {
|
||||
continue
|
||||
}
|
||||
|
||||
// check if this tx hash is already indexed
|
||||
old, err := txIdxr.Get(hash)
|
||||
|
||||
// if db op errored
|
||||
// Not found is not an error
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// if it's already indexed in an older block and was successful, skip.
|
||||
if old != nil && old.Result.Code == abci.CodeTypeOK {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
result = append(result, txResult)
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
@@ -32,7 +32,7 @@ func TestIndexerServiceIndexesBlocks(t *testing.T) {
|
||||
txIndexer := kv.NewTxIndex(store)
|
||||
blockIndexer := blockidxkv.New(db.NewPrefixDB(store, []byte("block_events")))
|
||||
|
||||
service := txindex.NewIndexerService(txIndexer, blockIndexer, eventBus)
|
||||
service := txindex.NewIndexerService(txIndexer, blockIndexer, eventBus, false)
|
||||
service.SetLogger(log.TestingLogger())
|
||||
err = service.Start()
|
||||
require.NoError(t, err)
|
||||
@@ -79,164 +79,3 @@ func TestIndexerServiceIndexesBlocks(t *testing.T) {
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, txResult2, res)
|
||||
}
|
||||
|
||||
func TestTxIndexDuplicatePreviouslySuccessful(t *testing.T) {
|
||||
var mockTx = types.Tx("MOCK_TX_HASH")
|
||||
|
||||
testCases := []struct {
|
||||
name string
|
||||
tx1 abci.TxResult
|
||||
tx2 abci.TxResult
|
||||
expSkip bool // do we expect the second tx to be skipped by tx indexer
|
||||
}{
|
||||
{"skip, previously successful",
|
||||
abci.TxResult{
|
||||
Height: 1,
|
||||
Index: 0,
|
||||
Tx: mockTx,
|
||||
Result: abci.ResponseDeliverTx{
|
||||
Code: abci.CodeTypeOK,
|
||||
},
|
||||
},
|
||||
abci.TxResult{
|
||||
Height: 2,
|
||||
Index: 0,
|
||||
Tx: mockTx,
|
||||
Result: abci.ResponseDeliverTx{
|
||||
Code: abci.CodeTypeOK + 1,
|
||||
},
|
||||
},
|
||||
true,
|
||||
},
|
||||
{"not skip, previously unsuccessful",
|
||||
abci.TxResult{
|
||||
Height: 1,
|
||||
Index: 0,
|
||||
Tx: mockTx,
|
||||
Result: abci.ResponseDeliverTx{
|
||||
Code: abci.CodeTypeOK + 1,
|
||||
},
|
||||
},
|
||||
abci.TxResult{
|
||||
Height: 2,
|
||||
Index: 0,
|
||||
Tx: mockTx,
|
||||
Result: abci.ResponseDeliverTx{
|
||||
Code: abci.CodeTypeOK + 1,
|
||||
},
|
||||
},
|
||||
false,
|
||||
},
|
||||
{"not skip, both successful",
|
||||
abci.TxResult{
|
||||
Height: 1,
|
||||
Index: 0,
|
||||
Tx: mockTx,
|
||||
Result: abci.ResponseDeliverTx{
|
||||
Code: abci.CodeTypeOK,
|
||||
},
|
||||
},
|
||||
abci.TxResult{
|
||||
Height: 2,
|
||||
Index: 0,
|
||||
Tx: mockTx,
|
||||
Result: abci.ResponseDeliverTx{
|
||||
Code: abci.CodeTypeOK,
|
||||
},
|
||||
},
|
||||
false,
|
||||
},
|
||||
{"not skip, both unsuccessful",
|
||||
abci.TxResult{
|
||||
Height: 1,
|
||||
Index: 0,
|
||||
Tx: mockTx,
|
||||
Result: abci.ResponseDeliverTx{
|
||||
Code: abci.CodeTypeOK + 1,
|
||||
},
|
||||
},
|
||||
abci.TxResult{
|
||||
Height: 2,
|
||||
Index: 0,
|
||||
Tx: mockTx,
|
||||
Result: abci.ResponseDeliverTx{
|
||||
Code: abci.CodeTypeOK + 1,
|
||||
},
|
||||
},
|
||||
false,
|
||||
},
|
||||
{"skip, same block, previously successful",
|
||||
abci.TxResult{
|
||||
Height: 1,
|
||||
Index: 0,
|
||||
Tx: mockTx,
|
||||
Result: abci.ResponseDeliverTx{
|
||||
Code: abci.CodeTypeOK,
|
||||
},
|
||||
},
|
||||
abci.TxResult{
|
||||
Height: 1,
|
||||
Index: 0,
|
||||
Tx: mockTx,
|
||||
Result: abci.ResponseDeliverTx{
|
||||
Code: abci.CodeTypeOK + 1,
|
||||
},
|
||||
},
|
||||
true,
|
||||
},
|
||||
{"not skip, same block, previously unsuccessful",
|
||||
abci.TxResult{
|
||||
Height: 1,
|
||||
Index: 0,
|
||||
Tx: mockTx,
|
||||
Result: abci.ResponseDeliverTx{
|
||||
Code: abci.CodeTypeOK + 1,
|
||||
},
|
||||
},
|
||||
abci.TxResult{
|
||||
Height: 1,
|
||||
Index: 0,
|
||||
Tx: mockTx,
|
||||
Result: abci.ResponseDeliverTx{
|
||||
Code: abci.CodeTypeOK,
|
||||
},
|
||||
},
|
||||
false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
indexer := kv.NewTxIndex(db.NewMemDB())
|
||||
|
||||
if tc.tx1.Height != tc.tx2.Height {
|
||||
// index the first tx
|
||||
err := indexer.AddBatch(&txindex.Batch{
|
||||
Ops: []*abci.TxResult{&tc.tx1},
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
// check if the second one should be skipped.
|
||||
ops, err := txindex.DeduplicateBatch([]*abci.TxResult{&tc.tx2}, indexer)
|
||||
require.NoError(t, err)
|
||||
|
||||
if tc.expSkip {
|
||||
require.Empty(t, ops)
|
||||
} else {
|
||||
require.Equal(t, []*abci.TxResult{&tc.tx2}, ops)
|
||||
}
|
||||
} else {
|
||||
// same block
|
||||
ops := []*abci.TxResult{&tc.tx1, &tc.tx2}
|
||||
ops, err := txindex.DeduplicateBatch(ops, indexer)
|
||||
require.NoError(t, err)
|
||||
if tc.expSkip {
|
||||
// the second one is skipped
|
||||
require.Equal(t, []*abci.TxResult{&tc.tx1}, ops)
|
||||
} else {
|
||||
require.Equal(t, []*abci.TxResult{&tc.tx1, &tc.tx2}, ops)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -102,12 +102,30 @@ func (txi *TxIndex) AddBatch(b *txindex.Batch) error {
|
||||
// that indexed from the tx's events is a composite of the event type and the
|
||||
// respective attribute's key delimited by a "." (eg. "account.number").
|
||||
// Any event with an empty type is not indexed.
|
||||
//
|
||||
// If a transaction with the same hash has already been indexed, the existing
|
||||
// entry is overwritten unless the new tx result is NOT OK and the prior result
|
||||
// was OK, i.e. transactions that executed successfully overwrite both failed
|
||||
// transactions and older successful ones.
|
||||
func (txi *TxIndex) Index(result *abci.TxResult) error {
|
||||
b := txi.store.NewBatch()
|
||||
defer b.Close()
|
||||
|
||||
hash := types.Tx(result.Tx).Hash()
|
||||
|
||||
if !result.Result.IsOK() {
|
||||
oldResult, err := txi.Get(hash)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// if the new transaction failed and it's already indexed in an older block and was successful
|
||||
// we skip it as we want users to get the older successful transaction when they query.
|
||||
if oldResult != nil && oldResult.Result.Code == abci.CodeTypeOK {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// index tx by events
|
||||
err := txi.indexEvents(result, hash, b)
|
||||
if err != nil {
|
||||
|
||||
@@ -258,6 +258,103 @@ func TestTxSearchOneTxWithMultipleSameTagsButDifferentValues(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestTxIndexDuplicatePreviouslySuccessful(t *testing.T) {
|
||||
var mockTx = types.Tx("MOCK_TX_HASH")
|
||||
|
||||
testCases := []struct {
|
||||
name string
|
||||
tx1 *abci.TxResult
|
||||
tx2 *abci.TxResult
|
||||
expOverwrite bool // do we expect the second tx to overwrite the first tx
|
||||
}{
|
||||
{
|
||||
"don't overwrite as a non-zero code was returned and the previous tx was successful",
|
||||
&abci.TxResult{
|
||||
Height: 1,
|
||||
Index: 0,
|
||||
Tx: mockTx,
|
||||
Result: abci.ResponseDeliverTx{
|
||||
Code: abci.CodeTypeOK,
|
||||
},
|
||||
},
|
||||
&abci.TxResult{
|
||||
Height: 2,
|
||||
Index: 0,
|
||||
Tx: mockTx,
|
||||
Result: abci.ResponseDeliverTx{
|
||||
Code: abci.CodeTypeOK + 1,
|
||||
},
|
||||
},
|
||||
false,
|
||||
},
|
||||
{
|
||||
"overwrite as the previous tx was also unsuccessful",
|
||||
&abci.TxResult{
|
||||
Height: 1,
|
||||
Index: 0,
|
||||
Tx: mockTx,
|
||||
Result: abci.ResponseDeliverTx{
|
||||
Code: abci.CodeTypeOK + 1,
|
||||
},
|
||||
},
|
||||
&abci.TxResult{
|
||||
Height: 2,
|
||||
Index: 0,
|
||||
Tx: mockTx,
|
||||
Result: abci.ResponseDeliverTx{
|
||||
Code: abci.CodeTypeOK + 1,
|
||||
},
|
||||
},
|
||||
true,
|
||||
},
|
||||
{
|
||||
"overwrite as the most recent tx was successful",
|
||||
&abci.TxResult{
|
||||
Height: 1,
|
||||
Index: 0,
|
||||
Tx: mockTx,
|
||||
Result: abci.ResponseDeliverTx{
|
||||
Code: abci.CodeTypeOK,
|
||||
},
|
||||
},
|
||||
&abci.TxResult{
|
||||
Height: 2,
|
||||
Index: 0,
|
||||
Tx: mockTx,
|
||||
Result: abci.ResponseDeliverTx{
|
||||
Code: abci.CodeTypeOK,
|
||||
},
|
||||
},
|
||||
true,
|
||||
},
|
||||
}
|
||||
|
||||
hash := mockTx.Hash()
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
indexer := NewTxIndex(db.NewMemDB())
|
||||
|
||||
// index the first tx
|
||||
err := indexer.Index(tc.tx1)
|
||||
require.NoError(t, err)
|
||||
|
||||
// index the same tx with different results
|
||||
err = indexer.Index(tc.tx2)
|
||||
require.NoError(t, err)
|
||||
|
||||
res, err := indexer.Get(hash)
|
||||
require.NoError(t, err)
|
||||
|
||||
if tc.expOverwrite {
|
||||
require.Equal(t, tc.tx2, res)
|
||||
} else {
|
||||
require.Equal(t, tc.tx1, res)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestTxSearchMultipleTxs(t *testing.T) {
|
||||
indexer := NewTxIndex(db.NewMemDB())
|
||||
|
||||
|
||||
@@ -23,7 +23,6 @@ type Config struct {
|
||||
PrivValServer string `toml:"privval_server"`
|
||||
PrivValKey string `toml:"privval_key"`
|
||||
PrivValState string `toml:"privval_state"`
|
||||
Misbehaviors map[string]string `toml:"misbehaviors"`
|
||||
KeyType string `toml:"key_type"`
|
||||
}
|
||||
|
||||
|
||||
85
test/e2e/pkg/infra/docker/docker.go
Normal file
@@ -0,0 +1,85 @@
|
||||
package docker
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"text/template"
|
||||
|
||||
e2e "github.com/tendermint/tendermint/test/e2e/pkg"
|
||||
"github.com/tendermint/tendermint/test/e2e/pkg/infra"
|
||||
)
|
||||
|
||||
var _ infra.Provider = &Provider{}
|
||||
|
||||
// Provider implements a docker-compose backed infrastructure provider.
|
||||
type Provider struct {
|
||||
Testnet *e2e.Testnet
|
||||
}
|
||||
|
||||
// Setup generates the docker-compose file and writes it to disk, erroring if
|
||||
// any of these operations fail.
|
||||
func (p *Provider) Setup() error {
|
||||
compose, err := dockerComposeBytes(p.Testnet)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
//nolint: gosec
|
||||
// G306: Expect WriteFile permissions to be 0600 or less
|
||||
err = os.WriteFile(filepath.Join(p.Testnet.Dir, "docker-compose.yml"), compose, 0644)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// dockerComposeBytes generates a Docker Compose config file for a testnet and returns the
|
||||
// file as bytes to be written out to disk.
|
||||
func dockerComposeBytes(testnet *e2e.Testnet) ([]byte, error) {
|
||||
// Must use version 2 Docker Compose format, to support IPv6.
|
||||
tmpl, err := template.New("docker-compose").Parse(`version: '2.4'
|
||||
networks:
|
||||
{{ .Name }}:
|
||||
labels:
|
||||
e2e: true
|
||||
driver: bridge
|
||||
{{- if .IPv6 }}
|
||||
enable_ipv6: true
|
||||
{{- end }}
|
||||
ipam:
|
||||
driver: default
|
||||
config:
|
||||
- subnet: {{ .IP }}
|
||||
|
||||
services:
|
||||
{{- range .Nodes }}
|
||||
{{ .Name }}:
|
||||
labels:
|
||||
e2e: true
|
||||
container_name: {{ .Name }}
|
||||
image: tendermint/e2e-node
|
||||
{{- if eq .ABCIProtocol "builtin" }}
|
||||
entrypoint: /usr/bin/entrypoint-builtin
|
||||
{{- end }}
|
||||
init: true
|
||||
ports:
|
||||
- 26656
|
||||
- {{ if .ProxyPort }}{{ .ProxyPort }}:{{ end }}26657
|
||||
- 6060
|
||||
volumes:
|
||||
- ./{{ .Name }}:/tendermint
|
||||
networks:
|
||||
{{ $.Name }}:
|
||||
ipv{{ if $.IPv6 }}6{{ else }}4{{ end}}_address: {{ .IP }}
|
||||
|
||||
{{end}}`)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var buf bytes.Buffer
|
||||
err = tmpl.Execute(&buf, testnet)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return buf.Bytes(), nil
|
||||
}
|
||||
20
test/e2e/pkg/infra/provider.go
Normal file
@@ -0,0 +1,20 @@
|
||||
package infra
|
||||
|
||||
// Provider defines an API for manipulating the infrastructure backing a
|
||||
// specific testnet.
|
||||
type Provider interface {
|
||||
|
||||
// Setup generates any necessary configuration for the infrastructure
|
||||
// provider during testnet setup.
|
||||
Setup() error
|
||||
}
|
||||
|
||||
// NoopProvider implements the provider interface by performing noops for every
|
||||
// interface method. This may be useful if the infrastructure is managed by a
|
||||
// separate process.
|
||||
type NoopProvider struct {
|
||||
}
|
||||
|
||||
func (NoopProvider) Setup() error { return nil }
|
||||
|
||||
var _ Provider = NoopProvider{}
|
||||
80
test/e2e/pkg/infrastructure.go
Normal file
@@ -0,0 +1,80 @@
|
||||
package e2e
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net"
|
||||
"os"
|
||||
)
|
||||
|
||||
const (
|
||||
dockerIPv4CIDR = "10.186.73.0/24"
|
||||
dockerIPv6CIDR = "fd80:b10c::/48"
|
||||
|
||||
globalIPv4CIDR = "0.0.0.0/0"
|
||||
)
|
||||
|
||||
// InfrastructureData contains the relevant information for a set of existing
|
||||
// infrastructure that is to be used for running a testnet.
|
||||
type InfrastructureData struct {
|
||||
|
||||
// Provider is the name of the infrastructure provider backing the testnet.
|
||||
// For example, 'docker' if it is running locally in a docker network or
|
||||
// 'digital-ocean', 'aws', 'google', etc. if it is from a cloud provider.
|
||||
Provider string `json:"provider"`
|
||||
|
||||
// Instances is a map of all of the machine instances on which to run
|
||||
// processes for a testnet.
|
||||
// Each key of the map is the name of an instance, which must correspond
|
||||
// to the name of one of the testnet nodes defined in the testnet manifest.
|
||||
Instances map[string]InstanceData `json:"instances"`
|
||||
|
||||
// Network is the CIDR notation range of IP addresses that all of the instances'
|
||||
// IP addresses are expected to be within.
|
||||
Network string `json:"network"`
|
||||
}
|
||||
|
||||
// InstanceData contains the relevant information for a machine instance backing
|
||||
// one of the nodes in the testnet.
|
||||
type InstanceData struct {
|
||||
IPAddress net.IP `json:"ip_address"`
|
||||
}
|
||||
|
||||
func NewDockerInfrastructureData(m Manifest) (InfrastructureData, error) {
|
||||
netAddress := dockerIPv4CIDR
|
||||
if m.IPv6 {
|
||||
netAddress = dockerIPv6CIDR
|
||||
}
|
||||
_, ipNet, err := net.ParseCIDR(netAddress)
|
||||
if err != nil {
|
||||
return InfrastructureData{}, fmt.Errorf("invalid IP network address %q: %w", netAddress, err)
|
||||
}
|
||||
ipGen := newIPGenerator(ipNet)
|
||||
ifd := InfrastructureData{
|
||||
Provider: "docker",
|
||||
Instances: make(map[string]InstanceData),
|
||||
Network: netAddress,
|
||||
}
|
||||
for name := range m.Nodes {
|
||||
ifd.Instances[name] = InstanceData{
|
||||
IPAddress: ipGen.Next(),
|
||||
}
|
||||
}
|
||||
return ifd, nil
|
||||
}
|
||||
|
||||
func InfrastructureDataFromFile(p string) (InfrastructureData, error) {
|
||||
ifd := InfrastructureData{}
|
||||
b, err := os.ReadFile(p)
|
||||
if err != nil {
|
||||
return InfrastructureData{}, err
|
||||
}
|
||||
err = json.Unmarshal(b, &ifd)
|
||||
if err != nil {
|
||||
return InfrastructureData{}, err
|
||||
}
|
||||
if ifd.Network == "" {
|
||||
ifd.Network = globalIPv4CIDR
|
||||
}
|
||||
return ifd, nil
|
||||
}
|
||||
@@ -21,8 +21,6 @@ import (
|
||||
const (
|
||||
randomSeed int64 = 2308084734268
|
||||
proxyPortFirst uint32 = 5701
|
||||
networkIPv4 = "10.186.73.0/24"
|
||||
networkIPv6 = "fd80:b10c::/48"
|
||||
)
|
||||
|
||||
type (
|
||||
@@ -100,32 +98,20 @@ type Node struct {
|
||||
// The testnet generation must be deterministic, since it is generated
|
||||
// separately by the runner and the test cases. For this reason, testnets use a
|
||||
// random seed to generate e.g. keys.
|
||||
func LoadTestnet(file string) (*Testnet, error) {
|
||||
manifest, err := LoadManifest(file)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
dir := strings.TrimSuffix(file, filepath.Ext(file))
|
||||
|
||||
// Set up resource generators. These must be deterministic.
|
||||
netAddress := networkIPv4
|
||||
if manifest.IPv6 {
|
||||
netAddress = networkIPv6
|
||||
}
|
||||
_, ipNet, err := net.ParseCIDR(netAddress)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid IP network address %q: %w", netAddress, err)
|
||||
}
|
||||
|
||||
ipGen := newIPGenerator(ipNet)
|
||||
func LoadTestnet(manifest Manifest, fname string, ifd InfrastructureData) (*Testnet, error) {
|
||||
dir := strings.TrimSuffix(fname, filepath.Ext(fname))
|
||||
keyGen := newKeyGenerator(randomSeed)
|
||||
proxyPortGen := newPortGenerator(proxyPortFirst)
|
||||
_, ipNet, err := net.ParseCIDR(ifd.Network)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid IP network address %q: %w", ifd.Network, err)
|
||||
}
|
||||
|
||||
testnet := &Testnet{
|
||||
Name: filepath.Base(dir),
|
||||
File: file,
|
||||
File: fname,
|
||||
Dir: dir,
|
||||
IP: ipGen.Network(),
|
||||
IP: ipNet,
|
||||
InitialHeight: 1,
|
||||
InitialState: manifest.InitialState,
|
||||
Validators: map[*Node]int64{},
|
||||
@@ -156,12 +142,16 @@ func LoadTestnet(file string) (*Testnet, error) {
|
||||
|
||||
for _, name := range nodeNames {
|
||||
nodeManifest := manifest.Nodes[name]
|
||||
ind, ok := ifd.Instances[name]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("information for node '%s' missing from infrastucture data", name)
|
||||
}
|
||||
node := &Node{
|
||||
Name: name,
|
||||
Testnet: testnet,
|
||||
PrivvalKey: keyGen.Generate(manifest.KeyType),
|
||||
NodeKey: keyGen.Generate("ed25519"),
|
||||
IP: ipGen.Next(),
|
||||
IP: ind.IPAddress,
|
||||
ProxyPort: proxyPortGen.Next(),
|
||||
Mode: ModeValidator,
|
||||
Database: "goleveldb",
|
||||
|
||||
@@ -2,6 +2,7 @@ package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"os"
|
||||
@@ -11,6 +12,8 @@ import (
|
||||
|
||||
"github.com/tendermint/tendermint/libs/log"
|
||||
e2e "github.com/tendermint/tendermint/test/e2e/pkg"
|
||||
"github.com/tendermint/tendermint/test/e2e/pkg/infra"
|
||||
"github.com/tendermint/tendermint/test/e2e/pkg/infra/docker"
|
||||
)
|
||||
|
||||
const randomSeed = 2308084734268
|
||||
@@ -26,6 +29,7 @@ type CLI struct {
|
||||
root *cobra.Command
|
||||
testnet *e2e.Testnet
|
||||
preserve bool
|
||||
infp infra.Provider
|
||||
}
|
||||
|
||||
// NewCLI sets up the CLI.
|
||||
@@ -41,19 +45,57 @@ func NewCLI() *CLI {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
testnet, err := e2e.LoadTestnet(file)
|
||||
m, err := e2e.LoadManifest(file)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
inft, err := cmd.Flags().GetString("infrastructure-type")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var ifd e2e.InfrastructureData
|
||||
switch inft {
|
||||
case "docker":
|
||||
var err error
|
||||
ifd, err = e2e.NewDockerInfrastructureData(m)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
case "digital-ocean":
|
||||
p, err := cmd.Flags().GetString("infrastructure-data")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if p == "" {
|
||||
return errors.New("'--infrastructure-data' must be set when using the 'digital-ocean' infrastructure-type")
|
||||
}
|
||||
ifd, err = e2e.InfrastructureDataFromFile(p)
|
||||
if err != nil {
|
||||
return fmt.Errorf("parsing infrastructure data: %s", err)
|
||||
}
|
||||
default:
|
||||
return fmt.Errorf("unknown infrastructure type '%s'", inft)
|
||||
}
|
||||
|
||||
testnet, err := e2e.LoadTestnet(m, file, ifd)
|
||||
if err != nil {
|
||||
return fmt.Errorf("loading testnet: %s", err)
|
||||
}
|
||||
|
||||
cli.testnet = testnet
|
||||
cli.infp = &infra.NoopProvider{}
|
||||
if inft == "docker" {
|
||||
cli.infp = &docker.Provider{Testnet: testnet}
|
||||
}
|
||||
return nil
|
||||
},
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
if err := Cleanup(cli.testnet); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := Setup(cli.testnet); err != nil {
|
||||
if err := Setup(cli.testnet, cli.infp); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -118,6 +160,10 @@ func NewCLI() *CLI {
|
||||
cli.root.PersistentFlags().StringP("file", "f", "", "Testnet TOML manifest")
|
||||
_ = cli.root.MarkPersistentFlagRequired("file")
|
||||
|
||||
cli.root.PersistentFlags().StringP("infrastructure-type", "", "docker", "Backing infrastructure used to run the testnet. Either 'digital-ocean' or 'docker'")
|
||||
|
||||
cli.root.PersistentFlags().StringP("infrastructure-data", "", "", "path to the json file containing the infrastructure data. Only used if the 'infrastructure-type' is set to a value other than 'docker'")
|
||||
|
||||
cli.root.Flags().BoolVarP(&cli.preserve, "preserve", "p", false,
|
||||
"Preserves the running of the test net after tests are completed")
|
||||
|
||||
@@ -125,7 +171,7 @@ func NewCLI() *CLI {
|
||||
Use: "setup",
|
||||
Short: "Generates the testnet directory and configuration",
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
return Setup(cli.testnet)
|
||||
return Setup(cli.testnet, cli.infp)
|
||||
},
|
||||
})
|
||||
|
||||
@@ -135,7 +181,7 @@ func NewCLI() *CLI {
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
_, err := os.Stat(cli.testnet.Dir)
|
||||
if os.IsNotExist(err) {
|
||||
err = Setup(cli.testnet)
|
||||
err = Setup(cli.testnet, cli.infp)
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -258,7 +304,7 @@ Does not run any perbutations.
|
||||
if err := Cleanup(cli.testnet); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := Setup(cli.testnet); err != nil {
|
||||
if err := Setup(cli.testnet, cli.infp); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
|
||||
@@ -10,9 +10,7 @@ import (
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"text/template"
|
||||
"time"
|
||||
|
||||
"github.com/BurntSushi/toml"
|
||||
@@ -23,6 +21,7 @@ import (
|
||||
"github.com/tendermint/tendermint/p2p"
|
||||
"github.com/tendermint/tendermint/privval"
|
||||
e2e "github.com/tendermint/tendermint/test/e2e/pkg"
|
||||
"github.com/tendermint/tendermint/test/e2e/pkg/infra"
|
||||
"github.com/tendermint/tendermint/types"
|
||||
)
|
||||
|
||||
@@ -39,7 +38,7 @@ const (
|
||||
)
|
||||
|
||||
// Setup sets up the testnet configuration.
|
||||
func Setup(testnet *e2e.Testnet) error {
|
||||
func Setup(testnet *e2e.Testnet, infp infra.Provider) error {
|
||||
logger.Info("setup", "msg", log.NewLazySprintf("Generating testnet files in %q", testnet.Dir))
|
||||
|
||||
err := os.MkdirAll(testnet.Dir, os.ModePerm)
|
||||
@@ -47,11 +46,7 @@ func Setup(testnet *e2e.Testnet) error {
|
||||
return err
|
||||
}
|
||||
|
||||
compose, err := MakeDockerCompose(testnet)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = os.WriteFile(filepath.Join(testnet.Dir, "docker-compose.yml"), compose, 0o644) //nolint:gosec
|
||||
err = infp.Setup()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -126,70 +121,6 @@ func Setup(testnet *e2e.Testnet) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// MakeDockerCompose generates a Docker Compose config for a testnet.
|
||||
func MakeDockerCompose(testnet *e2e.Testnet) ([]byte, error) {
|
||||
// Must use version 2 Docker Compose format, to support IPv6.
|
||||
tmpl, err := template.New("docker-compose").Funcs(template.FuncMap{
|
||||
"misbehaviorsToString": func(misbehaviors map[int64]string) string {
|
||||
str := ""
|
||||
for height, misbehavior := range misbehaviors {
|
||||
// after the first behavior set, a comma must be prepended
|
||||
if str != "" {
|
||||
str += ","
|
||||
}
|
||||
heightString := strconv.Itoa(int(height))
|
||||
str += misbehavior + "," + heightString
|
||||
}
|
||||
return str
|
||||
},
|
||||
}).Parse(`version: '2.4'
|
||||
|
||||
networks:
|
||||
{{ .Name }}:
|
||||
labels:
|
||||
e2e: true
|
||||
driver: bridge
|
||||
{{- if .IPv6 }}
|
||||
enable_ipv6: true
|
||||
{{- end }}
|
||||
ipam:
|
||||
driver: default
|
||||
config:
|
||||
- subnet: {{ .IP }}
|
||||
|
||||
services:
|
||||
{{- range .Nodes }}
|
||||
{{ .Name }}:
|
||||
labels:
|
||||
e2e: true
|
||||
container_name: {{ .Name }}
|
||||
image: tendermint/e2e-node
|
||||
{{- if eq .ABCIProtocol "builtin" }}
|
||||
entrypoint: /usr/bin/entrypoint-builtin
|
||||
{{- end }}
|
||||
init: true
|
||||
ports:
|
||||
- 26656
|
||||
- {{ if .ProxyPort }}{{ .ProxyPort }}:{{ end }}26657
|
||||
- 6060
|
||||
volumes:
|
||||
- ./{{ .Name }}:/tendermint
|
||||
networks:
|
||||
{{ $.Name }}:
|
||||
ipv{{ if $.IPv6 }}6{{ else }}4{{ end}}_address: {{ .IP }}
|
||||
|
||||
{{end}}`)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var buf bytes.Buffer
|
||||
err = tmpl.Execute(&buf, testnet)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return buf.Bytes(), nil
|
||||
}
|
||||
|
||||
// MakeGenesis generates a genesis document.
|
||||
func MakeGenesis(testnet *e2e.Testnet) (types.GenesisDoc, error) {
|
||||
genesis := types.GenesisDoc{
|
||||
|
||||
@@ -66,23 +66,27 @@ func testNode(t *testing.T, testFunc func(*testing.T, e2e.Node)) {
|
||||
func loadTestnet(t *testing.T) e2e.Testnet {
|
||||
t.Helper()
|
||||
|
||||
manifest := os.Getenv("E2E_MANIFEST")
|
||||
if manifest == "" {
|
||||
manifestFile := os.Getenv("E2E_MANIFEST")
|
||||
if manifestFile == "" {
|
||||
t.Skip("E2E_MANIFEST not set, not an end-to-end test run")
|
||||
}
|
||||
if !filepath.IsAbs(manifest) {
|
||||
manifest = filepath.Join("..", manifest)
|
||||
if !filepath.IsAbs(manifestFile) {
|
||||
manifestFile = filepath.Join("..", manifestFile)
|
||||
}
|
||||
|
||||
testnetCacheMtx.Lock()
|
||||
defer testnetCacheMtx.Unlock()
|
||||
if testnet, ok := testnetCache[manifest]; ok {
|
||||
if testnet, ok := testnetCache[manifestFile]; ok {
|
||||
return testnet
|
||||
}
|
||||
|
||||
testnet, err := e2e.LoadTestnet(manifest)
|
||||
m, err := e2e.LoadManifest(manifestFile)
|
||||
require.NoError(t, err)
|
||||
testnetCache[manifest] = *testnet
|
||||
ifd, err := e2e.NewDockerInfrastructureData(m)
|
||||
require.NoError(t, err)
|
||||
|
||||
testnet, err := e2e.LoadTestnet(m, manifestFile, ifd)
|
||||
require.NoError(t, err)
|
||||
testnetCache[manifestFile] = *testnet
|
||||
return *testnet
|
||||
}
|
||||
|
||||
|
||||
@@ -87,7 +87,7 @@ func toCSVRecords(rs []report.Report) [][]string {
|
||||
}
|
||||
res := make([][]string, total+1)
|
||||
|
||||
res[0] = []string{"experiment_id", "duration_ns", "block_time", "connections", "rate", "size"}
|
||||
res[0] = []string{"experiment_id", "block_time", "duration_ns", "tx_hash", "connections", "rate", "size"}
|
||||
offset := 1
|
||||
for _, r := range rs {
|
||||
idStr := r.ID.String()
|
||||
@@ -95,7 +95,7 @@ func toCSVRecords(rs []report.Report) [][]string {
|
||||
rateStr := strconv.FormatInt(int64(r.Rate), 10)
|
||||
sizeStr := strconv.FormatInt(int64(r.Size), 10)
|
||||
for i, v := range r.All {
|
||||
res[offset+i] = []string{idStr, strconv.FormatInt(int64(v.Duration), 10), strconv.FormatInt(v.BlockTime.UnixNano(), 10), connStr, rateStr, sizeStr}
|
||||
res[offset+i] = []string{idStr, strconv.FormatInt(v.BlockTime.UnixNano(), 10), strconv.FormatInt(int64(v.Duration), 10), fmt.Sprintf("%X", v.Hash), connStr, rateStr, sizeStr}
|
||||
}
|
||||
offset += len(r.All)
|
||||
}
|
||||
|
||||
@@ -3,7 +3,7 @@ package payload
|
||||
import (
|
||||
"bytes"
|
||||
"crypto/rand"
|
||||
"errors"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"math"
|
||||
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
)
|
||||
|
||||
const keyPrefix = "a="
|
||||
const maxPayloadSize = 4 * 1024 * 1024
|
||||
|
||||
// NewBytes generates a new payload and returns the encoded representation of
|
||||
// the payload as a slice of bytes. NewBytes uses the fields on the Options
|
||||
@@ -25,10 +26,16 @@ func NewBytes(p *Payload) ([]byte, error) {
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if p.Size < uint64(us) {
|
||||
return nil, fmt.Errorf("configured size %d not large enough to fit unpadded transaction of size %d", p.Size, us)
|
||||
if p.Size > maxPayloadSize {
|
||||
return nil, fmt.Errorf("configured size %d is too large (>%d)", p.Size, maxPayloadSize)
|
||||
}
|
||||
p.Padding = make([]byte, p.Size-uint64(us))
|
||||
pSize := int(p.Size) // #nosec -- The "if" above makes this cast safe
|
||||
if pSize < us {
|
||||
return nil, fmt.Errorf("configured size %d not large enough to fit unpadded transaction of size %d", pSize, us)
|
||||
}
|
||||
|
||||
// We halve the padding size because we transform the TX to hex
|
||||
p.Padding = make([]byte, (pSize-us)/2)
|
||||
_, err = rand.Read(p.Padding)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -37,22 +44,28 @@ func NewBytes(p *Payload) ([]byte, error) {
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
h := []byte(hex.EncodeToString(b))
|
||||
|
||||
// prepend a single key so that the kv store only ever stores a single
|
||||
// transaction instead of storing all tx and ballooning in size.
|
||||
return append([]byte(keyPrefix), b...), nil
|
||||
return append([]byte(keyPrefix), h...), nil
|
||||
}
|
||||
|
||||
// FromBytes extracts a payload from the byte representation of the payload.
|
||||
// FromBytes leaves the padding untouched, returning it to the caller to handle
|
||||
// or discard per their preference.
|
||||
func FromBytes(b []byte) (*Payload, error) {
|
||||
p := &Payload{}
|
||||
tr := bytes.TrimPrefix(b, []byte(keyPrefix))
|
||||
if bytes.Equal(b, tr) {
|
||||
return nil, errors.New("payload bytes missing key prefix")
|
||||
trH := bytes.TrimPrefix(b, []byte(keyPrefix))
|
||||
if bytes.Equal(b, trH) {
|
||||
return nil, fmt.Errorf("payload bytes missing key prefix '%s'", keyPrefix)
|
||||
}
|
||||
err := proto.Unmarshal(tr, p)
|
||||
trB, err := hex.DecodeString(string(trH))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
p := &Payload{}
|
||||
err = proto.Unmarshal(trB, p)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -83,5 +96,6 @@ func CalculateUnpaddedSize(p *Payload) (int, error) {
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return len(b) + len(keyPrefix), nil
|
||||
h := []byte(hex.EncodeToString(b))
|
||||
return len(h) + len(keyPrefix), nil
|
||||
}
|
||||
|
||||
@@ -25,6 +25,7 @@ type BlockStore interface {
|
||||
type DataPoint struct {
|
||||
Duration time.Duration
|
||||
BlockTime time.Time
|
||||
Hash []byte
|
||||
}
|
||||
|
||||
// Report contains the data calculated from reading the timestamped transactions
|
||||
@@ -68,7 +69,7 @@ func (rs *Reports) ErrorCount() int {
|
||||
return rs.errorCount
|
||||
}
|
||||
|
||||
func (rs *Reports) addDataPoint(id uuid.UUID, l time.Duration, bt time.Time, conns, rate, size uint64) {
|
||||
func (rs *Reports) addDataPoint(id uuid.UUID, l time.Duration, bt time.Time, hash []byte, conns, rate, size uint64) {
|
||||
r, ok := rs.s[id]
|
||||
if !ok {
|
||||
r = Report{
|
||||
@@ -81,7 +82,7 @@ func (rs *Reports) addDataPoint(id uuid.UUID, l time.Duration, bt time.Time, con
|
||||
}
|
||||
rs.s[id] = r
|
||||
}
|
||||
r.All = append(r.All, DataPoint{Duration: l, BlockTime: bt})
|
||||
r.All = append(r.All, DataPoint{Duration: l, BlockTime: bt, Hash: hash})
|
||||
if l > r.Max {
|
||||
r.Max = l
|
||||
}
|
||||
@@ -123,11 +124,12 @@ func GenerateFromBlockStore(s BlockStore) (*Reports, error) {
|
||||
id uuid.UUID
|
||||
l time.Duration
|
||||
bt time.Time
|
||||
hash []byte
|
||||
connections, rate, size uint64
|
||||
err error
|
||||
}
|
||||
type txData struct {
|
||||
tx []byte
|
||||
tx types.Tx
|
||||
bt time.Time
|
||||
}
|
||||
reports := &Reports{
|
||||
@@ -161,6 +163,7 @@ func GenerateFromBlockStore(s BlockStore) (*Reports, error) {
|
||||
pdc <- payloadData{
|
||||
l: l,
|
||||
bt: b.bt,
|
||||
hash: b.tx.Hash(),
|
||||
id: uuid.UUID(*idb),
|
||||
connections: p.Connections,
|
||||
rate: p.Rate,
|
||||
@@ -202,7 +205,7 @@ func GenerateFromBlockStore(s BlockStore) (*Reports, error) {
|
||||
reports.addError()
|
||||
continue
|
||||
}
|
||||
reports.addDataPoint(pd.id, pd.l, pd.bt, pd.connections, pd.rate, pd.size)
|
||||
reports.addDataPoint(pd.id, pd.l, pd.bt, pd.hash, pd.connections, pd.rate, pd.size)
|
||||
}
|
||||
reports.calculateAll()
|
||||
return reports, nil
|
||||
|
||||