Compare commits
35 Commits
wb/impleme
...
wb/issue-9
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
06050e4324 | ||
|
|
4fbfea79ad | ||
|
|
241be64da5 | ||
|
|
2c40ca52c1 | ||
|
|
3136b7a084 | ||
|
|
c755d03611 | ||
|
|
7a2e44184b | ||
|
|
77e7318613 | ||
|
|
caa75ae791 | ||
|
|
af2981a2f7 | ||
|
|
f9307cac51 | ||
|
|
3bd2153136 | ||
|
|
301211c2cb | ||
|
|
58ee42ca52 | ||
|
|
6e38fff9ed | ||
|
|
93ab364abc | ||
|
|
1c60efc0bc | ||
|
|
6768b98568 | ||
|
|
3cdfbda2eb | ||
|
|
4552cfc271 | ||
|
|
91fba07e49 | ||
|
|
38d1b2f873 | ||
|
|
e4fb662c8d | ||
|
|
810b9c613b | ||
|
|
341cabec0e | ||
|
|
5b98095ac3 | ||
|
|
59b28e71a0 | ||
|
|
071d787a45 | ||
|
|
a54a424478 | ||
|
|
c961fb58eb | ||
|
|
c7140bf817 | ||
|
|
5df9c410ff | ||
|
|
c8f203293d | ||
|
|
b06e1cea54 | ||
|
|
6ea968d576 |
6
.github/workflows/docker.yml
vendored
@@ -41,17 +41,17 @@ jobs:
|
||||
platforms: all
|
||||
|
||||
- name: Set up Docker Build
|
||||
uses: docker/setup-buildx-action@v2.0.0
|
||||
uses: docker/setup-buildx-action@v2.1.0
|
||||
|
||||
- name: Login to DockerHub
|
||||
if: ${{ github.event_name != 'pull_request' }}
|
||||
uses: docker/login-action@v2.0.0
|
||||
uses: docker/login-action@v2.1.0
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
|
||||
- name: Publish to Docker Hub
|
||||
uses: docker/build-push-action@v3.1.1
|
||||
uses: docker/build-push-action@v3.2.0
|
||||
with:
|
||||
context: .
|
||||
file: ./DOCKER/Dockerfile
|
||||
|
||||
4
.github/workflows/e2e-nightly-34x.yml
vendored
@@ -57,7 +57,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Notify Slack on failure
|
||||
uses: slackapi/slack-github-action@v1.22.0
|
||||
uses: slackapi/slack-github-action@v1.23.0
|
||||
env:
|
||||
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
|
||||
SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK
|
||||
@@ -72,7 +72,7 @@ jobs:
|
||||
"type": "section",
|
||||
"text": {
|
||||
"type": "mrkdwn",
|
||||
"text": ":skull: Nightly E2E tests for `${{ env.BRANCH }}` failed. See the <${{ env.RUN_URL }}|run details> and the <${{ env.COMMIT_URL }}|commit> that caused the failure."
|
||||
"text": ":skull: Nightly E2E tests for `${{ env.BRANCH }}` failed. See the <${{ env.RUN_URL }}|run details> and the <${{ env.COMMIT_URL }}|commit> related to the failure."
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
4
.github/workflows/e2e-nightly-37x.yml
vendored
@@ -57,7 +57,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Notify Slack on failure
|
||||
uses: slackapi/slack-github-action@v1.22.0
|
||||
uses: slackapi/slack-github-action@v1.23.0
|
||||
env:
|
||||
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
|
||||
SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK
|
||||
@@ -72,7 +72,7 @@ jobs:
|
||||
"type": "section",
|
||||
"text": {
|
||||
"type": "mrkdwn",
|
||||
"text": ":skull: Nightly E2E tests for `${{ env.BRANCH }}` failed. See the <${{ env.RUN_URL }}|run details> and the <${{ env.COMMIT_URL }}|commit> that caused the failure."
|
||||
"text": ":skull: Nightly E2E tests for `${{ env.BRANCH }}` failed. See the <${{ env.RUN_URL }}|run details> and the <${{ env.COMMIT_URL }}|commit> related to the failure."
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
4
.github/workflows/e2e-nightly-main.yml
vendored
@@ -46,7 +46,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Notify Slack on failure
|
||||
uses: slackapi/slack-github-action@v1.22.0
|
||||
uses: slackapi/slack-github-action@v1.23.0
|
||||
env:
|
||||
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
|
||||
SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK
|
||||
@@ -61,7 +61,7 @@ jobs:
|
||||
"type": "section",
|
||||
"text": {
|
||||
"type": "mrkdwn",
|
||||
"text": ":skull: Nightly E2E tests for `${{ env.BRANCH }}` failed. See the <${{ env.RUN_URL }}|run details> and the <${{ env.COMMIT_URL }}|commit> that caused the failure."
|
||||
"text": ":skull: Nightly E2E tests for `${{ env.BRANCH }}` failed. See the <${{ env.RUN_URL }}|run details> and the <${{ env.COMMIT_URL }}|commit> related to the failure."
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
2
.github/workflows/fuzz-nightly.yml
vendored
@@ -76,7 +76,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Notify Slack on failure
|
||||
uses: slackapi/slack-github-action@v1.22.0
|
||||
uses: slackapi/slack-github-action@v1.23.0
|
||||
env:
|
||||
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
|
||||
SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK
|
||||
|
||||
2
.github/workflows/janitor.yml
vendored
@@ -10,7 +10,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 3
|
||||
steps:
|
||||
- uses: styfle/cancel-workflow-action@0.10.1
|
||||
- uses: styfle/cancel-workflow-action@0.11.0
|
||||
with:
|
||||
workflow_id: 1041851,1401230,2837803
|
||||
access_token: ${{ github.token }}
|
||||
|
||||
2
.gitignore
vendored
@@ -55,3 +55,5 @@ proto/spec/**/*.pb.go
|
||||
*.pdf
|
||||
*.gz
|
||||
*.dvi
|
||||
# Python virtual environments
|
||||
.venv
|
||||
|
||||
30
CHANGELOG.md
@@ -2,6 +2,36 @@
|
||||
|
||||
Friendly reminder, we have a [bug bounty program](https://hackerone.com/cosmos).
|
||||
|
||||
## v0.34.22
|
||||
|
||||
This release includes several bug fixes, [one of
|
||||
which](https://github.com/tendermint/tendermint/pull/9518) we discovered while
|
||||
building up a baseline for v0.34 against which to compare our upcoming v0.37
|
||||
release during our [QA process](./docs/qa/).
|
||||
|
||||
Special thanks to external contributors on this release: @RiccardoM
|
||||
|
||||
### FEATURES
|
||||
|
||||
- [rpc] [\#9423](https://github.com/tendermint/tendermint/pull/9423) Support
|
||||
HTTPS URLs from the WebSocket client (@RiccardoM, @cmwaters)
|
||||
|
||||
### BUG FIXES
|
||||
|
||||
- [config] [\#9483](https://github.com/tendermint/tendermint/issues/9483)
|
||||
Calling `tendermint init` would incorrectly leave out the new `[storage]`
|
||||
section delimiter in the generated configuration file - this has now been
|
||||
fixed
|
||||
- [p2p] [\#9500](https://github.com/tendermint/tendermint/issues/9500) Prevent
|
||||
peers who have errored being added to the peer set (@jmalicevic)
|
||||
- [indexer] [\#9473](https://github.com/tendermint/tendermint/issues/9473) Fix
|
||||
bug that caused the psql indexer to index empty blocks whenever one of the
|
||||
transactions returned a non zero code. The relevant deduplication logic has
|
||||
been moved within the kv indexer only (@cmwaters)
|
||||
- [blocksync] [\#9518](https://github.com/tendermint/tendermint/issues/9518) A
|
||||
block sync stall was observed during our QA process whereby the node was
|
||||
unable to make progress. Retrying block requests after a timeout fixes this.
|
||||
|
||||
## v0.34.21
|
||||
|
||||
Release highlights include:
|
||||
|
||||
2
Makefile
@@ -271,7 +271,7 @@ format:
|
||||
|
||||
lint:
|
||||
@echo "--> Running linter"
|
||||
@golangci-lint run
|
||||
@go run github.com/golangci/golangci-lint/cmd/golangci-lint run
|
||||
.PHONY: lint
|
||||
|
||||
DESTINATION = ./index.html.md
|
||||
|
||||
@@ -703,9 +703,6 @@ type MempoolConfig struct {
|
||||
// Mempool version to use:
|
||||
// 1) "v0" - (default) FIFO mempool.
|
||||
// 2) "v1" - prioritized mempool.
|
||||
// WARNING: There's a known memory leak with the prioritized mempool
|
||||
// that the team are working on. Read more here:
|
||||
// https://github.com/tendermint/tendermint/issues/8775
|
||||
Version string `mapstructure:"version"`
|
||||
RootDir string `mapstructure:"home"`
|
||||
Recheck bool `mapstructure:"recheck"`
|
||||
|
||||
@@ -99,4 +99,4 @@ configuration file that we can update with PRs.
|
||||
Because the build processes are identical (as is the information contained
|
||||
herein), this file should be kept in sync as much as possible with its
|
||||
[counterpart in the Cosmos SDK
|
||||
repo](https://github.com/cosmos/cosmos-sdk/blob/master/docs/DOCS_README.md).
|
||||
repo](https://github.com/cosmos/cosmos-sdk/blob/main/docs/README.md).
|
||||
|
||||
@@ -61,7 +61,7 @@ The following protocols and application features require a reliable source of ti
|
||||
* Tendermint Light Clients [rely on correspondence between their known time](https://github.com/tendermint/tendermint/blob/main/spec/light-client/verification/README.md#definitions-1) and the block time for block verification.
|
||||
* Tendermint Evidence validity is determined [either in terms of heights or in terms of time](https://github.com/tendermint/tendermint/blob/8029cf7a0fcc89a5004e173ec065aa48ad5ba3c8/spec/consensus/evidence.md#verification).
|
||||
* Unbonding of staked assets in the Cosmos Hub [occurs after a period of 21 days](https://github.com/cosmos/governance/blob/ce75de4019b0129f6efcbb0e752cd2cc9e6136d3/params-change/Staking.md#unbondingtime).
|
||||
* IBC packets can use either a [timestamp or a height to timeout packet delivery](https://docs.cosmos.network/v0.44/ibc/overview.html#acknowledgements)
|
||||
* IBC packets can use either a [timestamp or a height to timeout packet delivery](https://docs.cosmos.network/v0.45/ibc/overview.html#acknowledgements)
|
||||
|
||||
Finally, inflation distribution in the Cosmos Hub uses an approximation of time to calculate an annual percentage rate.
|
||||
This approximation of time is calculated using [block heights with an estimated number of blocks produced in a year](https://github.com/cosmos/governance/blob/master/params-change/Mint.md#blocksperyear).
|
||||
|
||||
23
docs/qa/README.md
Normal file
@@ -0,0 +1,23 @@
|
||||
---
|
||||
order: 1
|
||||
parent:
|
||||
title: Tendermint Quality Assurance
|
||||
description: This is a report on the process followed and results obtained when running v0.34.x on testnets
|
||||
order: 2
|
||||
---
|
||||
|
||||
# Tendermint Quality Assurance
|
||||
|
||||
This directory keeps track of the process followed by the Tendermint Core team
|
||||
for Quality Assurance before cutting a release.
|
||||
This directory is to live in multiple branches. On each release branch,
|
||||
the contents of this directory reflect the status of the process
|
||||
at the time the Quality Assurance process was applied for that release.
|
||||
|
||||
File [method](./method.md) keeps track of the process followed to obtain the results
|
||||
used to decide if a release is passing the Quality Assurance process.
|
||||
The results obtained in each release are stored in their own directory.
|
||||
The following releases have undergone the Quality Assurance process:
|
||||
|
||||
* [v0.34.x](./v034/), which was tested just before releasing v0.34.22
|
||||
* [v0.37.x](./v037/), with v0.34.x acting as a baseline
|
||||
214
docs/qa/method.md
Normal file
@@ -0,0 +1,214 @@
|
||||
---
|
||||
order: 1
|
||||
title: Method
|
||||
---
|
||||
|
||||
# Method
|
||||
|
||||
This document provides a detailed description of the QA process.
|
||||
It is intended to be used by engineers reproducing the experimental setup for future tests of Tendermint.
|
||||
|
||||
The (first iteration of the) QA process as described [in the RELEASES.md document][releases]
|
||||
was applied to version v0.34.x in order to have a set of results acting as benchmarking baseline.
|
||||
This baseline is then compared with results obtained in later versions.
|
||||
|
||||
Out of the testnet-based test cases described in [the releases document][releases] we focused on two of them:
|
||||
_200 Node Test_, and _Rotating Nodes Test_.
|
||||
|
||||
[releases]: https://github.com/tendermint/tendermint/blob/v0.37.x/RELEASES.md#large-scale-testnets
|
||||
|
||||
## Software Dependencies
|
||||
|
||||
### Infrastructure Requirements to Run the Tests
|
||||
|
||||
* An account at Digital Ocean (DO), with a high droplet limit (>202)
|
||||
* The machine to orchestrate the tests should have the following installed:
|
||||
* A clone of the [testnet repository][testnet-repo]
|
||||
* This repository contains all the scripts mentioned in the remainder of this section
|
||||
* [Digital Ocean CLI][doctl]
|
||||
* [Terraform CLI][Terraform]
|
||||
* [Ansible CLI][Ansible]
|
||||
|
||||
[testnet-repo]: https://github.com/interchainio/tendermint-testnet
|
||||
[Ansible]: https://docs.ansible.com/ansible/latest/index.html
|
||||
[Terraform]: https://www.terraform.io/docs
|
||||
[doctl]: https://docs.digitalocean.com/reference/doctl/how-to/install/
|
||||
|
||||
### Requirements for Result Extraction
|
||||
|
||||
* Matlab or Octave
|
||||
* [Prometheus][prometheus] server installed
|
||||
* blockstore DB of one of the full nodes in the testnet
|
||||
* Prometheus DB
|
||||
|
||||
[prometheus]: https://prometheus.io/
|
||||
|
||||
## 200 Node Testnet
|
||||
|
||||
### Running the test
|
||||
|
||||
This section explains how the tests were carried out for reproducibility purposes.
|
||||
|
||||
1. [If you haven't done it before]
|
||||
Follow steps 1-4 of the `README.md` at the top of the testnet repository to configure Terraform, and `doctl`.
|
||||
2. Copy file `testnets/testnet200.toml` onto `testnet.toml` (do NOT commit this change)
|
||||
3. Set the variable `VERSION_TAG` in the `Makefile` to the git hash that is to be tested.
|
||||
4. Follow steps 5-10 of the `README.md` to configure and start the 200 node testnet
|
||||
* WARNING: Do NOT forget to run `make terraform-destroy` as soon as you are done with the tests (see step 9)
|
||||
5. As a sanity check, connect to the Prometheus node's web interface and check the graph for the `tendermint_consensus_height` metric.
|
||||
All nodes should be increasing their heights.
|
||||
6. `ssh` into the `testnet-load-runner`, then copy script `script/200-node-loadscript.sh` and run it from the load runner node.
|
||||
* Before running it, you need to edit the script to provide the IP address of a full node.
|
||||
This node will receive all transactions from the load runner node.
|
||||
* This script will take about 40 mins to run
|
||||
* It is running 90-seconds-long experiments in a loop with different loads
|
||||
7. Run `make retrieve-data` to gather all relevant data from the testnet into the orchestrating machine
|
||||
8. Verify that the data was collected without errors
|
||||
* at least one blockstore DB for a Tendermint validator
|
||||
* the Prometheus database from the Prometheus node
|
||||
* for extra care, you can run `zip -T` on the `prometheus.zip` file and (one of) the `blockstore.db.zip` file(s)
|
||||
9. **Run `make terraform-destroy`**
|
||||
* Don't forget to type `yes`! Otherwise you're in trouble.
|
||||
|
||||
### Result Extraction
|
||||
|
||||
The method for extracting the results described here is highly manual (and exploratory) at this stage.
|
||||
The Core team should improve it at every iteration to increase the amount of automation.
|
||||
|
||||
#### Steps
|
||||
|
||||
1. Unzip the blockstore into a directory
|
||||
2. Extract the latency report and the raw latencies for all the experiments. Run these commands from the directory containing the blockstore
|
||||
* `go run github.com/tendermint/tendermint/test/loadtime/cmd/report@3ec6e424d --database-type goleveldb --data-dir ./ > results/report.txt`
|
||||
* `go run github.com/tendermint/tendermint/test/loadtime/cmd/report@3ec6e424d --database-type goleveldb --data-dir ./ --csv results/raw.csv`
|
||||
3. File `report.txt` contains an unordered list of experiments with varying concurrent connections and transaction rate
|
||||
* Create files `report01.txt`, `report02.txt`, `report04.txt` and, for each experiment in file `report.txt`,
|
||||
copy its related lines to the filename that matches the number of connections.
|
||||
* Sort the experiments in `report01.txt` in ascending tx rate order. Likewise for `report02.txt` and `report04.txt`.
|
||||
4. Generate file `report_tabbed.txt` by showing the contents of `report01.txt`, `report02.txt`, `report04.txt` side by side
|
||||
* This effectively creates a table where rows are a particular tx rate and columns are a particular number of websocket connections.
|
||||
5. Extract the raw latencies from file `raw.csv` using the following bash loop. This creates a `.csv` file and a `.dat` file per experiment.
|
||||
The format of the `.dat` files is amenable to loading them as matrices in Octave
|
||||
|
||||
```bash
|
||||
uuids=($(cat report01.txt report02.txt report04.txt | grep '^Experiment ID: ' | awk '{ print $3 }'))
|
||||
c=1
|
||||
for i in 01 02 04; do
|
||||
for j in 0025 0050 0100 0200; do
|
||||
echo $i $j $c "${uuids[$c]}"
|
||||
filename=c${i}_r${j}
|
||||
grep ${uuids[$c]} raw.csv > ${filename}.csv
|
||||
cat ${filename}.csv | tr , ' ' | awk '{ print $2, $3 }' > ${filename}.dat
|
||||
c=$(expr $c + 1)
|
||||
done
|
||||
done
|
||||
```
|
||||
|
||||
6. Enter Octave
|
||||
7. Load all `.dat` files generated in step 5 into matrices using this Octave code snippet
|
||||
|
||||
```octave
|
||||
conns = { "01"; "02"; "04" };
|
||||
rates = { "0025"; "0050"; "0100"; "0200" };
|
||||
for i = 1:length(conns)
|
||||
for j = 1:length(rates)
|
||||
filename = strcat("c", conns{i}, "_r", rates{j}, ".dat");
|
||||
load("-ascii", filename);
|
||||
endfor
|
||||
endfor
|
||||
```
|
||||
|
||||
8. Set variable release to the current release undergoing QA
|
||||
|
||||
```octave
|
||||
release = "v0.34.x";
|
||||
```
|
||||
|
||||
9. Generate a plot with all (or some) experiments, where the X axis is the experiment time,
|
||||
and the y axis is the latency of transactions.
|
||||
The following snippet plots all experiments.
|
||||
|
||||
```octave
|
||||
legends = {};
|
||||
hold off;
|
||||
for i = 1:length(conns)
|
||||
for j = 1:length(rates)
|
||||
data_name = strcat("c", conns{i}, "_r", rates{j});
|
||||
l = strcat("c=", conns{i}, " r=", rates{j});
|
||||
m = eval(data_name); plot((m(:,1) - min(m(:,1))) / 1e+9, m(:,2) / 1e+9, ".");
|
||||
hold on;
|
||||
legends(1, end+1) = l;
|
||||
endfor
|
||||
endfor
|
||||
legend(legends, "location", "northeastoutside");
|
||||
xlabel("experiment time (s)");
|
||||
ylabel("latency (s)");
|
||||
t = sprintf("200-node testnet - %s", release);
|
||||
title(t);
|
||||
```
|
||||
|
||||
10. Consider adjusting the axis, in case you want to compare your results to the baseline, for instance
|
||||
|
||||
```octave
|
||||
axis([0, 100, 0, 30], "tic");
|
||||
```
|
||||
|
||||
11. Use Octave's GUI menu to save the plot (e.g. as `.png`)
|
||||
|
||||
12. Repeat steps 9 and 10 to obtain as many plots as deemed necessary.
|
||||
|
||||
13. To generate a latency vs throughput plot, using the raw CSV file generated
|
||||
in step 2, follow the instructions for the [`latency_throughput.py`] script.
|
||||
|
||||
[`latency_throughput.py`]: ../../scripts/qa/reporting/README.md
|
||||
|
||||
#### Extracting Prometheus Metrics
|
||||
|
||||
1. Stop the prometheus server if it is running as a service (e.g. a `systemd` unit).
|
||||
2. Unzip the prometheus database retrieved from the testnet, and move it to replace the
|
||||
local prometheus database.
|
||||
3. Start the prometheus server and make sure no error logs appear at start up.
|
||||
4. Introduce the metrics you want to gather or plot.
|
||||
|
||||
## Rotating Node Testnet
|
||||
|
||||
### Running the test
|
||||
|
||||
This section explains how the tests were carried out for reproducibility purposes.
|
||||
|
||||
1. [If you haven't done it before]
|
||||
Follow steps 1-4 of the `README.md` at the top of the testnet repository to configure Terraform, and `doctl`.
|
||||
2. Copy file `testnet_rotating.toml` onto `testnet.toml` (do NOT commit this change)
|
||||
3. Set variable `VERSION_TAG` to the git hash that is to be tested.
|
||||
4. Run `make terraform-apply EPHEMERAL_SIZE=25`
|
||||
* WARNING: Do NOT forget to run `make terraform-destroy` as soon as you are done with the tests
|
||||
5. Follow steps 6-10 of the `README.md` to configure and start the "stable" part of the rotating node testnet
|
||||
6. As a sanity check, connect to the Prometheus node's web interface and check the graph for the `tendermint_consensus_height` metric.
|
||||
All nodes should be increasing their heights.
|
||||
7. On a different shell,
|
||||
* run `make runload ROTATE_CONNECTIONS=X ROTATE_TX_RATE=Y`
|
||||
* `X` and `Y` should reflect a load below the saturation point (see, e.g.,
|
||||
[this paragraph](./v034/README.md#finding-the-saturation-point) for further info)
|
||||
8. Run `make rotate` to start the script that creates the ephemeral nodes, and kills them when they are caught up.
|
||||
* WARNING: If you run this command from your laptop, the laptop needs to be up and connected for the full length
|
||||
of the experiment.
|
||||
9. When the height of the chain reaches 3000, stop the `make runload` script
|
||||
10. When the rotate script has made two iterations (i.e., all ephemeral nodes have caught up twice)
|
||||
after height 3000 was reached, stop `make rotate`
|
||||
11. Run `make retrieve-data` to gather all relevant data from the testnet into the orchestrating machine
|
||||
12. Verify that the data was collected without errors
|
||||
* at least one blockstore DB for a Tendermint validator
|
||||
* the Prometheus database from the Prometheus node
|
||||
* for extra care, you can run `zip -T` on the `prometheus.zip` file and (one of) the `blockstore.db.zip` file(s)
|
||||
13. **Run `make terraform-destroy`**
|
||||
|
||||
Steps 8 to 10 are highly manual at the moment and will be improved in next iterations.
|
||||
|
||||
### Result Extraction
|
||||
|
||||
In order to obtain a latency plot, follow the instructions above for the 200 node experiment, but:
|
||||
|
||||
* The `report.txt` file contains only one experiment
|
||||
* Therefore, no need for any `for` loops
|
||||
|
||||
As for prometheus, the same method as for the 200 node experiment can be applied.
|
||||
278
docs/qa/v034/README.md
Normal file
@@ -0,0 +1,278 @@
|
||||
---
|
||||
order: 1
|
||||
parent:
|
||||
title: Tendermint Quality Assurance Results for v0.34.x
|
||||
description: This is a report on the results obtained when running v0.34.x on testnets
|
||||
order: 2
|
||||
---
|
||||
|
||||
# v0.34.x
|
||||
|
||||
## 200 Node Testnet
|
||||
|
||||
### Finding the Saturation Point
|
||||
|
||||
The first goal when examining the results of the tests is identifying the saturation point.
|
||||
The saturation point is a setup with a transaction load big enough to prevent the testnet
|
||||
from being stable: the load runner tries to produce slightly more transactions than can
|
||||
be processed by the testnet.
|
||||
|
||||
The following table summarizes the results for v0.34.x, for the different experiments
|
||||
(extracted from file [`v034_report_tabbed.txt`](./img/v034_report_tabbed.txt)).
|
||||
|
||||
The X axis of this table is `c`, the number of connections created by the load runner process to the target node.
|
||||
The Y axis of this table is `r`, the rate or number of transactions issued per second.
|
||||
|
||||
| | c=1 | c=2 | c=4 |
|
||||
| :--- | ----: | ----: | ----: |
|
||||
| r=25 | 2225 | 4450 | 8900 |
|
||||
| r=50 | 4450 | 8900 | 17800 |
|
||||
| r=100 | 8900 | 17800 | 35600 |
|
||||
| r=200 | 17800 | 35600 | 38660 |
|
||||
|
||||
The table shows the number of 1024-byte-long transactions that were produced by the load runner,
|
||||
and processed by Tendermint, during the 90 seconds of the experiment's duration.
|
||||
Each cell in the table refers to an experiment with a particular number of websocket connections (`c`)
|
||||
to a chosen validator, and the number of transactions per second that the load runner
|
||||
tries to produce (`r`). Note that the overall load that the tool attempts to generate is $c \cdot r$.
|
||||
|
||||
We can see that the saturation point is beyond the diagonal that spans cells
|
||||
|
||||
* `r=200,c=2`
|
||||
* `r=100,c=4`
|
||||
|
||||
given that the total transactions should be close to the product of the rate, the number of connections,
|
||||
and the experiment time (89 seconds, since the last batch never gets sent).
|
||||
|
||||
All experiments below the saturation diagonal (`r=200,c=4`) have in common that the total
|
||||
number of transactions processed is noticeably less than the product $c \cdot r \cdot 89$,
|
||||
which is the expected number of transactions when the system is able to deal well with the
|
||||
load.
|
||||
With `r=200,c=4`, we obtained 38660 whereas the theoretical number of transactions should
|
||||
have been $200 \cdot 4 \cdot 89 = 71200$.
|
||||
|
||||
At this point, we chose an experiment at the limit of the saturation diagonal,
|
||||
in order to further study the performance of this release.
|
||||
**The chosen experiment is `r=200,c=2`**.
|
||||
|
||||
This is a plot of the CPU load (average over 1 minute, as output by `top`) of the load runner for `r=200,c=2`,
|
||||
where we can see that the load stays close to 0 most of the time.
|
||||
|
||||

|
||||
|
||||
### Examining latencies
|
||||
|
||||
The method described [here](../method.md) allows us to plot the latencies of transactions
|
||||
for all experiments.
|
||||
|
||||

|
||||
|
||||
As we can see, even the experiments beyond the saturation diagonal managed to keep
|
||||
transaction latency stable (i.e. not constantly increasing).
|
||||
Our interpretation for this is that contention within Tendermint was propagated,
|
||||
via the websockets, to the load runner,
|
||||
hence the load runner could not produce the target load, but a fraction of it.
|
||||
|
||||
Further examination of the Prometheus data (see below), showed that the mempool contained many transactions
|
||||
at steady state, but did not grow much without quickly returning to this steady state. This demonstrates
|
||||
that the transactions were able to be processed by the Tendermint network at least as quickly as they
|
||||
were submitted to the mempool. Finally, the test script made sure that, at the end of an experiment, the
|
||||
mempool was empty so that all transactions submitted to the chain were processed.
|
||||
|
||||
Finally, the number of points present in the plot appears to be much less than expected given the
|
||||
number of transactions in each experiment, particularly close to or above the saturation diagonal.
|
||||
This is a visual effect of the plot; what appear to be points in the plot are actually potentially huge
|
||||
clusters of points. To corroborate this, we have zoomed in the plot above by setting (carefully chosen)
|
||||
tiny axis intervals. The cluster shown below looks like a single point in the plot above.
|
||||
|
||||

|
||||
|
||||
The plot of latencies can be used as a baseline to compare with other releases.
|
||||
|
||||
The following plot summarizes average latencies versus overall throughputs
|
||||
across different numbers of WebSocket connections to the node into which
|
||||
transactions are being loaded.
|
||||
|
||||

|
||||
|
||||
### Prometheus Metrics on the Chosen Experiment
|
||||
|
||||
As mentioned [above](#finding-the-saturation-point), the chosen experiment is `r=200,c=2`.
|
||||
This section further examines key metrics for this experiment extracted from Prometheus data.
|
||||
|
||||
#### Mempool Size
|
||||
|
||||
The mempool size, a count of the number of transactions in the mempool, was shown to be stable and homogeneous
|
||||
at all full nodes. It did not exhibit any unconstrained growth.
|
||||
The plot below shows the evolution over time of the cumulative number of transactions inside all full nodes' mempools
|
||||
at a given time.
|
||||
The two spikes that can be observed correspond to a period where consensus instances proceeded beyond the initial round
|
||||
at some nodes.
|
||||
|
||||

|
||||
|
||||
The plot below shows evolution of the average over all full nodes, which oscillates between 1500 and 2000
|
||||
outstanding transactions.
|
||||
|
||||

|
||||
|
||||
The peaks observed coincide with the moments when some nodes proceeded beyond the initial round of consensus (see below).
|
||||
|
||||
#### Peers
|
||||
|
||||
The number of peers was stable at all nodes.
|
||||
It was higher for the seed nodes (around 140) than for the rest (between 21 and 74).
|
||||
The fact that non-seed nodes reach more than 50 peers is due to #9548.
|
||||
|
||||

|
||||
|
||||
#### Consensus Rounds per Height
|
||||
|
||||
Most heights took just one round, but some nodes needed to advance to round 1 at some point.
|
||||
|
||||

|
||||
|
||||
#### Blocks Produced per Minute, Transactions Processed per Minute
|
||||
|
||||
The blocks produced per minute are the slope of this plot.
|
||||
|
||||

|
||||
|
||||
Over a period of 2 minutes, the height goes from 530 to 569.
|
||||
This results in an average of 19.5 blocks produced per minute.
|
||||
|
||||
The transactions processed per minute are the slope of this plot.
|
||||
|
||||

|
||||
|
||||
Over a period of 2 minutes, the total goes from 64525 to 100125 transactions,
|
||||
resulting in 17800 transactions per minute. However, we can see in the plot that
|
||||
all transactions in the load are processed long before the two minutes.
|
||||
If we adjust the time window when transactions are processed (approx. 105 seconds),
|
||||
we obtain 20343 transactions per minute.
|
||||
|
||||
#### Memory Resident Set Size
|
||||
|
||||
Resident Set Size of all monitored processes is plotted below.
|
||||
|
||||

|
||||
|
||||
The average over all processes oscillates around 1.2 GiB and does not demonstrate unconstrained growth.
|
||||
|
||||

|
||||
|
||||
#### CPU utilization
|
||||
|
||||
The best metric from Prometheus to gauge CPU utilization in a Unix machine is `load1`,
|
||||
as it usually appears in the
|
||||
[output of `top`](https://www.digitalocean.com/community/tutorials/load-average-in-linux).
|
||||
|
||||

|
||||
|
||||
It is contained in most cases below 5, which is generally considered acceptable load.
|
||||
|
||||
### Test Result
|
||||
|
||||
**Result: N/A** (v0.34.x is the baseline)
|
||||
|
||||
Date: 2022-10-14
|
||||
|
||||
Version: 3ec6e424d6ae4c96867c2dcf8310572156068bb6
|
||||
|
||||
## Rotating Node Testnet
|
||||
|
||||
For this testnet, we will use a load that can safely be considered below the saturation
|
||||
point for the size of this testnet (between 13 and 38 full nodes): `c=4,r=800`.
|
||||
|
||||
N.B.: The version of Tendermint used for these tests is affected by #9539.
|
||||
However, the reduced load that reaches the mempools is orthogonal to functionality
|
||||
we are focusing on here.
|
||||
|
||||
### Latencies
|
||||
|
||||
All latencies can be seen in the following plot.
|
||||
|
||||

|
||||
|
||||
We can observe there are some very high latencies, towards the end of the test.
|
||||
Upon suspicion that they are duplicate transactions, we examined the latencies
|
||||
raw file and discovered there are more than 100K duplicate transactions.
|
||||
|
||||
The following plot shows the latencies file where all duplicate transactions have
|
||||
been removed, i.e., only the first occurrence of a duplicate transaction is kept.
|
||||
|
||||

|
||||
|
||||
This problem, existing in `v0.34.x`, will need to be addressed, perhaps in the same way
|
||||
we addressed it when running the 200 node test with high loads: increasing the `cache_size`
|
||||
configuration parameter.
|
||||
|
||||
### Prometheus Metrics
|
||||
|
||||
The set of metrics shown here is smaller than for the 200 node experiment.
|
||||
We are only interested in those for which the catch-up process (blocksync) may have an impact.
|
||||
|
||||
#### Blocks and Transactions per minute
|
||||
|
||||
Just as shown for the 200 node test, the blocks produced per minute are the gradient of this plot.
|
||||
|
||||

|
||||
|
||||
Over a period of 5229 seconds, the height goes from 2 to 3638.
|
||||
This results in an average of 41 blocks produced per minute.
|
||||
|
||||
The following plot shows only the heights reported by ephemeral nodes
|
||||
(which are also included in the plot above). Note that the _height_ metric
|
||||
is only shown _once the node has switched to consensus_, hence the gaps
|
||||
when nodes are killed, wiped out, started from scratch, and catching up.
|
||||
|
||||

|
||||
|
||||
The transactions processed per minute are the gradient of this plot.
|
||||
|
||||

|
||||
|
||||
The small lines we see periodically close to `y=0` are the transactions that
|
||||
ephemeral nodes start processing when they are caught up.
|
||||
|
||||
Over a period of 5229 seconds, the total goes from 0 to 387697 transactions,
|
||||
resulting in 4449 transactions per minute. We can see some abrupt changes in
|
||||
the plot's gradient. This will need to be investigated.
|
||||
|
||||
#### Peers
|
||||
|
||||
The plot below shows the evolution in peers throughout the experiment.
|
||||
The periodic changes observed are due to the ephemeral nodes being stopped,
|
||||
wiped out, and recreated.
|
||||
|
||||

|
||||
|
||||
The validators' plots are concentrated at the higher part of the graph, whereas the ephemeral nodes
|
||||
are mostly at the lower part.
|
||||
|
||||
#### Memory Resident Set Size
|
||||
|
||||
The average Resident Set Size (RSS) over all processes seems stable, and slightly growing toward the end.
|
||||
This might be related to the increase in transaction load observed above.
|
||||
|
||||

|
||||
|
||||
The memory taken by the validators and the ephemeral nodes (when they are up) is comparable.
|
||||
|
||||
#### CPU utilization
|
||||
|
||||
The plot shows metric `load1` for all nodes.
|
||||
|
||||

|
||||
|
||||
It is contained under 5 most of the time, which is considered normal load.
|
||||
The purple line, which follows a different pattern, is the validator receiving all
|
||||
transactions, via RPC, from the load runner process.
|
||||
|
||||
### Test Result
|
||||
|
||||
**Result: N/A**
|
||||
|
||||
Date: 2022-10-10
|
||||
|
||||
Version: a28c987f5a604ff66b515dd415270063e6fb069d
|
||||
BIN
docs/qa/v034/img/v034_200node_latencies.png
Normal file
|
After Width: | Height: | Size: 42 KiB |
BIN
docs/qa/v034/img/v034_200node_latencies_zoomed.png
Normal file
|
After Width: | Height: | Size: 34 KiB |
BIN
docs/qa/v034/img/v034_latency_throughput.png
Normal file
|
After Width: | Height: | Size: 35 KiB |
BIN
docs/qa/v034/img/v034_r200c2_heights.png
Normal file
|
After Width: | Height: | Size: 378 KiB |
BIN
docs/qa/v034/img/v034_r200c2_load-runner.png
Normal file
|
After Width: | Height: | Size: 150 KiB |
BIN
docs/qa/v034/img/v034_r200c2_load1.png
Normal file
|
After Width: | Height: | Size: 759 KiB |
BIN
docs/qa/v034/img/v034_r200c2_mempool_size.png
Normal file
|
After Width: | Height: | Size: 2.4 MiB |
BIN
docs/qa/v034/img/v034_r200c2_mempool_size_avg.png
Normal file
|
After Width: | Height: | Size: 192 KiB |
BIN
docs/qa/v034/img/v034_r200c2_peers.png
Normal file
|
After Width: | Height: | Size: 130 KiB |
BIN
docs/qa/v034/img/v034_r200c2_rounds.png
Normal file
|
After Width: | Height: | Size: 1.0 MiB |
BIN
docs/qa/v034/img/v034_r200c2_rss.png
Normal file
|
After Width: | Height: | Size: 926 KiB |
BIN
docs/qa/v034/img/v034_r200c2_rss_avg.png
Normal file
|
After Width: | Height: | Size: 157 KiB |
BIN
docs/qa/v034/img/v034_r200c2_total-txs.png
Normal file
|
After Width: | Height: | Size: 534 KiB |
52
docs/qa/v034/img/v034_report_tabbed.txt
Normal file
@@ -0,0 +1,52 @@
|
||||
Experiment ID: 3d5cf4ef-1a1a-4b46-aa2d-da5643d2e81e │Experiment ID: 80e472ec-13a1-4772-a827-3b0c907fb51d │Experiment ID: 07aca6cf-c5a4-4696-988f-e3270fc6333b
|
||||
│ │
|
||||
Connections: 1 │ Connections: 2 │ Connections: 4
|
||||
Rate: 25 │ Rate: 25 │ Rate: 25
|
||||
Size: 1024 │ Size: 1024 │ Size: 1024
|
||||
│ │
|
||||
Total Valid Tx: 2225 │ Total Valid Tx: 4450 │ Total Valid Tx: 8900
|
||||
Total Negative Latencies: 0 │ Total Negative Latencies: 0 │ Total Negative Latencies: 0
|
||||
Minimum Latency: 599.404362ms │ Minimum Latency: 448.145181ms │ Minimum Latency: 412.485729ms
|
||||
Maximum Latency: 3.539686885s │ Maximum Latency: 3.237392049s │ Maximum Latency: 12.026665368s
|
||||
Average Latency: 1.441485349s │ Average Latency: 1.441267946s │ Average Latency: 2.150192457s
|
||||
Standard Deviation: 541.049869ms │ Standard Deviation: 525.040007ms │ Standard Deviation: 2.233852478s
|
||||
│ │
|
||||
Experiment ID: 953dc544-dd40-40e8-8712-20c34c3ce45e │Experiment ID: d31fc258-16e7-45cd-9dc8-13ab87bc0b0a │Experiment ID: 15d90a7e-b941-42f4-b411-2f15f857739e
|
||||
│ │
|
||||
Connections: 1 │ Connections: 2 │ Connections: 4
|
||||
Rate: 50 │ Rate: 50 │ Rate: 50
|
||||
Size: 1024 │ Size: 1024 │ Size: 1024
|
||||
│ │
|
||||
Total Valid Tx: 4450 │ Total Valid Tx: 8900 │ Total Valid Tx: 17800
|
||||
Total Negative Latencies: 0 │ Total Negative Latencies: 0 │ Total Negative Latencies: 0
|
||||
Minimum Latency: 482.046942ms │ Minimum Latency: 435.458913ms │ Minimum Latency: 510.746448ms
|
||||
Maximum Latency: 3.761483455s │ Maximum Latency: 7.175583584s │ Maximum Latency: 6.551497882s
|
||||
Average Latency: 1.450408183s │ Average Latency: 1.681673116s │ Average Latency: 1.738083875s
|
||||
Standard Deviation: 587.560056ms │ Standard Deviation: 1.147902047s │ Standard Deviation: 943.46522ms
|
||||
│ │
|
||||
Experiment ID: 9a0b9980-9ce6-4db5-a80a-65ca70294b87 │Experiment ID: df8fa4f4-80af-4ded-8a28-356d15018b43 │Experiment ID: d0e41c2c-89c0-4f38-8e34-ca07adae593a
|
||||
│ │
|
||||
Connections: 1 │ Connections: 2 │ Connections: 4
|
||||
Rate: 100 │ Rate: 100 │ Rate: 100
|
||||
Size: 1024 │ Size: 1024 │ Size: 1024
|
||||
│ │
|
||||
Total Valid Tx: 8900 │ Total Valid Tx: 17800 │ Total Valid Tx: 35600
|
||||
Total Negative Latencies: 0 │ Total Negative Latencies: 0 │ Total Negative Latencies: 0
|
||||
Minimum Latency: 477.417219ms │ Minimum Latency: 564.29247ms │ Minimum Latency: 840.71089ms
|
||||
Maximum Latency: 6.63744785s │ Maximum Latency: 6.988553219s │ Maximum Latency: 9.555312398s
|
||||
Average Latency: 1.561216103s │ Average Latency: 1.76419063s │ Average Latency: 3.200941683s
|
||||
Standard Deviation: 1.011333552s │ Standard Deviation: 1.068459423s │ Standard Deviation: 1.732346601s
|
||||
│ │
|
||||
Experiment ID: 493df3ee-4a36-4bce-80f8-6d65da66beda │Experiment ID: 13060525-f04f-46f6-8ade-286684b2fe50 │Experiment ID: 1777cbd2-8c96-42e4-9ec7-9b21f2225e4d
|
||||
│ │
|
||||
Connections: 1 │ Connections: 2 │ Connections: 4
|
||||
Rate: 200 │ Rate: 200 │ Rate: 200
|
||||
Size: 1024 │ Size: 1024 │ Size: 1024
|
||||
│ │
|
||||
Total Valid Tx: 17800 │ Total Valid Tx: 35600 │ Total Valid Tx: 38660
|
||||
Total Negative Latencies: 0 │ Total Negative Latencies: 0 │ Total Negative Latencies: 0
|
||||
Minimum Latency: 493.705261ms │ Minimum Latency: 955.090573ms │ Minimum Latency: 1.9485821s
|
||||
Maximum Latency: 7.440921872s │ Maximum Latency: 10.086673491s │ Maximum Latency: 17.73103976s
|
||||
Average Latency: 1.875510582s │ Average Latency: 3.438130099s │ Average Latency: 8.143862237s
|
||||
Standard Deviation: 1.304336995s │ Standard Deviation: 1.966391574s │ Standard Deviation: 3.943140002s
|
||||
|
||||
BIN
docs/qa/v034/img/v034_rotating_heights.png
Normal file
|
After Width: | Height: | Size: 157 KiB |
BIN
docs/qa/v034/img/v034_rotating_heights_ephe.png
Normal file
|
After Width: | Height: | Size: 140 KiB |
BIN
docs/qa/v034/img/v034_rotating_latencies.png
Normal file
|
After Width: | Height: | Size: 22 KiB |
BIN
docs/qa/v034/img/v034_rotating_latencies_uniq.png
Normal file
|
After Width: | Height: | Size: 22 KiB |
BIN
docs/qa/v034/img/v034_rotating_load1.png
Normal file
|
After Width: | Height: | Size: 1.5 MiB |
BIN
docs/qa/v034/img/v034_rotating_peers.png
Normal file
|
After Width: | Height: | Size: 486 KiB |
BIN
docs/qa/v034/img/v034_rotating_rss_avg.png
Normal file
|
After Width: | Height: | Size: 193 KiB |
BIN
docs/qa/v034/img/v034_rotating_total-txs.png
Normal file
|
After Width: | Height: | Size: 197 KiB |
326
docs/qa/v037/README.md
Normal file
@@ -0,0 +1,326 @@
|
||||
---
|
||||
order: 1
|
||||
parent:
|
||||
title: Tendermint Quality Assurance Results for v0.37.x
|
||||
description: This is a report on the results obtained when running v0.37.x on testnets
|
||||
order: 2
|
||||
---
|
||||
|
||||
# v0.37.x
|
||||
|
||||
## Issues discovered
|
||||
|
||||
During this iteration of the QA process, the following issues were found:
|
||||
|
||||
* (critical, fixed) [\#9533] - This bug caused full nodes to sometimes get stuck
|
||||
when blocksyncing, requiring a manual restart to unblock them. Importantly,
|
||||
this bug was also present in v0.34.x and the fix was also backported in
|
||||
[\#9534].
|
||||
* (critical, fixed) [\#9539] - `loadtime` is very likely to include more than
|
||||
one "=" character in transactions, which is rejected by the e2e application.
|
||||
* (critical, fixed) [\#9581] - Absent prometheus label makes Tendermint crash
|
||||
when enabling Prometheus metric collection
|
||||
* (non-critical, not fixed) [\#9548] - Full nodes can go over 50 connected
|
||||
peers, which is not intended by the default configuration.
|
||||
* (non-critical, not fixed) [\#9537] - With the default mempool cache setting,
|
||||
duplicated transactions are not rejected when gossipped and eventually flood
|
||||
all mempools. The 200 node testnets were thus run with a value of 200000 (as
|
||||
opposed to the default 10000)
|
||||
|
||||
## 200 Node Testnet
|
||||
|
||||
### Finding the Saturation Point
|
||||
|
||||
The first goal is to identify the saturation point and compare it with the baseline (v0.34.x).
|
||||
For further details, see [this paragraph](../v034/README.md#finding-the-saturation-point)
|
||||
in the baseline version.
|
||||
|
||||
The following table summarizes the results for v0.37.x, for the different experiments
|
||||
(extracted from file [`v037_report_tabbed.txt`](./img/v037_report_tabbed.txt)).
|
||||
|
||||
The X axis of this table is `c`, the number of connections created by the load runner process to the target node.
|
||||
The Y axis of this table is `r`, the rate or number of transactions issued per second.
|
||||
|
||||
| | c=1 | c=2 | c=4 |
|
||||
| :--- | ----: | ----: | ----: |
|
||||
| r=25 | 2225 | 4450 | 8900 |
|
||||
| r=50 | 4450 | 8900 | 17800 |
|
||||
| r=100 | 8900 | 17800 | 35400 |
|
||||
| r=200 | 17800 | 35600 | 37358 |
|
||||
|
||||
For comparison, this is the table with the baseline version.
|
||||
|
||||
| | c=1 | c=2 | c=4 |
|
||||
| :--- | ----: | ----: | ----: |
|
||||
| r=25 | 2225 | 4450 | 8900 |
|
||||
| r=50 | 4450 | 8900 | 17800 |
|
||||
| r=100 | 8900 | 17800 | 35600 |
|
||||
| r=200 | 17800 | 35600 | 38660 |
|
||||
|
||||
The saturation point is beyond the diagonal:
|
||||
|
||||
* `r=200,c=2`
|
||||
* `r=100,c=4`
|
||||
|
||||
which is at the same place as the baseline. For more details on the saturation point, see
|
||||
[this paragraph](../v034/README.md#finding-the-saturation-point) in the baseline version.
|
||||
|
||||
The experiment chosen to examine Prometheus metrics is the same as in the baseline:
|
||||
**`r=200,c=2`**.
|
||||
|
||||
The load runner's CPU load was negligible (near 0) when running `r=200,c=2`.
|
||||
|
||||
### Examining latencies
|
||||
|
||||
The method described [here](../method.md) allows us to plot the latencies of transactions
|
||||
for all experiments.
|
||||
|
||||

|
||||
|
||||
The data seen in the plot is similar to that of the baseline.
|
||||
|
||||

|
||||
|
||||
Therefore, for further details on these plots,
|
||||
see [this paragraph](../v034/README.md#examining-latencies) in the baseline version.
|
||||
|
||||
The following plot summarizes average latencies versus overall throughputs
|
||||
across different numbers of WebSocket connections to the node into which
|
||||
transactions are being loaded.
|
||||
|
||||

|
||||
|
||||
This is similar to that of the baseline plot:
|
||||
|
||||

|
||||
|
||||
### Prometheus Metrics on the Chosen Experiment
|
||||
|
||||
As mentioned [above](#finding-the-saturation-point), the chosen experiment is `r=200,c=2`.
|
||||
This section further examines key metrics for this experiment extracted from Prometheus data.
|
||||
|
||||
#### Mempool Size
|
||||
|
||||
The mempool size, a count of the number of transactions in the mempool, was shown to be stable and homogeneous
|
||||
at all full nodes. It did not exhibit any unconstrained growth.
|
||||
The plot below shows the evolution over time of the cumulative number of transactions inside all full nodes' mempools
|
||||
at a given time.
|
||||
|
||||

|
||||
|
||||
The plot below shows the evolution of the average over all full nodes, which oscillates between 1500 and 2000 outstanding transactions.
|
||||
|
||||

|
||||
|
||||
The peaks observed coincide with the moments when some nodes reached round 1 of consensus (see below).
|
||||
|
||||
**These plots yield similar results to the baseline**:
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
#### Peers
|
||||
|
||||
The number of peers was stable at all nodes.
|
||||
It was higher for the seed nodes (around 140) than for the rest (between 16 and 78).
|
||||
|
||||

|
||||
|
||||
Just as in the baseline, the fact that non-seed nodes reach more than 50 peers is due to [\#9548].
|
||||
|
||||
**This plot yields similar results to the baseline**:
|
||||
|
||||

|
||||
|
||||
#### Consensus Rounds per Height
|
||||
|
||||
Most heights took just one round, but some nodes needed to advance to round 1 at some point.
|
||||
|
||||

|
||||
|
||||
**This plot yields slightly better results than the baseline**:
|
||||
|
||||

|
||||
|
||||
#### Blocks Produced per Minute, Transactions Processed per Minute
|
||||
|
||||
The blocks produced per minute are the gradient of this plot.
|
||||
|
||||

|
||||
|
||||
Over a period of 2 minutes, the height goes from 477 to 524.
|
||||
This results in an average of 23.5 blocks produced per minute.
|
||||
|
||||
The transactions processed per minute are the gradient of this plot.
|
||||
|
||||

|
||||
|
||||
Over a period of 2 minutes, the total goes from 64525 to 100125 transactions,
|
||||
resulting in 17800 transactions per minute. However, we can see in the plot that
|
||||
all transactions in the load are processed long before the two minutes.
|
||||
If we adjust the time window when transactions are processed (approx. 90 seconds),
|
||||
we obtain 23733 transactions per minute.
|
||||
|
||||
**These plots yield similar results to the baseline**:
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
#### Memory Resident Set Size
|
||||
|
||||
Resident Set Size of all monitored processes is plotted below.
|
||||
|
||||

|
||||
|
||||
The average over all processes oscillates around 380 MiB and does not demonstrate unconstrained growth.
|
||||
|
||||

|
||||
|
||||
**These plots yield similar results to the baseline**:
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
#### CPU utilization
|
||||
|
||||
The best metric from Prometheus to gauge CPU utilization in a Unix machine is `load1`,
|
||||
as it usually appears in the
|
||||
[output of `top`](https://www.digitalocean.com/community/tutorials/load-average-in-linux).
|
||||
|
||||

|
||||
|
||||
It is contained below 5 on most nodes.
|
||||
|
||||
**This plot yields similar results to the baseline**:
|
||||
|
||||

|
||||
|
||||
### Test Result
|
||||
|
||||
**Result: PASS**
|
||||
|
||||
Date: 2022-10-14
|
||||
|
||||
Version: 1cf9d8e276afe8595cba960b51cd056514965fd1
|
||||
|
||||
## Rotating Node Testnet
|
||||
|
||||
We use the same load as in the baseline: `c=4,r=800`.
|
||||
|
||||
Just as in the baseline tests, the version of Tendermint used for these tests is affected by [\#9539].
|
||||
See this paragraph in the [baseline report](../v034/README.md#rotating-node-testnet) for further details.
|
||||
Finally, note that this setup allows for a fairer comparison between this version and the baseline.
|
||||
|
||||
### Latencies
|
||||
|
||||
The plot of all latencies can be seen here.
|
||||
|
||||

|
||||
|
||||
This is similar to the baseline.
|
||||
|
||||

|
||||
|
||||
Note that we are comparing against the baseline plot with _unique_
|
||||
transactions. This is because the problem with duplicate transactions
|
||||
detected during the baseline experiment did not show up for `v0.37`,
|
||||
which is _not_ proof that the problem is not present in `v0.37`.
|
||||
|
||||
### Prometheus Metrics
|
||||
|
||||
The set of metrics shown here matches those shown on the baseline (`v0.34`) for the same experiment.
|
||||
We also show the baseline results for comparison.
|
||||
|
||||
#### Blocks and Transactions per minute
|
||||
|
||||
The blocks produced per minute are the gradient of this plot.
|
||||
|
||||

|
||||
|
||||
Over a period of 4446 seconds, the height goes from 5 to 3323.
|
||||
This results in an average of 45 blocks produced per minute,
|
||||
which is similar to the baseline, shown below.
|
||||
|
||||

|
||||
|
||||
The following two plots show only the heights reported by ephemeral nodes.
|
||||
The second plot is the baseline plot for comparison.
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
By the length of the segments, we can see that ephemeral nodes in `v0.37`
|
||||
catch up slightly faster.
|
||||
|
||||
The transactions processed per minute are the gradient of this plot.
|
||||
|
||||

|
||||
|
||||
Over a period of 3852 seconds, the total goes from 597 to 267298 transactions in one of the validators,
|
||||
resulting in 4154 transactions per minute, which is slightly lower than the baseline,
|
||||
although the baseline had to deal with duplicate transactions.
|
||||
|
||||
For comparison, this is the baseline plot.
|
||||
|
||||

|
||||
|
||||
#### Peers
|
||||
|
||||
The plot below shows the evolution of the number of peers throughout the experiment.
|
||||
|
||||

|
||||
|
||||
This is the baseline plot, for comparison.
|
||||
|
||||

|
||||
|
||||
The plotted values and their evolution are comparable in both plots.
|
||||
|
||||
For further details on these plots, see the baseline report.
|
||||
|
||||
#### Memory Resident Set Size
|
||||
|
||||
The average Resident Set Size (RSS) over all processes looks slightly more stable
|
||||
on `v0.37` (first plot) than on the baseline (second plot).
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
The memory taken by the validators and the ephemeral nodes when they are up is comparable (not shown in the plots),
|
||||
just as observed in the baseline.
|
||||
|
||||
#### CPU utilization
|
||||
|
||||
The plot shows metric `load1` for all nodes.
|
||||
|
||||

|
||||
|
||||
This is the baseline plot.
|
||||
|
||||

|
||||
|
||||
In both cases, it is contained under 5 most of the time, which is considered normal load.
|
||||
The green line in the `v0.37` plot and the purple line in the baseline plot (`v0.34`)
|
||||
correspond to the validators receiving all transactions, via RPC, from the load runner process.
|
||||
In both cases, they oscillate around 5 (normal load). The main difference is that other
|
||||
nodes are generally less loaded in `v0.37`.
|
||||
|
||||
### Test Result
|
||||
|
||||
**Result: PASS**
|
||||
|
||||
Date: 2022-10-10
|
||||
|
||||
Version: 155110007b9d8b83997a799016c1d0844c8efbaf
|
||||
|
||||
[\#9533]: https://github.com/tendermint/tendermint/pull/9533
|
||||
[\#9534]: https://github.com/tendermint/tendermint/pull/9534
|
||||
[\#9539]: https://github.com/tendermint/tendermint/issues/9539
|
||||
[\#9548]: https://github.com/tendermint/tendermint/issues/9548
|
||||
[\#9537]: https://github.com/tendermint/tendermint/issues/9537
|
||||
[\#9581]: https://github.com/tendermint/tendermint/issues/9581
|
||||
BIN
docs/qa/v037/img/v037_200node_latencies.png
Normal file
|
After Width: | Height: | Size: 42 KiB |
BIN
docs/qa/v037/img/v037_latency_throughput.png
Normal file
|
After Width: | Height: | Size: 35 KiB |
BIN
docs/qa/v037/img/v037_r200c2_heights.png
Normal file
|
After Width: | Height: | Size: 411 KiB |
BIN
docs/qa/v037/img/v037_r200c2_load1.png
Normal file
|
After Width: | Height: | Size: 887 KiB |
BIN
docs/qa/v037/img/v037_r200c2_mempool_size.png
Normal file
|
After Width: | Height: | Size: 2.3 MiB |
BIN
docs/qa/v037/img/v037_r200c2_mempool_size_avg.png
Normal file
|
After Width: | Height: | Size: 183 KiB |
BIN
docs/qa/v037/img/v037_r200c2_peers.png
Normal file
|
After Width: | Height: | Size: 133 KiB |
BIN
docs/qa/v037/img/v037_r200c2_rounds.png
Normal file
|
After Width: | Height: | Size: 589 KiB |
BIN
docs/qa/v037/img/v037_r200c2_rss.png
Normal file
|
After Width: | Height: | Size: 816 KiB |
BIN
docs/qa/v037/img/v037_r200c2_rss_avg.png
Normal file
|
After Width: | Height: | Size: 154 KiB |
BIN
docs/qa/v037/img/v037_r200c2_total-txs.png
Normal file
|
After Width: | Height: | Size: 538 KiB |
52
docs/qa/v037/img/v037_report_tabbed.txt
Normal file
@@ -0,0 +1,52 @@
|
||||
Experiment ID: af129eae-7039-4c76-8c37-cff9ac636a84 │Experiment ID: 0f88bd33-9bf0-4197-8d1d-9a737c301ec6 │Experiment ID: 88227cad-2ba8-4eb6-b493-041d8120b46f
|
||||
│ │
|
||||
Connections: 1 │ Connections: 2 │ Connections: 4
|
||||
Rate: 25 │ Rate: 25 │ Rate: 25
|
||||
Size: 1024 │ Size: 1024 │ Size: 1024
|
||||
│ │
|
||||
Total Valid Tx: 2225 │ Total Valid Tx: 4450 │ Total Valid Tx: 8900
|
||||
Total Negative Latencies: 0 │ Total Negative Latencies: 0 │ Total Negative Latencies: 0
|
||||
Minimum Latency: 506.248587ms │ Minimum Latency: 469.53452ms │ Minimum Latency: 588.900721ms
|
||||
Maximum Latency: 3.032125789s │ Maximum Latency: 6.548830955s │ Maximum Latency: 6.533739843s
|
||||
Average Latency: 1.427767726s │ Average Latency: 1.448582257s │ Average Latency: 1.717432341s
|
||||
Standard Deviation: 524.11782ms │ Standard Deviation: 768.684133ms │ Standard Deviation: 1.000015768s
|
||||
│ │
|
||||
Experiment ID: f03d39bd-0233-4b3c-b461-543445ae1d4b │Experiment ID: 46674f1c-e591-4e36-bb9b-f375c19fc475 │Experiment ID: 5385c159-8d4d-455b-bced-dcd4a3209988
|
||||
│ │
|
||||
Connections: 1 │ Connections: 2 │ Connections: 4
|
||||
Rate: 50 │ Rate: 50 │ Rate: 50
|
||||
Size: 1024 │ Size: 1024 │ Size: 1024
|
||||
│ │
|
||||
Total Valid Tx: 4450 │ Total Valid Tx: 8900 │ Total Valid Tx: 17800
|
||||
Total Negative Latencies: 0 │ Total Negative Latencies: 0 │ Total Negative Latencies: 0
|
||||
Minimum Latency: 477.46027ms │ Minimum Latency: 455.757111ms │ Minimum Latency: 594.749081ms
|
||||
Maximum Latency: 2.483895394s │ Maximum Latency: 2.904715695s │ Maximum Latency: 9.294950389s
|
||||
Average Latency: 1.407374662s │ Average Latency: 1.397385779s │ Average Latency: 2.621122536s
|
||||
Standard Deviation: 505.150067ms │ Standard Deviation: 551.67603ms │ Standard Deviation: 1.772725794s
|
||||
│ │
|
||||
Experiment ID: 9161b4a7-d75c-455f-b82d-2b5235d533cf │Experiment ID: 993a13a8-9db1-4b2b-9c20-71a5b85e4bbf │Experiment ID: ad1eb9e1-f4d6-41fd-9ba7-0f1f7dde1e3e
|
||||
│ │
|
||||
Connections: 1 │ Connections: 2 │ Connections: 4
|
||||
Rate: 100 │ Rate: 100 │ Rate: 100
|
||||
Size: 1024 │ Size: 1024 │ Size: 1024
|
||||
│ │
|
||||
Total Valid Tx: 8900 │ Total Valid Tx: 17800 │ Total Valid Tx: 35400
|
||||
Total Negative Latencies: 0 │ Total Negative Latencies: 0 │ Total Negative Latencies: 0
|
||||
Minimum Latency: 448.050467ms │ Minimum Latency: 605.436195ms │ Minimum Latency: 1.16816912s
|
||||
Maximum Latency: 3.789711139s │ Maximum Latency: 7.292770222s │ Maximum Latency: 11.378681842s
|
||||
Average Latency: 1.451342158s │ Average Latency: 2.07457999s │ Average Latency: 3.918384209s
|
||||
Standard Deviation: 644.075973ms │ Standard Deviation: 1.230204022s │ Standard Deviation: 2.172400458s
|
||||
│ │
|
||||
Experiment ID: 3cbe9c3d-9c43-4c9f-b5ca-b567d20bbd57 │Experiment ID: af836c5e-d9b6-4d5d-971c-2fc7f07aa2a0 │Experiment ID: 77606397-4989-41d4-b13b-f1f4d1af063f
|
||||
│ │
|
||||
Connections: 1 │ Connections: 2 │ Connections: 4
|
||||
Rate: 200 │ Rate: 200 │ Rate: 200
|
||||
Size: 1024 │ Size: 1024 │ Size: 1024
|
||||
│ │
|
||||
Total Valid Tx: 17800 │ Total Valid Tx: 35600 │ Total Valid Tx: 37358
|
||||
Total Negative Latencies: 0 │ Total Negative Latencies: 0 │ Total Negative Latencies: 0
|
||||
Minimum Latency: 519.984701ms │ Minimum Latency: 820.755087ms │ Minimum Latency: 1.712574804s
|
||||
Maximum Latency: 12.609056712s │ Maximum Latency: 9.260798095s │ Maximum Latency: 25.739223696s
|
||||
Average Latency: 2.717853101s │ Average Latency: 3.477731881s │ Average Latency: 8.547725264s
|
||||
Standard Deviation: 2.390778155s │ Standard Deviation: 1.675000913s │ Standard Deviation: 4.76961569s
|
||||
|
||||
BIN
docs/qa/v037/img/v037_rotating_heights.png
Normal file
|
After Width: | Height: | Size: 167 KiB |
BIN
docs/qa/v037/img/v037_rotating_heights_ephe.png
Normal file
|
After Width: | Height: | Size: 138 KiB |
BIN
docs/qa/v037/img/v037_rotating_latencies.png
Normal file
|
After Width: | Height: | Size: 22 KiB |
BIN
docs/qa/v037/img/v037_rotating_load1.png
Normal file
|
After Width: | Height: | Size: 1.3 MiB |
BIN
docs/qa/v037/img/v037_rotating_peers.png
Normal file
|
After Width: | Height: | Size: 577 KiB |
BIN
docs/qa/v037/img/v037_rotating_rss_avg.png
Normal file
|
After Width: | Height: | Size: 217 KiB |
BIN
docs/qa/v037/img/v037_rotating_total-txs.png
Normal file
|
After Width: | Height: | Size: 181 KiB |
6
go.mod
@@ -3,7 +3,7 @@ module github.com/tendermint/tendermint
|
||||
go 1.18
|
||||
|
||||
require (
|
||||
github.com/BurntSushi/toml v1.2.0
|
||||
github.com/BurntSushi/toml v1.2.1
|
||||
github.com/adlio/schema v1.3.3
|
||||
github.com/cenkalti/backoff v2.2.1+incompatible // indirect
|
||||
github.com/fortytw2/leaktest v1.3.0
|
||||
@@ -28,13 +28,13 @@ require (
|
||||
github.com/rs/cors v1.8.2
|
||||
github.com/sasha-s/go-deadlock v0.3.1
|
||||
github.com/snikch/goodman v0.0.0-20171125024755-10e37e294daa
|
||||
github.com/spf13/cobra v1.5.0
|
||||
github.com/spf13/cobra v1.6.0
|
||||
github.com/spf13/viper v1.13.0
|
||||
github.com/stretchr/testify v1.8.0
|
||||
github.com/tendermint/tm-db v0.6.6
|
||||
golang.org/x/crypto v0.0.0-20220722155217-630584e8d5aa
|
||||
golang.org/x/net v0.0.0-20220812174116-3211cb980234
|
||||
google.golang.org/grpc v1.50.0
|
||||
google.golang.org/grpc v1.50.1
|
||||
)
|
||||
|
||||
require (
|
||||
|
||||
12
go.sum
@@ -53,8 +53,8 @@ github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78/go.mod h1:LmzpDX
|
||||
github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 h1:UQHMgLO+TxOElx5B5HZ4hJQsoJ/PvUvKRhJHDQXO8P8=
|
||||
github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E=
|
||||
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
|
||||
github.com/BurntSushi/toml v1.2.0 h1:Rt8g24XnyGTyglgET/PRUNlrUeu9F5L+7FilkXfZgs0=
|
||||
github.com/BurntSushi/toml v1.2.0/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
|
||||
github.com/BurntSushi/toml v1.2.1 h1:9F2/+DoOYIOksmaJFPw1tGFy1eDnIJXg+UHjuD8lTak=
|
||||
github.com/BurntSushi/toml v1.2.1/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
|
||||
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
|
||||
github.com/ChainSafe/go-schnorrkel v0.0.0-20200405005733-88cbf1b4c40d/go.mod h1:URdX5+vg25ts3aCh8H5IFZybJYKWhJHYMTnf+ULtoC4=
|
||||
github.com/DATA-DOG/go-sqlmock v1.5.0 h1:Shsta01QNfFxHCfpW6YH2STWB0MudeXXEWMr20OEh60=
|
||||
@@ -1042,8 +1042,8 @@ github.com/spf13/cobra v0.0.5/go.mod h1:3K3wKZymM7VvHMDS9+Akkh4K60UwM26emMESw8tL
|
||||
github.com/spf13/cobra v1.0.0/go.mod h1:/6GTrnGXV9HjY+aR4k0oJ5tcvakLuG6EuKReYlHNrgE=
|
||||
github.com/spf13/cobra v1.1.1/go.mod h1:WnodtKOvamDL/PwE2M4iKs8aMDBZ5Q5klgD3qfVJQMI=
|
||||
github.com/spf13/cobra v1.2.1/go.mod h1:ExllRjgxM/piMAM+3tAZvg8fsklGAf3tPfi+i8t68Nk=
|
||||
github.com/spf13/cobra v1.5.0 h1:X+jTBEBqF0bHN+9cSMgmfuvv2VHJ9ezmFNf9Y/XstYU=
|
||||
github.com/spf13/cobra v1.5.0/go.mod h1:dWXEIy2H428czQCjInthrTRUg7yKbok+2Qi/yBIJoUM=
|
||||
github.com/spf13/cobra v1.6.0 h1:42a0n6jwCot1pUmomAp4T7DeMD+20LFv4Q54pxLf2LI=
|
||||
github.com/spf13/cobra v1.6.0/go.mod h1:IOw/AERYS7UzyrGinqmz6HLUo219MORXGxhbaJUqzrY=
|
||||
github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo=
|
||||
github.com/spf13/jwalterweatherman v1.1.0 h1:ue6voC5bR5F8YxI5S67j9i582FU4Qvo2bmqnqMYADFk=
|
||||
github.com/spf13/jwalterweatherman v1.1.0/go.mod h1:aNWZUN0dPAAO/Ljvb5BEdw96iTZ0EXowPYD95IqWIGo=
|
||||
@@ -1696,8 +1696,8 @@ google.golang.org/grpc v1.38.0/go.mod h1:NREThFqKR1f3iQ6oBuvc5LadQuXVGo9rkm5ZGrQ
|
||||
google.golang.org/grpc v1.40.0/go.mod h1:ogyxbiOoUXAkP+4+xa6PZSE9DZgIHtSpzjDTB9KAK34=
|
||||
google.golang.org/grpc v1.41.0/go.mod h1:U3l9uK9J0sini8mHphKoXyaqDA/8VyGnDee1zzIUK6k=
|
||||
google.golang.org/grpc v1.42.0/go.mod h1:k+4IHHFw41K8+bbowsex27ge2rCb65oeWqe4jJ590SU=
|
||||
google.golang.org/grpc v1.50.0 h1:fPVVDxY9w++VjTZsYvXWqEf9Rqar/e+9zYfxKK+W+YU=
|
||||
google.golang.org/grpc v1.50.0/go.mod h1:ZgQEeidpAuNRZ8iRrlBKXZQP1ghovWIVhdJRyCDK+GI=
|
||||
google.golang.org/grpc v1.50.1 h1:DS/BukOZWp8s6p4Dt/tOaJaTQyPyOoCcrjroHuCeLzY=
|
||||
google.golang.org/grpc v1.50.1/go.mod h1:ZgQEeidpAuNRZ8iRrlBKXZQP1ghovWIVhdJRyCDK+GI=
|
||||
google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
|
||||
google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
|
||||
google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
|
||||
|
||||
@@ -29,7 +29,7 @@ var _ = time.Kitchen
|
||||
// proto package needs to be updated.
|
||||
const _ = proto.GoGoProtoPackageIsVersion3 // please upgrade the proto package
|
||||
|
||||
// BlockIdFlag indicates which BlcokID the signature is for
|
||||
// BlockIdFlag indicates which BlockID the signature is for
|
||||
type BlockIDFlag int32
|
||||
|
||||
const (
|
||||
|
||||
@@ -9,15 +9,15 @@ import "tendermint/crypto/proof.proto";
|
||||
import "tendermint/version/types.proto";
|
||||
import "tendermint/types/validator.proto";
|
||||
|
||||
// BlockIdFlag indicates which BlcokID the signature is for
|
||||
// BlockIdFlag indicates which BlockID the signature is for
|
||||
enum BlockIDFlag {
|
||||
option (gogoproto.goproto_enum_stringer) = true;
|
||||
option (gogoproto.goproto_enum_prefix) = false;
|
||||
|
||||
BLOCK_ID_FLAG_UNKNOWN = 0 [(gogoproto.enumvalue_customname) = "BlockIDFlagUnknown"];
|
||||
BLOCK_ID_FLAG_ABSENT = 1 [(gogoproto.enumvalue_customname) = "BlockIDFlagAbsent"];
|
||||
BLOCK_ID_FLAG_COMMIT = 2 [(gogoproto.enumvalue_customname) = "BlockIDFlagCommit"];
|
||||
BLOCK_ID_FLAG_NIL = 3 [(gogoproto.enumvalue_customname) = "BlockIDFlagNil"];
|
||||
BLOCK_ID_FLAG_UNKNOWN = 0 [(gogoproto.enumvalue_customname) = "BlockIDFlagUnknown"]; // indicates an error condition
|
||||
BLOCK_ID_FLAG_ABSENT = 1 [(gogoproto.enumvalue_customname) = "BlockIDFlagAbsent"]; // the vote was not received
|
||||
BLOCK_ID_FLAG_COMMIT = 2 [(gogoproto.enumvalue_customname) = "BlockIDFlagCommit"]; // voted for the block that received the majority
|
||||
BLOCK_ID_FLAG_NIL = 3 [(gogoproto.enumvalue_customname) = "BlockIDFlagNil"]; // voted for nil
|
||||
}
|
||||
|
||||
// SignedMsgType is a type of signed message in the consensus.
|
||||
|
||||
48
scripts/qa/reporting/README.md
Normal file
@@ -0,0 +1,48 @@
|
||||
# Reporting Scripts
|
||||
|
||||
This directory contains just one utility script at present that is used in
|
||||
reporting/QA.
|
||||
|
||||
## Latency vs Throughput Plotting
|
||||
|
||||
[`latency_throughput.py`](./latency_throughput.py) is a Python script that uses
|
||||
[matplotlib] to plot a graph of transaction latency vs throughput rate based on
|
||||
the CSV output generated by the [loadtime reporting
|
||||
tool](../../../test/loadtime/cmd/report/).
|
||||
|
||||
### Setup
|
||||
|
||||
Execute the following within this directory (the same directory as the
|
||||
`latency_throughput.py` file).
|
||||
|
||||
```bash
|
||||
# Create a virtual environment into which to install your dependencies
|
||||
python3 -m venv .venv
|
||||
|
||||
# Activate the virtual environment
|
||||
source .venv/bin/activate
|
||||
|
||||
# Install dependencies listed in requirements.txt
|
||||
pip install -r requirements.txt
|
||||
|
||||
# Show usage instructions and parameters
|
||||
./latency_throughput.py --help
|
||||
```
|
||||
|
||||
### Running
|
||||
|
||||
```bash
|
||||
# Do the following while ensuring that the virtual environment is activated (see
|
||||
# the Setup steps).
|
||||
#
|
||||
# This will generate a plot in a PNG file called 'tm034.png' in the current
|
||||
# directory based on the reporting tool CSV output in the "raw.csv" file. The
|
||||
# '-t' flag overrides the default title at the top of the plot.
|
||||
|
||||
./latency_throughput.py \
|
||||
-t 'Tendermint v0.34.x Latency vs Throughput' \
|
||||
./tm034.png \
|
||||
/path/to/csv/files/raw.csv
|
||||
```
|
||||
|
||||
[matplotlib]: https://matplotlib.org/
|
||||
170
scripts/qa/reporting/latency_throughput.py
Executable file
@@ -0,0 +1,170 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
A simple script to parse the CSV output from the loadtime reporting tool (see
|
||||
https://github.com/tendermint/tendermint/tree/main/test/loadtime/cmd/report).
|
||||
|
||||
Produces a plot of average transaction latency vs total transaction throughput
|
||||
according to the number of load testing tool WebSocket connections to the
|
||||
Tendermint node.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import csv
|
||||
import logging
|
||||
import sys
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
|
||||
DEFAULT_TITLE = "Tendermint latency vs throughput"
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: parse arguments and render the plot.

    Accepts an optional ``-t/--title`` (defaulting to ``DEFAULT_TITLE``), a
    positional output image path, and one or more positional input CSV files.
    Logging is configured at INFO level on stdout before the parsed values are
    handed to ``plot_latency_vs_throughput``.
    """
    arg_parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="Renders a latency vs throughput diagram "
        "for a set of transactions provided by the loadtime reporting tool",
    )
    arg_parser.add_argument(
        '-t', '--title',
        help='Plot title',
        default=DEFAULT_TITLE,
    )
    arg_parser.add_argument(
        'output_image',
        help='Output image file (in PNG format)',
    )
    # nargs='+' means at least one CSV file is required; all of them are
    # collected into a list.
    arg_parser.add_argument(
        'input_csv_file',
        nargs='+',
        help="CSV input file from which to read transaction data "
        "- must have been generated by the loadtime reporting tool",
    )
    cli = arg_parser.parse_args()

    # Progress messages go to stdout as "<LEVEL>\t<message>".
    logging.basicConfig(
        level=logging.INFO,
        stream=sys.stdout,
        format='%(levelname)s\t%(message)s',
    )

    plot_latency_vs_throughput(
        cli.input_csv_file,
        cli.output_image,
        title=cli.title,
    )
|
||||
|
||||
|
||||
def plot_latency_vs_throughput(input_files, output_image, title=DEFAULT_TITLE):
    """Plot average latency against throughput, one line per connection count.

    Args:
        input_files: CSV files passed straight through to
            ``process_input_files``.
        output_image: Destination path handed to ``plt.savefig``.
        title: Title drawn at the top of the plot.

    The two mappings returned by ``process_input_files`` are indexed by the
    same keys; each key is rendered as a connection count in the legend.
    """
    avg_latencies, throughput_rates = process_input_files(input_files)

    fig, ax = plt.subplots()

    # Iterate in ascending key order so the legend entries are sorted.
    for conn_count in sorted(avg_latencies.keys()):
        plural = '' if conn_count == 1 else 's'
        ax.plot(
            np.array(throughput_rates[conn_count]),
            np.array(avg_latencies[conn_count]),
            'o-',
            label='%d connection%s' % (conn_count, plural),
        )

    ax.set_title(title)
    ax.set_xlabel('Throughput rate (tx/s)')
    ax.set_ylabel('Average transaction latency (s)')

    plt.legend(loc='upper left')
    plt.savefig(output_image)
|
||||
|
||||
|
||||
def process_input_files(input_files):
    """Read every input CSV file and compute per-connection statistics.

    Each row of each file is folded into a single dictionary of experiments
    via process_tx; the accumulated experiments are then summarised by
    compute_experiments_stats, whose result is returned unchanged.
    """
    # Running per-experiment aggregates, shared across all input files.
    experiments = {}

    for path in input_files:
        logging.info('Reading %s...' % path)

        with open(path, 'rt') as csv_file:
            for row in csv.DictReader(csv_file):
                experiments = process_tx(experiments, row)

    return compute_experiments_stats(experiments)
|
||||
|
||||
|
||||
def process_tx(experiments, tx):
    """Fold a single transaction row into the per-experiment aggregates.

    Args:
        experiments: dict mapping experiment ID to its running aggregates;
            it is updated in place.
        tx: mapping with the keys 'experiment_id', 'block_time',
            'duration_ns', 'connections' and 'rate'.

    Returns:
        The same `experiments` dict that was passed in.

    Raises:
        Exception: if `tx` reports a different 'connections' or 'rate' value
            than the one already recorded for its experiment.
    """
    exp_id = tx['experiment_id']
    nanos_per_second = 10**9
    # Both the block time (nanoseconds since the epoch) and the duration are
    # given in nanoseconds - work in seconds from here on.
    block_time = float(tx['block_time']) / nanos_per_second
    duration = float(tx['duration_ns']) / nanos_per_second
    connections = int(tx['connections'])
    rate = int(tx['rate'])

    if exp_id not in experiments:
        experiments[exp_id] = {
            'connections': connections,
            'rate': rate,
            'block_time_min': block_time,
            # The latency that goes with the minimum block time is kept so
            # that the start time of the experiment can be estimated later.
            'block_time_min_duration': duration,
            'block_time_max': block_time,
            'total_latencies': duration,
            'tx_count': 1,
        }
        logging.info('Found experiment %s with rate=%d, connections=%d' %
                     (exp_id, rate, connections))
        return experiments

    exp = experiments[exp_id]

    # Every transaction of an experiment must agree on these parameters.
    for field, val in (('connections', connections), ('rate', rate)):
        if val != exp[field]:
            raise Exception(
                'Found multiple distinct values for field '
                '"%s" for the same experiment (%s): %d and %d' %
                (field, exp_id, val, exp[field]))

    if block_time < exp['block_time_min']:
        exp['block_time_min'] = block_time
        exp['block_time_min_duration'] = duration
    if block_time > exp['block_time_max']:
        exp['block_time_max'] = block_time

    exp['total_latencies'] += duration
    exp['tx_count'] += 1

    return experiments
|
||||
|
||||
|
||||
def compute_experiments_stats(experiments):
    """Derive average latency and throughput rate for every experiment.

    Args:
        experiments: dict of per-experiment aggregates with the keys
            'connections', 'total_latencies', 'tx_count', 'block_time_min',
            'block_time_min_duration' and 'block_time_max'.

    Returns:
        A tuple (avg_latencies, throughput_rates) of two dicts keyed by
        connection count. Each value is a list with one entry per
        experiment, ordered by increasing throughput rate, so that the two
        lists for a given key line up element by element.
    """
    # connection count -> list of (throughput rate, average latency) points
    points_by_conns = {}

    for exp_id, exp in experiments.items():
        num_conns = exp['connections']
        mean_latency = exp['total_latencies'] / exp['tx_count']
        # The experiment is taken to have started when the transaction in
        # the earliest block was sent, i.e. that block's time minus the
        # latency recorded for it.
        started_at = exp['block_time_min'] - exp['block_time_min_duration']
        elapsed = exp['block_time_max'] - started_at
        tx_per_second = exp['tx_count'] / elapsed
        points_by_conns.setdefault(num_conns, []).append(
            (tx_per_second, mean_latency))

    avg_latencies = {}
    throughput_rates = {}
    for num_conns in sorted(points_by_conns):
        # Order by throughput only; ties keep their original relative order.
        points = sorted(points_by_conns[num_conns], key=lambda p: p[0])
        avg_latencies[num_conns] = [latency for _, latency in points]
        throughput_rates[num_conns] = [tput for tput, _ in points]
        for tput, latency in points:
            logging.info('For %d connection(s): '
                         'throughput rate = %.6f tx/s\t'
                         'average latency = %.6fs' %
                         (num_conns, tput, latency))

    return (avg_latencies, throughput_rates)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
11
scripts/qa/reporting/requirements.txt
Normal file
@@ -0,0 +1,11 @@
|
||||
contourpy==1.0.5
|
||||
cycler==0.11.0
|
||||
fonttools==4.37.4
|
||||
kiwisolver==1.4.4
|
||||
matplotlib==3.6.1
|
||||
numpy==1.23.4
|
||||
packaging==21.3
|
||||
Pillow==9.2.0
|
||||
pyparsing==3.0.9
|
||||
python-dateutil==2.8.2
|
||||
six==1.16.0
|
||||
@@ -46,7 +46,7 @@ and a list of evidence of malfeasance (ie. signing conflicting votes).
|
||||
|
||||
| Name | Type | Description | Validation |
|
||||
|--------|-------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------|
|
||||
| Header | [Header](#header) | Header corresponding to the block. This field contains information used throughout consensus and other areas of the protocol. To find out what it contains, visit [header] (#header) | Must adhere to the validation rules of [header](#header) |
|
||||
| Header | [Header](#header) | Header corresponding to the block. This field contains information used throughout consensus and other areas of the protocol. To find out what it contains, visit [header](#header) | Must adhere to the validation rules of [header](#header) |
|
||||
| Data | [Data](#data) | Data contains a list of transactions. The contents of the transactions are unknown to Tendermint. | This field can be empty or populated, but no validation is performed. Applications can perform validation on individual transactions prior to block creation using [checkTx](https://github.com/tendermint/tendermint/blob/main/spec/abci/abci++_methods.md#checktx). |
|
||||
| Evidence | [EvidenceList](#evidencelist) | Evidence contains a list of infractions committed by validators. | Can be empty, but when populated the validation rules from [evidenceList](#evidencelist) apply |
|
||||
| LastCommit | [Commit](#commit) | `LastCommit` includes one vote for every validator. All votes must either be for the previous block, nil or absent. If a vote is for the previous block it must have a valid signature from the corresponding validator. The sum of the voting power of the validators that voted must be greater than 2/3 of the total voting power of the complete validator set. The number of votes in a commit is limited to 10000 (see `types.MaxVotesCount`). | Must be empty for the initial height and must adhere to the validation rules of [commit](#commit). |
|
||||
@@ -202,12 +202,12 @@ Commit is a simple wrapper for a list of signatures, with one for each validator
|
||||
a particular `BlockID` or was absent. It's a part of the `Commit` and can be used
|
||||
to reconstruct the vote set given the validator set.
|
||||
|
||||
| Name | Type | Description | Validation |
|
||||
|------------------|-----------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------|
|
||||
| BlockIDFlag | [BlockIDFlag](#blockidflag) | Represents the validators participation in consensus: Either voted for the block that received the majority, voted for another block, voted nil or did not vote | Must be one of the fields in the [BlockIDFlag](#blockidflag) enum |
|
||||
| ValidatorAddress | [Address](#address) | Address of the validator | Must be of length 20 |
|
||||
| Timestamp | [Time](#time) | This field will vary from `CommitSig` to `CommitSig`. It represents the timestamp of the validator. | [Time](#time) |
|
||||
| Signature | [Signature](#signature) | Signature corresponding to the validators participation in consensus. | The length of the signature must be > 0 and < than 64 |
|
||||
| Name | Type | Description | Validation |
|
||||
|------------------|-----------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------|
|
||||
| BlockIDFlag      | [BlockIDFlag](#blockidflag) | Represents the validator's participation in consensus: its vote was not received, voted for the block that received the majority, or voted for nil               | Must be one of the fields in the [BlockIDFlag](#blockidflag) enum |
|
||||
| ValidatorAddress | [Address](#address) | Address of the validator | Must be of length 20 |
|
||||
| Timestamp | [Time](#time) | This field will vary from `CommitSig` to `CommitSig`. It represents the timestamp of the validator. | [Time](#time) |
|
||||
| Signature        | [Signature](#signature)     | Signature corresponding to the validator's participation in consensus.                                                                                           | The length of the signature must be > 0 and < 64                  |
|
||||
|
||||
NOTE: `ValidatorAddress` and `Timestamp` fields may be removed in the future
|
||||
(see [ADR-25](https://github.com/tendermint/tendermint/blob/main/docs/architecture/adr-025-commit.md)).
|
||||
@@ -218,10 +218,10 @@ BlockIDFlag represents which BlockID the [signature](#commitsig) is for.
|
||||
|
||||
```go
|
||||
enum BlockIDFlag {
|
||||
BLOCK_ID_FLAG_UNKNOWN = 0;
|
||||
BLOCK_ID_FLAG_ABSENT = 1; // signatures for other blocks are also considered absent
|
||||
BLOCK_ID_FLAG_COMMIT = 2;
|
||||
BLOCK_ID_FLAG_NIL = 3;
|
||||
BLOCK_ID_FLAG_UNKNOWN = 0; // indicates an error condition
|
||||
BLOCK_ID_FLAG_ABSENT = 1; // the vote was not received
|
||||
BLOCK_ID_FLAG_COMMIT = 2; // voted for the block that received the majority
|
||||
BLOCK_ID_FLAG_NIL = 3; // voted for nil
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
@@ -23,7 +23,6 @@ type Config struct {
|
||||
PrivValServer string `toml:"privval_server"`
|
||||
PrivValKey string `toml:"privval_key"`
|
||||
PrivValState string `toml:"privval_state"`
|
||||
Misbehaviors map[string]string `toml:"misbehaviors"`
|
||||
KeyType string `toml:"key_type"`
|
||||
}
|
||||
|
||||
|
||||
85
test/e2e/pkg/infra/docker/docker.go
Normal file
@@ -0,0 +1,85 @@
|
||||
package docker
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"text/template"
|
||||
|
||||
e2e "github.com/tendermint/tendermint/test/e2e/pkg"
|
||||
"github.com/tendermint/tendermint/test/e2e/pkg/infra"
|
||||
)
|
||||
|
||||
var _ infra.Provider = &Provider{}
|
||||
|
||||
// Provider implements a docker-compose backed infrastructure provider.
|
||||
type Provider struct {
|
||||
Testnet *e2e.Testnet
|
||||
}
|
||||
|
||||
// Setup generates the docker-compose file and write it to disk, erroring if
|
||||
// any of these operations fail.
|
||||
func (p *Provider) Setup() error {
|
||||
compose, err := dockerComposeBytes(p.Testnet)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
//nolint: gosec
|
||||
// G306: Expect WriteFile permissions to be 0600 or less
|
||||
err = os.WriteFile(filepath.Join(p.Testnet.Dir, "docker-compose.yml"), compose, 0644)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// dockerComposeBytes generates a Docker Compose config file for a testnet and
// returns the file as bytes to be written out to disk.
//
// The config is produced by parsing the inline text/template below and
// executing it with the testnet as its data. The template reads the testnet's
// .Name, .IPv6, .IP and .Nodes, and for every node its .Name, .ABCIProtocol,
// .ProxyPort and .IP. A template parse or execution error is returned as-is
// together with a nil slice.
//
// NOTE(review): the leading whitespace of the template lines is not visible
// in this view; YAML indentation is significant, so check it against the
// repository copy before editing the template.
|
||||
func dockerComposeBytes(testnet *e2e.Testnet) ([]byte, error) {
|
||||
// Must use version 2 Docker Compose format, to support IPv6.
|
||||
tmpl, err := template.New("docker-compose").Parse(`version: '2.4'
|
||||
networks:
|
||||
{{ .Name }}:
|
||||
labels:
|
||||
e2e: true
|
||||
driver: bridge
|
||||
{{- if .IPv6 }}
|
||||
enable_ipv6: true
|
||||
{{- end }}
|
||||
ipam:
|
||||
driver: default
|
||||
config:
|
||||
- subnet: {{ .IP }}
|
||||
|
||||
services:
|
||||
{{- range .Nodes }}
|
||||
{{ .Name }}:
|
||||
labels:
|
||||
e2e: true
|
||||
container_name: {{ .Name }}
|
||||
image: tendermint/e2e-node
|
||||
{{- if eq .ABCIProtocol "builtin" }}
|
||||
entrypoint: /usr/bin/entrypoint-builtin
|
||||
{{- end }}
|
||||
init: true
|
||||
ports:
|
||||
- 26656
|
||||
- {{ if .ProxyPort }}{{ .ProxyPort }}:{{ end }}26657
|
||||
- 6060
|
||||
volumes:
|
||||
- ./{{ .Name }}:/tendermint
|
||||
networks:
|
||||
{{ $.Name }}:
|
||||
ipv{{ if $.IPv6 }}6{{ else }}4{{ end}}_address: {{ .IP }}
|
||||
|
||||
{{end}}`)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// Render the parsed template into an in-memory buffer.
var buf bytes.Buffer
|
||||
err = tmpl.Execute(&buf, testnet)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return buf.Bytes(), nil
|
||||
}
|
||||
20
test/e2e/pkg/infra/provider.go
Normal file
@@ -0,0 +1,20 @@
|
||||
package infra
|
||||
|
||||
// Provider is the API through which the infrastructure backing a specific
// testnet is manipulated.
type Provider interface {
	// Setup generates whatever configuration the infrastructure provider
	// needs; it is invoked during testnet setup.
	Setup() error
}
|
||||
|
||||
// NoopProvider satisfies the provider interface without doing anything: every
// interface method is a no-op. This may be useful when the infrastructure is
// managed by a separate process.
type NoopProvider struct{}

// Setup does nothing and always returns nil.
func (NoopProvider) Setup() error {
	return nil
}
|
||||
|
||||
var _ Provider = NoopProvider{}
|
||||
80
test/e2e/pkg/infrastructure.go
Normal file
@@ -0,0 +1,80 @@
|
||||
package e2e
|
||||
|
||||
import (
	"encoding/json"
	"fmt"
	"net"
	"os"
	"sort"
)
|
||||
|
||||
const (
|
||||
dockerIPv4CIDR = "10.186.73.0/24"
|
||||
dockerIPv6CIDR = "fd80:b10c::/48"
|
||||
|
||||
globalIPv4CIDR = "0.0.0.0/0"
|
||||
)
|
||||
|
||||
// InfrastructureData describes a set of already existing infrastructure on
// which a testnet is to be run.
type InfrastructureData struct {
	// Provider names the infrastructure provider backing the testnet, e.g.
	// 'docker' when it runs locally in a docker network, or 'digital-ocean',
	// 'aws', 'google', etc. when it comes from a cloud provider.
	Provider string `json:"provider"`

	// Instances holds every machine instance on which testnet processes are
	// run, keyed by instance name. Each key must match the name of one of
	// the nodes defined in the testnet manifest.
	Instances map[string]InstanceData `json:"instances"`

	// Network is the range of IP addresses, in CIDR notation, within which
	// the IP addresses of all instances are expected to fall.
	Network string `json:"network"`
}

// InstanceData describes a single machine instance backing one of the nodes
// in the testnet.
type InstanceData struct {
	IPAddress net.IP `json:"ip_address"`
}
|
||||
|
||||
func NewDockerInfrastructureData(m Manifest) (InfrastructureData, error) {
|
||||
netAddress := dockerIPv4CIDR
|
||||
if m.IPv6 {
|
||||
netAddress = dockerIPv6CIDR
|
||||
}
|
||||
_, ipNet, err := net.ParseCIDR(netAddress)
|
||||
if err != nil {
|
||||
return InfrastructureData{}, fmt.Errorf("invalid IP network address %q: %w", netAddress, err)
|
||||
}
|
||||
ipGen := newIPGenerator(ipNet)
|
||||
ifd := InfrastructureData{
|
||||
Provider: "docker",
|
||||
Instances: make(map[string]InstanceData),
|
||||
Network: netAddress,
|
||||
}
|
||||
for name := range m.Nodes {
|
||||
ifd.Instances[name] = InstanceData{
|
||||
IPAddress: ipGen.Next(),
|
||||
}
|
||||
}
|
||||
return ifd, nil
|
||||
}
|
||||
|
||||
func InfrastructureDataFromFile(p string) (InfrastructureData, error) {
|
||||
ifd := InfrastructureData{}
|
||||
b, err := os.ReadFile(p)
|
||||
if err != nil {
|
||||
return InfrastructureData{}, err
|
||||
}
|
||||
err = json.Unmarshal(b, &ifd)
|
||||
if err != nil {
|
||||
return InfrastructureData{}, err
|
||||
}
|
||||
if ifd.Network == "" {
|
||||
ifd.Network = globalIPv4CIDR
|
||||
}
|
||||
return ifd, nil
|
||||
}
|
||||
@@ -21,8 +21,6 @@ import (
|
||||
const (
|
||||
randomSeed int64 = 2308084734268
|
||||
proxyPortFirst uint32 = 5701
|
||||
networkIPv4 = "10.186.73.0/24"
|
||||
networkIPv6 = "fd80:b10c::/48"
|
||||
)
|
||||
|
||||
type (
|
||||
@@ -100,32 +98,20 @@ type Node struct {
|
||||
// The testnet generation must be deterministic, since it is generated
|
||||
// separately by the runner and the test cases. For this reason, testnets use a
|
||||
// random seed to generate e.g. keys.
|
||||
func LoadTestnet(file string) (*Testnet, error) {
|
||||
manifest, err := LoadManifest(file)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
dir := strings.TrimSuffix(file, filepath.Ext(file))
|
||||
|
||||
// Set up resource generators. These must be deterministic.
|
||||
netAddress := networkIPv4
|
||||
if manifest.IPv6 {
|
||||
netAddress = networkIPv6
|
||||
}
|
||||
_, ipNet, err := net.ParseCIDR(netAddress)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid IP network address %q: %w", netAddress, err)
|
||||
}
|
||||
|
||||
ipGen := newIPGenerator(ipNet)
|
||||
func LoadTestnet(manifest Manifest, fname string, ifd InfrastructureData) (*Testnet, error) {
|
||||
dir := strings.TrimSuffix(fname, filepath.Ext(fname))
|
||||
keyGen := newKeyGenerator(randomSeed)
|
||||
proxyPortGen := newPortGenerator(proxyPortFirst)
|
||||
_, ipNet, err := net.ParseCIDR(ifd.Network)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid IP network address %q: %w", ifd.Network, err)
|
||||
}
|
||||
|
||||
testnet := &Testnet{
|
||||
Name: filepath.Base(dir),
|
||||
File: file,
|
||||
File: fname,
|
||||
Dir: dir,
|
||||
IP: ipGen.Network(),
|
||||
IP: ipNet,
|
||||
InitialHeight: 1,
|
||||
InitialState: manifest.InitialState,
|
||||
Validators: map[*Node]int64{},
|
||||
@@ -156,12 +142,16 @@ func LoadTestnet(file string) (*Testnet, error) {
|
||||
|
||||
for _, name := range nodeNames {
|
||||
nodeManifest := manifest.Nodes[name]
|
||||
ind, ok := ifd.Instances[name]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("information for node '%s' missing from infrastucture data", name)
|
||||
}
|
||||
node := &Node{
|
||||
Name: name,
|
||||
Testnet: testnet,
|
||||
PrivvalKey: keyGen.Generate(manifest.KeyType),
|
||||
NodeKey: keyGen.Generate("ed25519"),
|
||||
IP: ipGen.Next(),
|
||||
IP: ind.IPAddress,
|
||||
ProxyPort: proxyPortGen.Next(),
|
||||
Mode: ModeValidator,
|
||||
Database: "goleveldb",
|
||||
|
||||
@@ -2,6 +2,7 @@ package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"os"
|
||||
@@ -11,6 +12,8 @@ import (
|
||||
|
||||
"github.com/tendermint/tendermint/libs/log"
|
||||
e2e "github.com/tendermint/tendermint/test/e2e/pkg"
|
||||
"github.com/tendermint/tendermint/test/e2e/pkg/infra"
|
||||
"github.com/tendermint/tendermint/test/e2e/pkg/infra/docker"
|
||||
)
|
||||
|
||||
const randomSeed = 2308084734268
|
||||
@@ -26,6 +29,7 @@ type CLI struct {
|
||||
root *cobra.Command
|
||||
testnet *e2e.Testnet
|
||||
preserve bool
|
||||
infp infra.Provider
|
||||
}
|
||||
|
||||
// NewCLI sets up the CLI.
|
||||
@@ -41,19 +45,57 @@ func NewCLI() *CLI {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
testnet, err := e2e.LoadTestnet(file)
|
||||
m, err := e2e.LoadManifest(file)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
inft, err := cmd.Flags().GetString("infrastructure-type")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var ifd e2e.InfrastructureData
|
||||
switch inft {
|
||||
case "docker":
|
||||
var err error
|
||||
ifd, err = e2e.NewDockerInfrastructureData(m)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
case "digital-ocean":
|
||||
p, err := cmd.Flags().GetString("infrastructure-data")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if p == "" {
|
||||
return errors.New("'--infrastructure-data' must be set when using the 'digital-ocean' infrastructure-type")
|
||||
}
|
||||
ifd, err = e2e.InfrastructureDataFromFile(p)
|
||||
if err != nil {
|
||||
return fmt.Errorf("parsing infrastructure data: %s", err)
|
||||
}
|
||||
default:
|
||||
return fmt.Errorf("unknown infrastructure type '%s'", inft)
|
||||
}
|
||||
|
||||
testnet, err := e2e.LoadTestnet(m, file, ifd)
|
||||
if err != nil {
|
||||
return fmt.Errorf("loading testnet: %s", err)
|
||||
}
|
||||
|
||||
cli.testnet = testnet
|
||||
cli.infp = &infra.NoopProvider{}
|
||||
if inft == "docker" {
|
||||
cli.infp = &docker.Provider{Testnet: testnet}
|
||||
}
|
||||
return nil
|
||||
},
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
if err := Cleanup(cli.testnet); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := Setup(cli.testnet); err != nil {
|
||||
if err := Setup(cli.testnet, cli.infp); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -118,6 +160,10 @@ func NewCLI() *CLI {
|
||||
cli.root.PersistentFlags().StringP("file", "f", "", "Testnet TOML manifest")
|
||||
_ = cli.root.MarkPersistentFlagRequired("file")
|
||||
|
||||
cli.root.PersistentFlags().StringP("infrastructure-type", "", "docker", "Backing infrastructure used to run the testnet. Either 'digital-ocean' or 'docker'")
|
||||
|
||||
cli.root.PersistentFlags().StringP("infrastructure-data", "", "", "path to the json file containing the infrastructure data. Only used if the 'infrastructure-type' is set to a value other than 'docker'")
|
||||
|
||||
cli.root.Flags().BoolVarP(&cli.preserve, "preserve", "p", false,
|
||||
"Preserves the running of the test net after tests are completed")
|
||||
|
||||
@@ -125,7 +171,7 @@ func NewCLI() *CLI {
|
||||
Use: "setup",
|
||||
Short: "Generates the testnet directory and configuration",
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
return Setup(cli.testnet)
|
||||
return Setup(cli.testnet, cli.infp)
|
||||
},
|
||||
})
|
||||
|
||||
@@ -135,7 +181,7 @@ func NewCLI() *CLI {
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
_, err := os.Stat(cli.testnet.Dir)
|
||||
if os.IsNotExist(err) {
|
||||
err = Setup(cli.testnet)
|
||||
err = Setup(cli.testnet, cli.infp)
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -258,7 +304,7 @@ Does not run any perbutations.
|
||||
if err := Cleanup(cli.testnet); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := Setup(cli.testnet); err != nil {
|
||||
if err := Setup(cli.testnet, cli.infp); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
|
||||
@@ -10,9 +10,7 @@ import (
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"text/template"
|
||||
"time"
|
||||
|
||||
"github.com/BurntSushi/toml"
|
||||
@@ -23,6 +21,7 @@ import (
|
||||
"github.com/tendermint/tendermint/p2p"
|
||||
"github.com/tendermint/tendermint/privval"
|
||||
e2e "github.com/tendermint/tendermint/test/e2e/pkg"
|
||||
"github.com/tendermint/tendermint/test/e2e/pkg/infra"
|
||||
"github.com/tendermint/tendermint/types"
|
||||
)
|
||||
|
||||
@@ -39,7 +38,7 @@ const (
|
||||
)
|
||||
|
||||
// Setup sets up the testnet configuration.
|
||||
func Setup(testnet *e2e.Testnet) error {
|
||||
func Setup(testnet *e2e.Testnet, infp infra.Provider) error {
|
||||
logger.Info("setup", "msg", log.NewLazySprintf("Generating testnet files in %q", testnet.Dir))
|
||||
|
||||
err := os.MkdirAll(testnet.Dir, os.ModePerm)
|
||||
@@ -47,11 +46,7 @@ func Setup(testnet *e2e.Testnet) error {
|
||||
return err
|
||||
}
|
||||
|
||||
compose, err := MakeDockerCompose(testnet)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = os.WriteFile(filepath.Join(testnet.Dir, "docker-compose.yml"), compose, 0o644) //nolint:gosec
|
||||
err = infp.Setup()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -126,70 +121,6 @@ func Setup(testnet *e2e.Testnet) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// MakeDockerCompose generates a Docker Compose config for a testnet.
|
||||
func MakeDockerCompose(testnet *e2e.Testnet) ([]byte, error) {
|
||||
// Must use version 2 Docker Compose format, to support IPv6.
|
||||
tmpl, err := template.New("docker-compose").Funcs(template.FuncMap{
|
||||
"misbehaviorsToString": func(misbehaviors map[int64]string) string {
|
||||
str := ""
|
||||
for height, misbehavior := range misbehaviors {
|
||||
// after the first behavior set, a comma must be prepended
|
||||
if str != "" {
|
||||
str += ","
|
||||
}
|
||||
heightString := strconv.Itoa(int(height))
|
||||
str += misbehavior + "," + heightString
|
||||
}
|
||||
return str
|
||||
},
|
||||
}).Parse(`version: '2.4'
|
||||
|
||||
networks:
|
||||
{{ .Name }}:
|
||||
labels:
|
||||
e2e: true
|
||||
driver: bridge
|
||||
{{- if .IPv6 }}
|
||||
enable_ipv6: true
|
||||
{{- end }}
|
||||
ipam:
|
||||
driver: default
|
||||
config:
|
||||
- subnet: {{ .IP }}
|
||||
|
||||
services:
|
||||
{{- range .Nodes }}
|
||||
{{ .Name }}:
|
||||
labels:
|
||||
e2e: true
|
||||
container_name: {{ .Name }}
|
||||
image: tendermint/e2e-node
|
||||
{{- if eq .ABCIProtocol "builtin" }}
|
||||
entrypoint: /usr/bin/entrypoint-builtin
|
||||
{{- end }}
|
||||
init: true
|
||||
ports:
|
||||
- 26656
|
||||
- {{ if .ProxyPort }}{{ .ProxyPort }}:{{ end }}26657
|
||||
- 6060
|
||||
volumes:
|
||||
- ./{{ .Name }}:/tendermint
|
||||
networks:
|
||||
{{ $.Name }}:
|
||||
ipv{{ if $.IPv6 }}6{{ else }}4{{ end}}_address: {{ .IP }}
|
||||
|
||||
{{end}}`)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var buf bytes.Buffer
|
||||
err = tmpl.Execute(&buf, testnet)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return buf.Bytes(), nil
|
||||
}
|
||||
|
||||
// MakeGenesis generates a genesis document.
|
||||
func MakeGenesis(testnet *e2e.Testnet) (types.GenesisDoc, error) {
|
||||
genesis := types.GenesisDoc{
|
||||
|
||||
@@ -66,23 +66,27 @@ func testNode(t *testing.T, testFunc func(*testing.T, e2e.Node)) {
|
||||
func loadTestnet(t *testing.T) e2e.Testnet {
|
||||
t.Helper()
|
||||
|
||||
manifest := os.Getenv("E2E_MANIFEST")
|
||||
if manifest == "" {
|
||||
manifestFile := os.Getenv("E2E_MANIFEST")
|
||||
if manifestFile == "" {
|
||||
t.Skip("E2E_MANIFEST not set, not an end-to-end test run")
|
||||
}
|
||||
if !filepath.IsAbs(manifest) {
|
||||
manifest = filepath.Join("..", manifest)
|
||||
if !filepath.IsAbs(manifestFile) {
|
||||
manifestFile = filepath.Join("..", manifestFile)
|
||||
}
|
||||
|
||||
testnetCacheMtx.Lock()
|
||||
defer testnetCacheMtx.Unlock()
|
||||
if testnet, ok := testnetCache[manifest]; ok {
|
||||
if testnet, ok := testnetCache[manifestFile]; ok {
|
||||
return testnet
|
||||
}
|
||||
|
||||
testnet, err := e2e.LoadTestnet(manifest)
|
||||
m, err := e2e.LoadManifest(manifestFile)
|
||||
require.NoError(t, err)
|
||||
testnetCache[manifest] = *testnet
|
||||
ifd, err := e2e.NewDockerInfrastructureData(m)
|
||||
require.NoError(t, err)
|
||||
|
||||
testnet, err := e2e.LoadTestnet(m, manifestFile, ifd)
|
||||
require.NoError(t, err)
|
||||
testnetCache[manifestFile] = *testnet
|
||||
return *testnet
|
||||
}
|
||||
|
||||
|
||||